From 759ff4b91024835d3bf436b993b0f39e276c36fe Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Sat, 22 Apr 2006 18:45:01 -0400 Subject: Updates for OzoneCPU. build/SConstruct: Include Ozone CPU models. cpu/cpu_models.py: Include OzoneCPU models. --HG-- extra : convert_revision : 51a016c216cacd2cc613eed79653026c2edda4b3 --- cpu/ozone/back_end.cc | 5 + cpu/ozone/back_end.hh | 509 ++++++++++ cpu/ozone/back_end_impl.hh | 1853 ++++++++++++++++++++++++++++++++++++ cpu/ozone/cpu.cc | 9 +- cpu/ozone/cpu.hh | 608 +++++------- cpu/ozone/cpu_builder.cc | 818 ++++++++++++++++ cpu/ozone/cpu_impl.hh | 1140 +++++++++++++++++++++- cpu/ozone/dyn_inst.cc | 35 + cpu/ozone/dyn_inst.hh | 261 +++++ cpu/ozone/dyn_inst_impl.hh | 286 ++++++ cpu/ozone/front_end.cc | 7 + cpu/ozone/front_end.hh | 242 +++++ cpu/ozone/front_end_impl.hh | 798 ++++++++++++++++ cpu/ozone/inorder_back_end.cc | 5 + cpu/ozone/inorder_back_end.hh | 417 ++++++++ cpu/ozone/inorder_back_end_impl.hh | 519 ++++++++++ cpu/ozone/inst_queue.cc | 36 + cpu/ozone/inst_queue.hh | 506 ++++++++++ cpu/ozone/inst_queue_impl.hh | 1341 ++++++++++++++++++++++++++ cpu/ozone/lsq_unit.cc | 34 + cpu/ozone/lsq_unit.hh | 632 ++++++++++++ cpu/ozone/lsq_unit_impl.hh | 846 ++++++++++++++++ cpu/ozone/null_predictor.hh | 76 ++ cpu/ozone/ozone_impl.hh | 73 ++ cpu/ozone/rename_table.cc | 7 + cpu/ozone/rename_table.hh | 25 + cpu/ozone/rename_table_impl.hh | 23 + cpu/ozone/simple_impl.hh | 69 ++ cpu/ozone/simple_params.hh | 164 ++++ cpu/ozone/thread_state.hh | 171 ++++ 30 files changed, 11142 insertions(+), 373 deletions(-) create mode 100644 cpu/ozone/back_end.cc create mode 100644 cpu/ozone/back_end.hh create mode 100644 cpu/ozone/back_end_impl.hh create mode 100644 cpu/ozone/cpu_builder.cc create mode 100644 cpu/ozone/dyn_inst.cc create mode 100644 cpu/ozone/dyn_inst.hh create mode 100644 cpu/ozone/dyn_inst_impl.hh create mode 100644 cpu/ozone/front_end.cc create mode 100644 cpu/ozone/front_end.hh create mode 100644 cpu/ozone/front_end_impl.hh create mode 100644 cpu/ozone/inorder_back_end.cc create mode 100644 cpu/ozone/inorder_back_end.hh create mode 100644 cpu/ozone/inorder_back_end_impl.hh create mode 100644 cpu/ozone/inst_queue.cc create mode 100644 cpu/ozone/inst_queue.hh create mode 100644 cpu/ozone/inst_queue_impl.hh create mode 100644 cpu/ozone/lsq_unit.cc create mode 100644 cpu/ozone/lsq_unit.hh create mode 100644 cpu/ozone/lsq_unit_impl.hh create mode 100644 cpu/ozone/null_predictor.hh create mode 100644 cpu/ozone/ozone_impl.hh create mode 100644 cpu/ozone/rename_table.cc create mode 100644 cpu/ozone/rename_table.hh create mode 100644 cpu/ozone/rename_table_impl.hh create mode 100644 cpu/ozone/simple_impl.hh create mode 100644 cpu/ozone/simple_params.hh create mode 100644 cpu/ozone/thread_state.hh (limited to 'cpu/ozone') diff --git a/cpu/ozone/back_end.cc b/cpu/ozone/back_end.cc new file mode 100644 index 000000000..dbab5435e --- /dev/null +++ b/cpu/ozone/back_end.cc @@ -0,0 +1,5 @@ + +#include "cpu/ozone/back_end_impl.hh" +#include "cpu/ozone/ozone_impl.hh" + +template class BackEnd; diff --git a/cpu/ozone/back_end.hh b/cpu/ozone/back_end.hh new file mode 100644 index 000000000..0713a0143 --- /dev/null +++ b/cpu/ozone/back_end.hh @@ -0,0 +1,509 @@ + +#ifndef __CPU_OZONE_BACK_END_HH__ +#define __CPU_OZONE_BACK_END_HH__ + +#include +#include +#include + +#include "arch/faults.hh" +#include "base/timebuf.hh" +#include "cpu/inst_seq.hh" +#include "cpu/ozone/rename_table.hh" +#include "cpu/ozone/thread_state.hh" +#include "mem/functional/functional.hh" +#include "mem/mem_interface.hh" +#include "mem/mem_req.hh" +#include "sim/eventq.hh" + +class ExecContext; + +template +class OzoneThreadState; + +template +class BackEnd +{ + public: + typedef OzoneThreadState Thread; + + typedef typename Impl::Params Params; + typedef typename Impl::DynInst DynInst; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::FrontEnd FrontEnd; + typedef typename Impl::FullCPU::CommStruct CommStruct; + + struct SizeStruct { + int size; + }; + + typedef SizeStruct DispatchToIssue; + typedef SizeStruct IssueToExec; + typedef SizeStruct ExecToCommit; + typedef SizeStruct Writeback; + + TimeBuffer d2i; + typename TimeBuffer::wire instsToDispatch; + TimeBuffer i2e; + typename TimeBuffer::wire instsToExecute; + TimeBuffer e2c; + TimeBuffer numInstsToWB; + + TimeBuffer *comm; + typename TimeBuffer::wire toIEW; + typename TimeBuffer::wire fromCommit; + + class InstQueue { + enum queue { + NonSpec, + IQ, + ToBeScheduled, + ReadyList, + ReplayList + }; + struct pqCompare { + bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const + { + return lhs->seqNum > rhs->seqNum; + } + }; + public: + InstQueue(Params *params); + + std::string name() const; + + void regStats(); + + void setIssueExecQueue(TimeBuffer *i2e_queue); + + void setBE(BackEnd *_be) { be = _be; } + + void insert(DynInstPtr &inst); + + void scheduleReadyInsts(); + + void scheduleNonSpec(const InstSeqNum &sn); + + DynInstPtr getReadyInst(); + + void commit(const InstSeqNum &sn) {} + + void squash(const InstSeqNum &sn); + + int wakeDependents(DynInstPtr &inst); + + /** Tells memory dependence unit that a memory instruction needs to be + * rescheduled. It will re-execute once replayMemInst() is called. + */ + void rescheduleMemInst(DynInstPtr &inst); + + /** Re-executes all rescheduled memory instructions. */ + void replayMemInst(DynInstPtr &inst); + + /** Completes memory instruction. */ + void completeMemInst(DynInstPtr &inst); + + void violation(DynInstPtr &inst, DynInstPtr &violation) { } + + bool isFull() { return numInsts >= size; } + + void dumpInsts(); + + private: + bool find(queue q, typename std::list::iterator it); + BackEnd *be; + TimeBuffer *i2e; + typename TimeBuffer::wire numIssued; + typedef typename std::list InstList; + typedef typename std::list::iterator InstListIt; + typedef typename std::priority_queue, pqCompare> ReadyInstQueue; + // Not sure I need the IQ list; it just needs to be a count. + InstList iq; + InstList toBeScheduled; + InstList readyList; + InstList nonSpec; + InstList replayList; + ReadyInstQueue readyQueue; + int size; + int numInsts; + int width; + + Stats::VectorDistribution<> occ_dist; + + Stats::Vector<> inst_count; + Stats::Vector<> peak_inst_count; + Stats::Scalar<> empty_count; + Stats::Scalar<> current_count; + Stats::Scalar<> fullCount; + + Stats::Formula occ_rate; + Stats::Formula avg_residency; + Stats::Formula empty_rate; + Stats::Formula full_rate; + }; + + /** LdWriteback event for a load completion. */ + class LdWritebackEvent : public Event { + private: + /** Instruction that is writing back data to the register file. */ + DynInstPtr inst; + /** Pointer to IEW stage. */ + BackEnd *be; + + public: + /** Constructs a load writeback event. */ + LdWritebackEvent(DynInstPtr &_inst, BackEnd *be); + + /** Processes writeback event. */ + virtual void process(); + /** Returns the description of the writeback event. */ + virtual const char *description(); + }; + + BackEnd(Params *params); + + std::string name() const; + + void regStats(); + + void setCPU(FullCPU *cpu_ptr) + { cpu = cpu_ptr; } + + void setFrontEnd(FrontEnd *front_end_ptr) + { frontEnd = front_end_ptr; } + + void setXC(ExecContext *xc_ptr) + { xc = xc_ptr; } + + void setThreadState(Thread *thread_ptr) + { thread = thread_ptr; } + + void setCommBuffer(TimeBuffer *_comm); + + void tick(); + void squash(); + void squashFromXC(); + bool xcSquash; + + template + Fault read(MemReqPtr &req, T &data, int load_idx); + + template + Fault write(MemReqPtr &req, T &data, int store_idx); + + Addr readCommitPC() { return commitPC; } + + Addr commitPC; + + bool robEmpty() { return instList.empty(); } + + bool isFull() { return numInsts >= numROBEntries; } + bool isBlocked() { return status == Blocked || dispatchStatus == Blocked; } + + /** Tells memory dependence unit that a memory instruction needs to be + * rescheduled. It will re-execute once replayMemInst() is called. + */ + void rescheduleMemInst(DynInstPtr &inst) + { IQ.rescheduleMemInst(inst); } + + /** Re-executes all rescheduled memory instructions. */ + void replayMemInst(DynInstPtr &inst) + { IQ.replayMemInst(inst); } + + /** Completes memory instruction. */ + void completeMemInst(DynInstPtr &inst) + { IQ.completeMemInst(inst); } + + void fetchFault(Fault &fault); + + private: + void updateStructures(); + void dispatchInsts(); + void dispatchStall(); + void checkDispatchStatus(); + void scheduleReadyInsts(); + void executeInsts(); + void commitInsts(); + void addToIQ(DynInstPtr &inst); + void addToLSQ(DynInstPtr &inst); + void instToCommit(DynInstPtr &inst); + void writebackInsts(); + bool commitInst(int inst_num); + void squash(const InstSeqNum &sn); + void squashDueToBranch(DynInstPtr &inst); + void squashDueToMemBlocked(DynInstPtr &inst); + void updateExeInstStats(DynInstPtr &inst); + void updateComInstStats(DynInstPtr &inst); + + public: + FullCPU *cpu; + + FrontEnd *frontEnd; + + ExecContext *xc; + + Thread *thread; + + enum Status { + Running, + Idle, + DcacheMissStall, + DcacheMissComplete, + Blocked + }; + + Status status; + + Status dispatchStatus; + + Counter funcExeInst; + + private: +// typedef typename Impl::InstQueue InstQueue; + + InstQueue IQ; + + typedef typename Impl::LdstQueue LdstQueue; + + LdstQueue LSQ; + public: + RenameTable commitRenameTable; + + RenameTable renameTable; + private: + class DCacheCompletionEvent : public Event + { + private: + BackEnd *be; + + public: + DCacheCompletionEvent(BackEnd *_be); + + virtual void process(); + virtual const char *description(); + }; + + friend class DCacheCompletionEvent; + + DCacheCompletionEvent cacheCompletionEvent; + + MemInterface *dcacheInterface; + + MemReqPtr memReq; + + // General back end width. Used if the more specific isn't given. + int width; + + // Dispatch width. + int dispatchWidth; + int numDispatchEntries; + int dispatchSize; + + int issueWidth; + + // Writeback width + int wbWidth; + + // Commit width + int commitWidth; + + /** Index into queue of instructions being written back. */ + unsigned wbNumInst; + + /** Cycle number within the queue of instructions being written + * back. Used in case there are too many instructions writing + * back at the current cycle and writesbacks need to be scheduled + * for the future. See comments in instToCommit(). + */ + unsigned wbCycle; + + int numROBEntries; + int numInsts; + + private: + typedef typename std::list::iterator InstListIt; + + std::list instList; + std::list dispatch; + std::list writeback; + + int latency; + + int squashLatency; + + bool exactFullStall; + + bool fetchRedirect[Impl::MaxThreads]; + + // number of cycles stalled for D-cache misses +/* Stats::Scalar<> dcacheStallCycles; + Counter lastDcacheStall; +*/ + Stats::Vector<> rob_cap_events; + Stats::Vector<> rob_cap_inst_count; + Stats::Vector<> iq_cap_events; + Stats::Vector<> iq_cap_inst_count; + // total number of instructions executed + Stats::Vector<> exe_inst; + Stats::Vector<> exe_swp; + Stats::Vector<> exe_nop; + Stats::Vector<> exe_refs; + Stats::Vector<> exe_loads; + Stats::Vector<> exe_branches; + + Stats::Vector<> issued_ops; + + // total number of loads forwaded from LSQ stores + Stats::Vector<> lsq_forw_loads; + + // total number of loads ignored due to invalid addresses + Stats::Vector<> inv_addr_loads; + + // total number of software prefetches ignored due to invalid addresses + Stats::Vector<> inv_addr_swpfs; + // ready loads blocked due to memory disambiguation + Stats::Vector<> lsq_blocked_loads; + + Stats::Scalar<> lsqInversion; + + Stats::Vector<> n_issued_dist; + Stats::VectorDistribution<> issue_delay_dist; + + Stats::VectorDistribution<> queue_res_dist; +/* + Stats::Vector<> stat_fu_busy; + Stats::Vector2d<> stat_fuBusy; + Stats::Vector<> dist_unissued; + Stats::Vector2d<> stat_issued_inst_type; + + Stats::Formula misspec_cnt; + Stats::Formula misspec_ipc; + Stats::Formula issue_rate; + Stats::Formula issue_stores; + Stats::Formula issue_op_rate; + Stats::Formula fu_busy_rate; + Stats::Formula commit_stores; + Stats::Formula commit_ipc; + Stats::Formula commit_ipb; + Stats::Formula lsq_inv_rate; +*/ + Stats::Vector<> writeback_count; + Stats::Vector<> producer_inst; + Stats::Vector<> consumer_inst; + Stats::Vector<> wb_penalized; + + Stats::Formula wb_rate; + Stats::Formula wb_fanout; + Stats::Formula wb_penalized_rate; + + // total number of instructions committed + Stats::Vector<> stat_com_inst; + Stats::Vector<> stat_com_swp; + Stats::Vector<> stat_com_refs; + Stats::Vector<> stat_com_loads; + Stats::Vector<> stat_com_membars; + Stats::Vector<> stat_com_branches; + + Stats::Distribution<> n_committed_dist; + + Stats::Scalar<> commit_eligible_samples; + Stats::Vector<> commit_eligible; + + Stats::Scalar<> ROB_fcount; + Stats::Formula ROB_full_rate; + + Stats::Vector<> ROB_count; // cumulative ROB occupancy + Stats::Formula ROB_occ_rate; + Stats::VectorDistribution<> ROB_occ_dist; + public: + void dumpInsts(); +}; + +template +template +Fault +BackEnd::read(MemReqPtr &req, T &data, int load_idx) +{ +/* memReq->reset(addr, sizeof(T), flags); + + // translate to physical address + Fault fault = cpu->translateDataReadReq(memReq); + + // if we have a cache, do cache access too + if (fault == NoFault && dcacheInterface) { + memReq->cmd = Read; + memReq->completionEvent = NULL; + memReq->time = curTick; + memReq->flags &= ~INST_READ; + MemAccessResult result = dcacheInterface->access(memReq); + + // Ugly hack to get an event scheduled *only* if the access is + // a miss. We really should add first-class support for this + // at some point. + if (result != MA_HIT && dcacheInterface->doEvents()) { + // Fix this hack for keeping funcExeInst correct with loads that + // are executed twice. + --funcExeInst; + + memReq->completionEvent = &cacheCompletionEvent; + lastDcacheStall = curTick; +// unscheduleTickEvent(); +// status = DcacheMissStall; + DPRINTF(OzoneCPU, "Dcache miss stall!\n"); + } else { + // do functional access + fault = thread->mem->read(memReq, data); + + } + } +*/ +/* + if (!dcacheInterface && (memReq->flags & UNCACHEABLE)) + recordEvent("Uncached Read"); +*/ + return LSQ.read(req, data, load_idx); +} + +template +template +Fault +BackEnd::write(MemReqPtr &req, T &data, int store_idx) +{ +/* + memReq->reset(addr, sizeof(T), flags); + + // translate to physical address + Fault fault = cpu->translateDataWriteReq(memReq); + + if (fault == NoFault && dcacheInterface) { + memReq->cmd = Write; + memcpy(memReq->data,(uint8_t *)&data,memReq->size); + memReq->completionEvent = NULL; + memReq->time = curTick; + memReq->flags &= ~INST_READ; + MemAccessResult result = dcacheInterface->access(memReq); + + // Ugly hack to get an event scheduled *only* if the access is + // a miss. We really should add first-class support for this + // at some point. + if (result != MA_HIT && dcacheInterface->doEvents()) { + memReq->completionEvent = &cacheCompletionEvent; + lastDcacheStall = curTick; +// unscheduleTickEvent(); +// status = DcacheMissStall; + DPRINTF(OzoneCPU, "Dcache miss stall!\n"); + } + } + + if (res && (fault == NoFault)) + *res = memReq->result; + */ +/* + if (!dcacheInterface && (memReq->flags & UNCACHEABLE)) + recordEvent("Uncached Write"); +*/ + return LSQ.write(req, data, store_idx); +} + +#endif // __CPU_OZONE_BACK_END_HH__ diff --git a/cpu/ozone/back_end_impl.hh b/cpu/ozone/back_end_impl.hh new file mode 100644 index 000000000..807afaf2e --- /dev/null +++ b/cpu/ozone/back_end_impl.hh @@ -0,0 +1,1853 @@ + +#include "encumbered/cpu/full/op_class.hh" +#include "cpu/ozone/back_end.hh" + +template +BackEnd::InstQueue::InstQueue(Params *params) + : size(params->numIQEntries), numInsts(0), width(params->issueWidth) +{ +} + +template +std::string +BackEnd::InstQueue::name() const +{ + return be->name() + ".iq"; +} + +template +void +BackEnd::InstQueue::regStats() +{ + using namespace Stats; + + occ_dist + .init(1, 0, size, 2) + .name(name() + "occ_dist") + .desc("IQ Occupancy per cycle") + .flags(total | cdf) + ; + + inst_count + .init(1) + .name(name() + "cum_num_insts") + .desc("Total occupancy") + .flags(total) + ; + + peak_inst_count + .init(1) + .name(name() + "peak_occupancy") + .desc("Peak IQ occupancy") + .flags(total) + ; + + current_count + .name(name() + "current_count") + .desc("Occupancy this cycle") + ; + + empty_count + .name(name() + "empty_count") + .desc("Number of empty cycles") + ; + + fullCount + .name(name() + "full_count") + .desc("Number of full cycles") + ; + + + occ_rate + .name(name() + "occ_rate") + .desc("Average occupancy") + .flags(total) + ; + occ_rate = inst_count / be->cpu->numCycles; + + avg_residency + .name(name() + "avg_residency") + .desc("Average IQ residency") + .flags(total) + ; + avg_residency = occ_rate / be->cpu->numCycles; + + empty_rate + .name(name() + "empty_rate") + .desc("Fraction of cycles empty") + ; + empty_rate = 100 * empty_count / be->cpu->numCycles; + + full_rate + .name(name() + "full_rate") + .desc("Fraction of cycles full") + ; + full_rate = 100 * fullCount / be->cpu->numCycles; +} + +template +void +BackEnd::InstQueue::setIssueExecQueue(TimeBuffer *i2e_queue) +{ + i2e = i2e_queue; + numIssued = i2e->getWire(0); +} + +template +void +BackEnd::InstQueue::insert(DynInstPtr &inst) +{ + numInsts++; + inst_count[0]++; + if (!inst->isNonSpeculative()) { + if (inst->readyToIssue()) { + toBeScheduled.push_front(inst); + inst->iqIt = toBeScheduled.begin(); + inst->iqItValid = true; + } else { + iq.push_front(inst); + inst->iqIt = iq.begin(); + inst->iqItValid = true; + } + } else { + nonSpec.push_front(inst); + inst->iqIt = nonSpec.begin(); + inst->iqItValid = true; + } +} + +template +void +BackEnd::InstQueue::scheduleReadyInsts() +{ + int scheduled = numIssued->size; + InstListIt iq_it = --toBeScheduled.end(); + InstListIt iq_end_it = toBeScheduled.end(); + + while (iq_it != iq_end_it && scheduled < width) { +// if ((*iq_it)->readyToIssue()) { + DPRINTF(BE, "Instruction [sn:%lli] PC:%#x is ready\n", + (*iq_it)->seqNum, (*iq_it)->readPC()); + readyQueue.push(*iq_it); + readyList.push_front(*iq_it); + + (*iq_it)->iqIt = readyList.begin(); + + toBeScheduled.erase(iq_it--); + + ++scheduled; +// } else { +// iq_it++; +// } + } + + numIssued->size+= scheduled; +} + +template +void +BackEnd::InstQueue::scheduleNonSpec(const InstSeqNum &sn) +{ +/* + InstListIt non_spec_it = nonSpec.begin(); + InstListIt non_spec_end_it = nonSpec.end(); + + while ((*non_spec_it)->seqNum != sn) { + non_spec_it++; + assert(non_spec_it != non_spec_end_it); + } +*/ + DynInstPtr inst = nonSpec.back(); + + assert(inst->seqNum == sn); + + assert(find(NonSpec, inst->iqIt)); + nonSpec.erase(inst->iqIt); + readyList.push_front(inst); + inst->iqIt = readyList.begin(); + readyQueue.push(inst); + numIssued->size++; +} + +template +typename Impl::DynInstPtr +BackEnd::InstQueue::getReadyInst() +{ + assert(!readyList.empty()); + + DynInstPtr inst = readyQueue.top(); + readyQueue.pop(); + assert(find(ReadyList, inst->iqIt)); + readyList.erase(inst->iqIt); + inst->iqItValid = false; +// if (!inst->isMemRef()) + --numInsts; + return inst; +} + +template +void +BackEnd::InstQueue::squash(const InstSeqNum &sn) +{ + InstListIt iq_it = iq.begin(); + InstListIt iq_end_it = iq.end(); + + while (iq_it != iq_end_it && (*iq_it)->seqNum > sn) { + (*iq_it)->iqItValid = false; + iq.erase(iq_it++); + --numInsts; + } + + iq_it = nonSpec.begin(); + iq_end_it = nonSpec.end(); + + while (iq_it != iq_end_it && (*iq_it)->seqNum > sn) { + (*iq_it)->iqItValid = false; + nonSpec.erase(iq_it++); + --numInsts; + } + + iq_it = replayList.begin(); + iq_end_it = replayList.end(); + + while (iq_it != iq_end_it) { + if ((*iq_it)->seqNum > sn) { + (*iq_it)->iqItValid = false; + replayList.erase(iq_it++); + --numInsts; + } else { + iq_it++; + } + } + + assert(numInsts >= 0); +/* + InstListIt ready_it = readyList.begin(); + InstListIt ready_end_it = readyList.end(); + + while (ready_it != ready_end_it) { + if ((*ready_it)->seqNum > sn) { + readyList.erase(ready_it++); + } else { + ready_it++; + } + } +*/ +} + +template +int +BackEnd::InstQueue::wakeDependents(DynInstPtr &inst) +{ + assert(!inst->isSquashed()); + std::vector &dependents = inst->getDependents(); + int num_outputs = dependents.size(); + + for (int i = 0; i < num_outputs; i++) { + DynInstPtr inst = dependents[i]; + inst->markSrcRegReady(); + if (inst->readyToIssue() && inst->iqItValid) { + if (inst->isNonSpeculative()) { + assert(find(NonSpec, inst->iqIt)); + nonSpec.erase(inst->iqIt); + } else { + assert(find(IQ, inst->iqIt)); + iq.erase(inst->iqIt); + } + + toBeScheduled.push_front(inst); + inst->iqIt = toBeScheduled.begin(); + } + } + return num_outputs; +} + +template +void +BackEnd::InstQueue::rescheduleMemInst(DynInstPtr &inst) +{ + assert(!inst->iqItValid); + replayList.push_front(inst); + inst->iqIt = replayList.begin(); + inst->iqItValid = true; + ++numInsts; +} + +template +void +BackEnd::InstQueue::replayMemInst(DynInstPtr &inst) +{ + assert(find(ReplayList, inst->iqIt)); + InstListIt iq_it = --replayList.end(); + InstListIt iq_end_it = replayList.end(); + while (iq_it != iq_end_it) { + DynInstPtr rescheduled_inst = (*iq_it); + replayList.erase(iq_it--); + toBeScheduled.push_front(rescheduled_inst); + rescheduled_inst->iqIt = toBeScheduled.begin(); + } +} + +template +void +BackEnd::InstQueue::completeMemInst(DynInstPtr &inst) +{ + panic("Not implemented."); +} + +template +bool +BackEnd::InstQueue::find(queue q, InstListIt it) +{ + InstListIt iq_it, iq_end_it; + switch(q) { + case NonSpec: + iq_it = nonSpec.begin(); + iq_end_it = nonSpec.end(); + break; + case IQ: + iq_it = iq.begin(); + iq_end_it = iq.end(); + break; + case ToBeScheduled: + iq_it = toBeScheduled.begin(); + iq_end_it = toBeScheduled.end(); + break; + case ReadyList: + iq_it = readyList.begin(); + iq_end_it = readyList.end(); + break; + case ReplayList: + iq_it = replayList.begin(); + iq_end_it = replayList.end(); + } + + while (iq_it != it && iq_it != iq_end_it) { + iq_it++; + } + if (iq_it == it) { + return true; + } else { + return false; + } +} + +template +void +BackEnd::InstQueue::dumpInsts() +{ + cprintf("IQ size: %i\n", iq.size()); + + InstListIt inst_list_it = --iq.end(); + + int num = 0; + int valid_num = 0; + while (inst_list_it != iq.end()) + { + cprintf("Instruction:%i\n", + num); + if (!(*inst_list_it)->isSquashed()) { + if (!(*inst_list_it)->isIssued()) { + ++valid_num; + cprintf("Count:%i\n", valid_num); + } else if ((*inst_list_it)->isMemRef() && + !(*inst_list_it)->memOpDone) { + // Loads that have not been marked as executed still count + // towards the total instructions. + ++valid_num; + cprintf("Count:%i\n", valid_num); + } + } + + cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" + "Issued:%i\nSquashed:%i\n", + (*inst_list_it)->readPC(), + (*inst_list_it)->seqNum, + (*inst_list_it)->threadNumber, + (*inst_list_it)->isIssued(), + (*inst_list_it)->isSquashed()); + + if ((*inst_list_it)->isMemRef()) { + cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); + } + + cprintf("\n"); + + inst_list_it--; + ++num; + } + + cprintf("nonSpec size: %i\n", nonSpec.size()); + + inst_list_it = --nonSpec.end(); + + while (inst_list_it != nonSpec.end()) + { + cprintf("Instruction:%i\n", + num); + if (!(*inst_list_it)->isSquashed()) { + if (!(*inst_list_it)->isIssued()) { + ++valid_num; + cprintf("Count:%i\n", valid_num); + } else if ((*inst_list_it)->isMemRef() && + !(*inst_list_it)->memOpDone) { + // Loads that have not been marked as executed still count + // towards the total instructions. + ++valid_num; + cprintf("Count:%i\n", valid_num); + } + } + + cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" + "Issued:%i\nSquashed:%i\n", + (*inst_list_it)->readPC(), + (*inst_list_it)->seqNum, + (*inst_list_it)->threadNumber, + (*inst_list_it)->isIssued(), + (*inst_list_it)->isSquashed()); + + if ((*inst_list_it)->isMemRef()) { + cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); + } + + cprintf("\n"); + + inst_list_it--; + ++num; + } + + cprintf("toBeScheduled size: %i\n", toBeScheduled.size()); + + inst_list_it = --toBeScheduled.end(); + + while (inst_list_it != toBeScheduled.end()) + { + cprintf("Instruction:%i\n", + num); + if (!(*inst_list_it)->isSquashed()) { + if (!(*inst_list_it)->isIssued()) { + ++valid_num; + cprintf("Count:%i\n", valid_num); + } else if ((*inst_list_it)->isMemRef() && + !(*inst_list_it)->memOpDone) { + // Loads that have not been marked as executed still count + // towards the total instructions. + ++valid_num; + cprintf("Count:%i\n", valid_num); + } + } + + cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" + "Issued:%i\nSquashed:%i\n", + (*inst_list_it)->readPC(), + (*inst_list_it)->seqNum, + (*inst_list_it)->threadNumber, + (*inst_list_it)->isIssued(), + (*inst_list_it)->isSquashed()); + + if ((*inst_list_it)->isMemRef()) { + cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); + } + + cprintf("\n"); + + inst_list_it--; + ++num; + } + + cprintf("readyList size: %i\n", readyList.size()); + + inst_list_it = --readyList.end(); + + while (inst_list_it != readyList.end()) + { + cprintf("Instruction:%i\n", + num); + if (!(*inst_list_it)->isSquashed()) { + if (!(*inst_list_it)->isIssued()) { + ++valid_num; + cprintf("Count:%i\n", valid_num); + } else if ((*inst_list_it)->isMemRef() && + !(*inst_list_it)->memOpDone) { + // Loads that have not been marked as executed still count + // towards the total instructions. + ++valid_num; + cprintf("Count:%i\n", valid_num); + } + } + + cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" + "Issued:%i\nSquashed:%i\n", + (*inst_list_it)->readPC(), + (*inst_list_it)->seqNum, + (*inst_list_it)->threadNumber, + (*inst_list_it)->isIssued(), + (*inst_list_it)->isSquashed()); + + if ((*inst_list_it)->isMemRef()) { + cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); + } + + cprintf("\n"); + + inst_list_it--; + ++num; + } +} + +template +BackEnd::LdWritebackEvent::LdWritebackEvent(DynInstPtr &_inst, + BackEnd *_be) + : Event(&mainEventQueue), inst(_inst), be(_be) +{ + this->setFlags(Event::AutoDelete); +} + +template +void +BackEnd::LdWritebackEvent::process() +{ + DPRINTF(BE, "Load writeback event [sn:%lli]\n", inst->seqNum); +// DPRINTF(Activity, "Activity: Ld Writeback event [sn:%lli]\n", inst->seqNum); + + //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum); + +// iewStage->wakeCPU(); + + if (inst->isSquashed()) { + inst = NULL; + return; + } + + if (!inst->isExecuted()) { + inst->setExecuted(); + + // Execute again to copy data to proper place. + inst->completeAcc(); + } + + // Need to insert instruction into queue to commit + be->instToCommit(inst); + + //wroteToTimeBuffer = true; +// iewStage->activityThisCycle(); + + inst = NULL; +} + +template +const char * +BackEnd::LdWritebackEvent::description() +{ + return "Load writeback event"; +} + + +template +BackEnd::DCacheCompletionEvent::DCacheCompletionEvent(BackEnd *_be) + : Event(&mainEventQueue, CPU_Tick_Pri), be(_be) +{ +} + +template +void +BackEnd::DCacheCompletionEvent::process() +{ +} + +template +const char * +BackEnd::DCacheCompletionEvent::description() +{ + return "Cache completion event"; +} + +template +BackEnd::BackEnd(Params *params) + : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(5, 5), + xcSquash(false), IQ(params), + cacheCompletionEvent(this), width(params->backEndWidth), + exactFullStall(true) +{ + numROBEntries = params->numROBEntries; + numInsts = 0; + numDispatchEntries = 32; + IQ.setBE(this); + LSQ.setBE(this); + + // Setup IQ and LSQ with their parameters here. + instsToDispatch = d2i.getWire(-1); + + instsToExecute = i2e.getWire(-1); + + IQ.setIssueExecQueue(&i2e); + + dispatchWidth = params->dispatchWidth ? params->dispatchWidth : width; + issueWidth = params->issueWidth ? params->issueWidth : width; + wbWidth = params->wbWidth ? params->wbWidth : width; + commitWidth = params->commitWidth ? params->commitWidth : width; + + LSQ.init(params, params->LQEntries, params->SQEntries, 0); + + dispatchStatus = Running; +} + +template +std::string +BackEnd::name() const +{ + return cpu->name() + ".backend"; +} + +template +void +BackEnd::regStats() +{ + using namespace Stats; + rob_cap_events + .init(cpu->number_of_threads) + .name(name() + ".ROB:cap_events") + .desc("number of cycles where ROB cap was active") + .flags(total) + ; + + rob_cap_inst_count + .init(cpu->number_of_threads) + .name(name() + ".ROB:cap_inst") + .desc("number of instructions held up by ROB cap") + .flags(total) + ; + + iq_cap_events + .init(cpu->number_of_threads) + .name(name() +".IQ:cap_events" ) + .desc("number of cycles where IQ cap was active") + .flags(total) + ; + + iq_cap_inst_count + .init(cpu->number_of_threads) + .name(name() + ".IQ:cap_inst") + .desc("number of instructions held up by IQ cap") + .flags(total) + ; + + + exe_inst + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:count") + .desc("number of insts issued") + .flags(total) + ; + + exe_swp + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:swp") + .desc("number of swp insts issued") + .flags(total) + ; + + exe_nop + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:nop") + .desc("number of nop insts issued") + .flags(total) + ; + + exe_refs + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:refs") + .desc("number of memory reference insts issued") + .flags(total) + ; + + exe_loads + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:loads") + .desc("number of load insts issued") + .flags(total) + ; + + exe_branches + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:branches") + .desc("Number of branches issued") + .flags(total) + ; + + issued_ops + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:op_count") + .desc("number of insts issued") + .flags(total) + ; + +/* + for (int i=0; inumber_of_threads) + .name(name() + ".LSQ:forw_loads") + .desc("number of loads forwarded via LSQ") + .flags(total) + ; + + inv_addr_loads + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:addr_loads") + .desc("number of invalid-address loads") + .flags(total) + ; + + inv_addr_swpfs + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:addr_swpfs") + .desc("number of invalid-address SW prefetches") + .flags(total) + ; + + lsq_blocked_loads + .init(cpu->number_of_threads) + .name(name() + ".LSQ:blocked_loads") + .desc("number of ready loads not issued due to memory disambiguation") + .flags(total) + ; + + lsqInversion + .name(name() + ".ISSUE:lsq_invert") + .desc("Number of times LSQ instruction issued early") + ; + + n_issued_dist + .init(issueWidth + 1) + .name(name() + ".ISSUE:issued_per_cycle") + .desc("Number of insts issued each cycle") + .flags(total | pdf | dist) + ; + issue_delay_dist + .init(Num_OpClasses,0,99,2) + .name(name() + ".ISSUE:") + .desc("cycles from operands ready to issue") + .flags(pdf | cdf) + ; + + queue_res_dist + .init(Num_OpClasses, 0, 99, 2) + .name(name() + ".IQ:residence:") + .desc("cycles from dispatch to issue") + .flags(total | pdf | cdf ) + ; + for (int i = 0; i < Num_OpClasses; ++i) { + queue_res_dist.subname(i, opClassStrings[i]); + } + + writeback_count + .init(cpu->number_of_threads) + .name(name() + ".WB:count") + .desc("cumulative count of insts written-back") + .flags(total) + ; + + producer_inst + .init(cpu->number_of_threads) + .name(name() + ".WB:producers") + .desc("num instructions producing a value") + .flags(total) + ; + + consumer_inst + .init(cpu->number_of_threads) + .name(name() + ".WB:consumers") + .desc("num instructions consuming a value") + .flags(total) + ; + + wb_penalized + .init(cpu->number_of_threads) + .name(name() + ".WB:penalized") + .desc("number of instrctions required to write to 'other' IQ") + .flags(total) + ; + + + wb_penalized_rate + .name(name() + ".WB:penalized_rate") + .desc ("fraction of instructions written-back that wrote to 'other' IQ") + .flags(total) + ; + + wb_penalized_rate = wb_penalized / writeback_count; + + wb_fanout + .name(name() + ".WB:fanout") + .desc("average fanout of values written-back") + .flags(total) + ; + + wb_fanout = producer_inst / consumer_inst; + + wb_rate + .name(name() + ".WB:rate") + .desc("insts written-back per cycle") + .flags(total) + ; + wb_rate = writeback_count / cpu->numCycles; + + stat_com_inst + .init(cpu->number_of_threads) + .name(name() + ".COM:count") + .desc("Number of instructions committed") + .flags(total) + ; + + stat_com_swp + .init(cpu->number_of_threads) + .name(name() + ".COM:swp_count") + .desc("Number of s/w prefetches committed") + .flags(total) + ; + + stat_com_refs + .init(cpu->number_of_threads) + .name(name() + ".COM:refs") + .desc("Number of memory references committed") + .flags(total) + ; + + stat_com_loads + .init(cpu->number_of_threads) + .name(name() + ".COM:loads") + .desc("Number of loads committed") + .flags(total) + ; + + stat_com_membars + .init(cpu->number_of_threads) + .name(name() + ".COM:membars") + .desc("Number of memory barriers committed") + .flags(total) + ; + + stat_com_branches + .init(cpu->number_of_threads) + .name(name() + ".COM:branches") + .desc("Number of branches committed") + .flags(total) + ; + n_committed_dist + .init(0,commitWidth,1) + .name(name() + ".COM:committed_per_cycle") + .desc("Number of insts commited each cycle") + .flags(pdf) + ; + + // + // Commit-Eligible instructions... + // + // -> The number of instructions eligible to commit in those + // cycles where we reached our commit BW limit (less the number + // actually committed) + // + // -> The average value is computed over ALL CYCLES... not just + // the BW limited cycles + // + // -> The standard deviation is computed only over cycles where + // we reached the BW limit + // + commit_eligible + .init(cpu->number_of_threads) + .name(name() + ".COM:bw_limited") + .desc("number of insts not committed due to BW limits") + .flags(total) + ; + + commit_eligible_samples + .name(name() + ".COM:bw_lim_events") + .desc("number cycles where commit BW limit reached") + ; + + ROB_fcount + .name(name() + ".ROB:full_count") + .desc("number of cycles where ROB was full") + ; + + ROB_count + .init(cpu->number_of_threads) + .name(name() + ".ROB:occupancy") + .desc(name() + ".ROB occupancy (cumulative)") + .flags(total) + ; + + ROB_full_rate + .name(name() + ".ROB:full_rate") + .desc("ROB full per cycle") + ; + ROB_full_rate = ROB_fcount / cpu->numCycles; + + ROB_occ_rate + .name(name() + ".ROB:occ_rate") + .desc("ROB occupancy rate") + .flags(total) + ; + ROB_occ_rate = ROB_count / cpu->numCycles; + + ROB_occ_dist + .init(cpu->number_of_threads,0,numROBEntries,2) + .name(name() + ".ROB:occ_dist") + .desc("ROB Occupancy per cycle") + .flags(total | cdf) + ; + + IQ.regStats(); +} + +template +void +BackEnd::setCommBuffer(TimeBuffer *_comm) +{ + comm = _comm; + toIEW = comm->getWire(0); + fromCommit = comm->getWire(-1); +} + +template +void +BackEnd::tick() +{ + DPRINTF(BE, "Ticking back end\n"); + + ROB_count[0]+= numInsts; + + wbCycle = 0; + + if (xcSquash) { + squashFromXC(); + } + + // Read in any done instruction information and update the IQ or LSQ. + updateStructures(); + + if (dispatchStatus != Blocked) { + d2i.advance(); + dispatchInsts(); + } else { + checkDispatchStatus(); + } + + i2e.advance(); + scheduleReadyInsts(); + + e2c.advance(); + executeInsts(); + + numInstsToWB.advance(); + writebackInsts(); + + commitInsts(); + + assert(numInsts == instList.size()); +} + +template +void +BackEnd::updateStructures() +{ + if (fromCommit->doneSeqNum) { + IQ.commit(fromCommit->doneSeqNum); + LSQ.commitLoads(fromCommit->doneSeqNum); + LSQ.commitStores(fromCommit->doneSeqNum); + } + + if (fromCommit->nonSpecSeqNum) { + if (fromCommit->uncached) { + LSQ.executeLoad(fromCommit->lqIdx); + } else { + IQ.scheduleNonSpec( + fromCommit->nonSpecSeqNum); + } + } +} + +template +void +BackEnd::addToIQ(DynInstPtr &inst) +{ + // Do anything IQ specific here? + IQ.insert(inst); +} + +template +void +BackEnd::addToLSQ(DynInstPtr &inst) +{ + // Do anything LSQ specific here? + LSQ.insert(inst); +} + +template +void +BackEnd::dispatchInsts() +{ + DPRINTF(BE, "Trying to dispatch instructions.\n"); + + // Pull instructions out of the front end. + int disp_width = dispatchWidth ? dispatchWidth : width; + + // Could model dispatching time, but in general 1 cycle is probably + // good enough. + + if (dispatchSize < numDispatchEntries) { + for (int i = 0; i < disp_width; i++) { + // Get instructions + DynInstPtr inst = frontEnd->getInst(); + + if (!inst) { + // No more instructions to get + break; + } + + DPRINTF(BE, "Processing instruction [sn:%lli] PC:%#x\n", + inst->seqNum, inst->readPC()); + + for (int i = 0; i < inst->numDestRegs(); ++i) + renameTable[inst->destRegIdx(i)] = inst; + + // Add to queue to be dispatched. + dispatch.push_back(inst); + + d2i[0].size++; + ++dispatchSize; + } + } + + assert(dispatch.size() < 64); + + for (int i = 0; i < instsToDispatch->size; ++i) { + assert(!dispatch.empty()); + // Get instruction from front of time buffer + DynInstPtr inst = dispatch.front(); + dispatch.pop_front(); + + if (inst->isSquashed()) + continue; + + --dispatchSize; + ++numInsts; + instList.push_back(inst); + + DPRINTF(BE, "Dispatching instruction [sn:%lli] PC:%#x\n", + inst->seqNum, inst->readPC()); + + addToIQ(inst); + + if (inst->isMemRef()) { + addToLSQ(inst); + } + + if (inst->isNonSpeculative()) { + inst->setCanCommit(); + } + + // Check if IQ or LSQ is full. If so we'll need to break and stop + // removing instructions. Also update the number of insts to remove + // from the queue. + if (exactFullStall) { + bool stall = false; + if (IQ.isFull()) { + DPRINTF(BE, "IQ is full!\n"); + stall = true; + } else if (LSQ.isFull()) { + DPRINTF(BE, "LSQ is full!\n"); + stall = true; + } else if (isFull()) { + DPRINTF(BE, "ROB is full!\n"); + stall = true; + ROB_fcount++; + } + if (stall) { + instsToDispatch->size-= i+1; + dispatchStall(); + return; + } + } + } + + // Check if IQ or LSQ is full. If so we'll need to break and stop + // removing instructions. Also update the number of insts to remove + // from the queue. Check here if we don't care about exact stall + // conditions. + + bool stall = false; + if (IQ.isFull()) { + DPRINTF(BE, "IQ is full!\n"); + stall = true; + } else if (LSQ.isFull()) { + DPRINTF(BE, "LSQ is full!\n"); + stall = true; + } else if (isFull()) { + DPRINTF(BE, "ROB is full!\n"); + stall = true; + ROB_fcount++; + } + if (stall) { + d2i.advance(); + dispatchStall(); + return; + } +} + +template +void +BackEnd::dispatchStall() +{ + dispatchStatus = Blocked; + if (!cpu->decoupledFrontEnd) { + // Tell front end to stall here through a timebuffer, or just tell + // it directly. + } +} + +template +void +BackEnd::checkDispatchStatus() +{ + assert(dispatchStatus == Blocked); + if (!IQ.isFull() && !LSQ.isFull() && !isFull()) { + DPRINTF(BE, "Dispatch no longer blocked\n"); + dispatchStatus = Running; + dispatchInsts(); + } +} + +template +void +BackEnd::scheduleReadyInsts() +{ + // Tell IQ to put any ready instructions into the instruction list. + // Probably want to have a list of DynInstPtrs returned here. Then I + // can choose to either put them into a time buffer to simulate + // IQ scheduling time, or hand them directly off to the next stage. + // Do you ever want to directly hand it off to the next stage? + DPRINTF(BE, "Trying to schedule ready instructions\n"); + IQ.scheduleReadyInsts(); +} + +template +void +BackEnd::executeInsts() +{ + int insts_to_execute = instsToExecute->size; + + issued_ops[0]+= insts_to_execute; + n_issued_dist[insts_to_execute]++; + + DPRINTF(BE, "Trying to execute %i instructions\n", insts_to_execute); + + fetchRedirect[0] = false; + + while (insts_to_execute > 0) { + // Get ready instruction from the IQ (or queue coming out of IQ) + // Execute the ready instruction. + // Wakeup any dependents if it's done. + DynInstPtr inst = IQ.getReadyInst(); + + DPRINTF(BE, "Executing inst [sn:%lli] PC: %#x\n", + inst->seqNum, inst->readPC()); + + ++funcExeInst; + + // Check if the instruction is squashed; if so then skip it + // and don't count it towards the FU usage. + if (inst->isSquashed()) { + DPRINTF(BE, "Execute: Instruction was squashed.\n"); + + // Not sure how to handle this plus the method of sending # of + // instructions to use. Probably will just have to count it + // towards the bandwidth usage, but not the FU usage. + --insts_to_execute; + + // Consider this instruction executed so that commit can go + // ahead and retire the instruction. + inst->setExecuted(); + + // Not sure if I should set this here or just let commit try to + // commit any squashed instructions. I like the latter a bit more. + inst->setCanCommit(); + +// ++iewExecSquashedInsts; + + continue; + } + + Fault fault = NoFault; + + // Execute instruction. + // Note that if the instruction faults, it will be handled + // at the commit stage. + if (inst->isMemRef() && + (!inst->isDataPrefetch() && !inst->isInstPrefetch())) { + DPRINTF(BE, "Execute: Initiating access for memory " + "reference.\n"); + + // Tell the LDSTQ to execute this instruction (if it is a load). + if (inst->isLoad()) { + // Loads will mark themselves as executed, and their writeback + // event adds the instruction to the queue to commit + fault = LSQ.executeLoad(inst); + +// ++iewExecLoadInsts; + } else if (inst->isStore()) { + LSQ.executeStore(inst); + +// ++iewExecStoreInsts; + + if (!(inst->req->flags & LOCKED)) { + inst->setExecuted(); + + instToCommit(inst); + } + // Store conditionals will mark themselves as executed, and + // their writeback event will add the instruction to the queue + // to commit. + } else { + panic("Unexpected memory type!\n"); + } + + } else { + inst->execute(); + +// ++iewExecutedInsts; + + inst->setExecuted(); + + instToCommit(inst); + } + + updateExeInstStats(inst); + + // Probably should have some sort of function for this. + // More general question of how to handle squashes? Have some sort of + // squash unit that controls it? Probably... + // Check if branch was correct. This check happens after the + // instruction is added to the queue because even if the branch + // is mispredicted, the branch instruction itself is still valid. + // Only handle this if there hasn't already been something that + // redirects fetch in this group of instructions. + + // This probably needs to prioritize the redirects if a different + // scheduler is used. Currently the scheduler schedules the oldest + // instruction first, so the branch resolution order will be correct. + unsigned tid = inst->threadNumber; + + if (!fetchRedirect[tid]) { + + if (inst->mispredicted()) { + fetchRedirect[tid] = true; + + DPRINTF(BE, "Execute: Branch mispredict detected.\n"); + DPRINTF(BE, "Execute: Redirecting fetch to PC: %#x.\n", + inst->nextPC); + + // If incorrect, then signal the ROB that it must be squashed. + squashDueToBranch(inst); + + if (inst->predTaken()) { +// predictedTakenIncorrect++; + } else { +// predictedNotTakenIncorrect++; + } + } else if (LSQ.violation()) { + fetchRedirect[tid] = true; + + // Get the DynInst that caused the violation. Note that this + // clears the violation signal. + DynInstPtr violator; + violator = LSQ.getMemDepViolator(); + + DPRINTF(BE, "LDSTQ detected a violation. Violator PC: " + "%#x, inst PC: %#x. Addr is: %#x.\n", + violator->readPC(), inst->readPC(), inst->physEffAddr); + + // Tell the instruction queue that a violation has occured. +// IQ.violation(inst, violator); + + // Squash. +// squashDueToMemOrder(inst,tid); + squashDueToBranch(inst); + +// ++memOrderViolationEvents; + } else if (LSQ.loadBlocked()) { + fetchRedirect[tid] = true; + + DPRINTF(BE, "Load operation couldn't execute because the " + "memory system is blocked. PC: %#x [sn:%lli]\n", + inst->readPC(), inst->seqNum); + + squashDueToMemBlocked(inst); + } + } + +// instList.pop_front(); + + --insts_to_execute; + + // keep an instruction count + thread->numInst++; + thread->numInsts++; + } + + assert(insts_to_execute >= 0); +} + +template +void +BackEnd::instToCommit(DynInstPtr &inst) +{ + int wb_width = wbWidth; + // First check the time slot that this instruction will write + // to. If there are free write ports at the time, then go ahead + // and write the instruction to that time. If there are not, + // keep looking back to see where's the first time there's a + // free slot. What happens if you run out of free spaces? + // For now naively assume that all instructions take one cycle. + // Otherwise would have to look into the time buffer based on the + // latency of the instruction. + + DPRINTF(BE, "Sending instructions to commit [sn:%lli] PC %#x.\n", + inst->seqNum, inst->readPC()); + + while (numInstsToWB[wbCycle].size >= wb_width) { + ++wbCycle; + + assert(wbCycle < 5); + } + + // Add finished instruction to queue to commit. + writeback.push_back(inst); + numInstsToWB[wbCycle].size++; + + if (wbCycle) + wb_penalized[0]++; +} + +template +void +BackEnd::writebackInsts() +{ + int wb_width = wbWidth; + // Using this method I'm not quite sure how to prevent an + // instruction from waking its own dependents multiple times, + // without the guarantee that commit always has enough bandwidth + // to accept all instructions being written back. This guarantee + // might not be too unrealistic. + InstListIt wb_inst_it = writeback.begin(); + InstListIt wb_end_it = writeback.end(); + int inst_num = 0; + int consumer_insts = 0; + + for (; inst_num < wb_width && + wb_inst_it != wb_end_it; inst_num++) { + DynInstPtr inst = (*wb_inst_it); + + // Some instructions will be sent to commit without having + // executed because they need commit to handle them. + // E.g. Uncached loads have not actually executed when they + // are first sent to commit. Instead commit must tell the LSQ + // when it's ready to execute the uncached load. + if (!inst->isSquashed()) { + DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n", + inst->seqNum, inst->readPC()); + + inst->setCanCommit(); + inst->setCompleted(); + + if (inst->isExecuted()) { + int dependents = IQ.wakeDependents(inst); + if (dependents) { + producer_inst[0]++; + consumer_insts+= dependents; + } + } + } + + writeback.erase(wb_inst_it++); + } + LSQ.writebackStores(); + consumer_inst[0]+= consumer_insts; + writeback_count[0]+= inst_num; +} + +template +bool +BackEnd::commitInst(int inst_num) +{ + // Read instruction from the head of the ROB + DynInstPtr inst = instList.front(); + + // Make sure instruction is valid + assert(inst); + + if (!inst->readyToCommit()) + return false; + + DPRINTF(BE, "Trying to commit instruction [sn:%lli] PC:%#x\n", + inst->seqNum, inst->readPC()); + + // If the instruction is not executed yet, then it is a non-speculative + // or store inst. Signal backwards that it should be executed. + if (!inst->isExecuted()) { + // Keep this number correct. We have not yet actually executed + // and committed this instruction. +// thread->funcExeInst--; + + if (inst->isNonSpeculative()) { +#if !FULL_SYSTEM + // Hack to make sure syscalls aren't executed until all stores + // write back their data. This direct communication shouldn't + // be used for anything other than this. + if (inst_num > 0 || LSQ.hasStoresToWB()) { + DPRINTF(BE, "Waiting for all stores to writeback.\n"); + return false; + } +#endif + + DPRINTF(BE, "Encountered a store or non-speculative " + "instruction at the head of the ROB, PC %#x.\n", + inst->readPC()); + + // Send back the non-speculative instruction's sequence number. + toIEW->nonSpecSeqNum = inst->seqNum; + + // Change the instruction so it won't try to commit again until + // it is executed. + inst->clearCanCommit(); + +// ++commitNonSpecStalls; + + return false; + } else if (inst->isLoad()) { + DPRINTF(BE, "[sn:%lli]: Uncached load, PC %#x.\n", + inst->seqNum, inst->readPC()); + + // Send back the non-speculative instruction's sequence + // number. Maybe just tell the lsq to re-execute the load. + toIEW->nonSpecSeqNum = inst->seqNum; + toIEW->uncached = true; + toIEW->lqIdx = inst->lqIdx; + + inst->clearCanCommit(); + + return false; + } else { + panic("Trying to commit un-executed instruction " + "of unknown type!\n"); + } + } + + // Now check if it's one of the special trap or barrier or + // serializing instructions. + if (inst->isThreadSync()) + { + // Not handled for now. + panic("Barrier instructions are not handled yet.\n"); + } + + // Check if the instruction caused a fault. If so, trap. + Fault inst_fault = inst->getFault(); + + if (inst_fault != NoFault) { + if (!inst->isNop()) { +#if FULL_SYSTEM + DPRINTF(BE, "Inst [sn:%lli] PC %#x has a fault\n", + inst->seqNum, inst->readPC()); + +// assert(!thread->inSyscall); + +// thread->inSyscall = true; + + // Consider holding onto the trap and waiting until the trap event + // happens for this to be executed. + inst_fault->invoke(thread->getXCProxy()); + + // Exit state update mode to avoid accidental updating. +// thread->inSyscall = false; + +// commitStatus = TrapPending; + + // Generate trap squash event. +// generateTrapEvent(); + + return false; +#else // !FULL_SYSTEM + panic("fault (%d) detected @ PC %08p", inst_fault, + inst->PC); +#endif // FULL_SYSTEM + } + } + + if (inst->isControl()) { +// ++commitCommittedBranches; + } + + int freed_regs = 0; + + for (int i = 0; i < inst->numDestRegs(); ++i) { + DPRINTF(BE, "Commit rename map setting register %i to [sn:%lli]\n", + (int)inst->destRegIdx(i), inst->seqNum); + thread->renameTable[inst->destRegIdx(i)] = inst; + ++freed_regs; + } + + if (inst->traceData) { + inst->traceData->finalize(); + inst->traceData = NULL; + } + + inst->clearDependents(); + + frontEnd->addFreeRegs(freed_regs); + + instList.pop_front(); + + --numInsts; + cpu->numInst++; + thread->numInsts++; + ++thread->funcExeInst; + thread->PC = inst->readNextPC(); + updateComInstStats(inst); + + // Write the done sequence number here. + toIEW->doneSeqNum = inst->seqNum; + + return true; +} + +template +void +BackEnd::commitInsts() +{ + int commit_width = commitWidth ? commitWidth : width; + + // Not sure this should be a loop or not. + int inst_num = 0; + while (!instList.empty() && inst_num < commit_width) { + if (instList.front()->isSquashed()) { + panic("No squashed insts should still be on the list!"); + instList.front()->clearDependents(); + instList.pop_front(); + continue; + } + + if (!commitInst(inst_num++)) { + break; + } + } + n_committed_dist.sample(inst_num); +} + +template +void +BackEnd::squash(const InstSeqNum &sn) +{ + IQ.squash(sn); + LSQ.squash(sn); + + int freed_regs = 0; + InstListIt dispatch_end = dispatch.end(); + InstListIt insts_it = dispatch.end(); + insts_it--; + + while (insts_it != dispatch_end && (*insts_it)->seqNum > sn) + { + DPRINTF(BE, "Squashing instruction PC %#x, [sn:%lli].\n", + (*insts_it)->readPC(), + (*insts_it)->seqNum); + + // Mark the instruction as squashed, and ready to commit so that + // it can drain out of the pipeline. + (*insts_it)->setSquashed(); + + (*insts_it)->setCanCommit(); + + for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) { + renameTable[(*insts_it)->destRegIdx(i)] = + (*insts_it)->getPrevDestInst(i); + ++freed_regs; + } + + (*insts_it)->clearDependents(); + + --insts_it; + } + + insts_it = instList.end(); + insts_it--; + + while (!instList.empty() && (*insts_it)->seqNum > sn) + { + DPRINTF(BE, "Squashing instruction PC %#x, [sn:%lli].\n", + (*insts_it)->readPC(), + (*insts_it)->seqNum); + + // Mark the instruction as squashed, and ready to commit so that + // it can drain out of the pipeline. + (*insts_it)->setSquashed(); + + (*insts_it)->setCanCommit(); + + for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) { + renameTable[(*insts_it)->destRegIdx(i)] = + (*insts_it)->getPrevDestInst(i); + ++freed_regs; + } + + (*insts_it)->clearDependents(); + + instList.erase(insts_it--); + --numInsts; + } + + frontEnd->addFreeRegs(freed_regs); +} + +template +void +BackEnd::squashFromXC() +{ + xcSquash = true; +} + +template +void +BackEnd::squashDueToBranch(DynInstPtr &inst) +{ + // Update the branch predictor state I guess + squash(inst->seqNum); + frontEnd->squash(inst->seqNum, inst->readNextPC(), + true, inst->mispredicted()); +} + +template +void +BackEnd::squashDueToMemBlocked(DynInstPtr &inst) +{ + DPRINTF(IEW, "Memory blocked, squashing load and younger insts, " + "PC: %#x [sn:%i].\n", inst->readPC(), inst->seqNum); + + squash(inst->seqNum - 1); + frontEnd->squash(inst->seqNum - 1, inst->readPC()); +} + +template +void +BackEnd::fetchFault(Fault &fault) +{ +} + +template +void +BackEnd::updateExeInstStats(DynInstPtr &inst) +{ + int thread_number = inst->threadNumber; + + // + // Pick off the software prefetches + // +#ifdef TARGET_ALPHA + if (inst->isDataPrefetch()) + exe_swp[thread_number]++; + else + exe_inst[thread_number]++; +#else + exe_inst[thread_number]++; +#endif + + // + // Control operations + // + if (inst->isControl()) + exe_branches[thread_number]++; + + // + // Memory operations + // + if (inst->isMemRef()) { + exe_refs[thread_number]++; + + if (inst->isLoad()) + exe_loads[thread_number]++; + } +} + +template +void +BackEnd::updateComInstStats(DynInstPtr &inst) +{ + unsigned thread = inst->threadNumber; + + // + // Pick off the software prefetches + // +#ifdef TARGET_ALPHA + if (inst->isDataPrefetch()) { + stat_com_swp[thread]++; + } else { + stat_com_inst[thread]++; + } +#else + stat_com_inst[thread]++; +#endif + + // + // Control Instructions + // + if (inst->isControl()) + stat_com_branches[thread]++; + + // + // Memory references + // + if (inst->isMemRef()) { + stat_com_refs[thread]++; + + if (inst->isLoad()) { + stat_com_loads[thread]++; + } + } + + if (inst->isMemBarrier()) { + stat_com_membars[thread]++; + } +} + +template +void +BackEnd::dumpInsts() +{ + int num = 0; + int valid_num = 0; + + InstListIt inst_list_it = instList.begin(); + + cprintf("Inst list size: %i\n", instList.size()); + + while (inst_list_it != instList.end()) + { + cprintf("Instruction:%i\n", + num); + if (!(*inst_list_it)->isSquashed()) { + if (!(*inst_list_it)->isIssued()) { + ++valid_num; + cprintf("Count:%i\n", valid_num); + } else if ((*inst_list_it)->isMemRef() && + !(*inst_list_it)->memOpDone) { + // Loads that have not been marked as executed still count + // towards the total instructions. + ++valid_num; + cprintf("Count:%i\n", valid_num); + } + } + + cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" + "Issued:%i\nSquashed:%i\n", + (*inst_list_it)->readPC(), + (*inst_list_it)->seqNum, + (*inst_list_it)->threadNumber, + (*inst_list_it)->isIssued(), + (*inst_list_it)->isSquashed()); + + if ((*inst_list_it)->isMemRef()) { + cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); + } + + cprintf("\n"); + + inst_list_it++; + ++num; + } + + cprintf("Dispatch list size: %i\n", dispatch.size()); + + inst_list_it = dispatch.begin(); + + while (inst_list_it != dispatch.end()) + { + cprintf("Instruction:%i\n", + num); + if (!(*inst_list_it)->isSquashed()) { + if (!(*inst_list_it)->isIssued()) { + ++valid_num; + cprintf("Count:%i\n", valid_num); + } else if ((*inst_list_it)->isMemRef() && + !(*inst_list_it)->memOpDone) { + // Loads that have not been marked as executed still count + // towards the total instructions. + ++valid_num; + cprintf("Count:%i\n", valid_num); + } + } + + cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" + "Issued:%i\nSquashed:%i\n", + (*inst_list_it)->readPC(), + (*inst_list_it)->seqNum, + (*inst_list_it)->threadNumber, + (*inst_list_it)->isIssued(), + (*inst_list_it)->isSquashed()); + + if ((*inst_list_it)->isMemRef()) { + cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); + } + + cprintf("\n"); + + inst_list_it++; + ++num; + } + + cprintf("Writeback list size: %i\n", writeback.size()); + + inst_list_it = writeback.begin(); + + while (inst_list_it != writeback.end()) + { + cprintf("Instruction:%i\n", + num); + if (!(*inst_list_it)->isSquashed()) { + if (!(*inst_list_it)->isIssued()) { + ++valid_num; + cprintf("Count:%i\n", valid_num); + } else if ((*inst_list_it)->isMemRef() && + !(*inst_list_it)->memOpDone) { + // Loads that have not been marked as executed still count + // towards the total instructions. + ++valid_num; + cprintf("Count:%i\n", valid_num); + } + } + + cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" + "Issued:%i\nSquashed:%i\n", + (*inst_list_it)->readPC(), + (*inst_list_it)->seqNum, + (*inst_list_it)->threadNumber, + (*inst_list_it)->isIssued(), + (*inst_list_it)->isSquashed()); + + if ((*inst_list_it)->isMemRef()) { + cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); + } + + cprintf("\n"); + + inst_list_it++; + ++num; + } +} diff --git a/cpu/ozone/cpu.cc b/cpu/ozone/cpu.cc index cbeca9d3b..d2ea0164c 100644 --- a/cpu/ozone/cpu.cc +++ b/cpu/ozone/cpu.cc @@ -26,8 +26,9 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include "cpu/ooo_cpu/ooo_cpu_impl.hh" -#include "cpu/ooo_cpu/ooo_dyn_inst.hh" -#include "cpu/ooo_cpu/ooo_impl.hh" +#include "cpu/ozone/cpu_impl.hh" +#include "cpu/ozone/ozone_impl.hh" +#include "cpu/ozone/simple_impl.hh" -template class OoOCPU; +template class OzoneCPU; +template class OzoneCPU; diff --git a/cpu/ozone/cpu.hh b/cpu/ozone/cpu.hh index f5d84d656..200ced265 100644 --- a/cpu/ozone/cpu.hh +++ b/cpu/ozone/cpu.hh @@ -26,15 +26,19 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef __CPU_OOO_CPU_OOO_CPU_HH__ -#define __CPU_OOO_CPU_OOO_CPU_HH__ +#ifndef __CPU_OZONE_CPU_HH__ +#define __CPU_OZONE_CPU_HH__ + +#include #include "base/statistics.hh" +#include "base/timebuf.hh" #include "config/full_system.hh" #include "cpu/base.hh" #include "cpu/exec_context.hh" -#include "encumbered/cpu/full/fu_pool.hh" -#include "cpu/ooo_cpu/ea_list.hh" +#include "cpu/inst_seq.hh" +#include "cpu/ozone/rename_table.hh" +#include "cpu/ozone/thread_state.hh" #include "cpu/pc_event.hh" #include "cpu/static_inst.hh" #include "mem/mem_interface.hh" @@ -42,16 +46,19 @@ // forward declarations #if FULL_SYSTEM -class Processor; +#include "arch/alpha/tlb.hh" + class AlphaITB; class AlphaDTB; class PhysicalMemory; +class MemoryController; class RemoteGDB; class GDBListener; #else +class PageTable; class Process; #endif // FULL_SYSTEM @@ -72,23 +79,180 @@ namespace Trace { */ template -class OoOCPU : public BaseCPU +class OzoneCPU : public BaseCPU { private: + typedef typename Impl::FrontEnd FrontEnd; + typedef typename Impl::BackEnd BackEnd; + typedef typename Impl::DynInst DynInst; typedef typename Impl::DynInst DynInst; typedef typename Impl::DynInstPtr DynInstPtr; + typedef TheISA::MiscReg MiscReg; + + public: + class OzoneXC : public ExecContext { + public: + OzoneCPU *cpu; + + OzoneThreadState *thread; + + BaseCPU *getCpuPtr(); + + void setCpuId(int id); + + int readCpuId() { return thread->cpuId; } + + FunctionalMemory *getMemPtr() { return thread->mem; } + +#if FULL_SYSTEM + System *getSystemPtr() { return cpu->system; } + + PhysicalMemory *getPhysMemPtr() { return cpu->physmem; } + + AlphaITB *getITBPtr() { return cpu->itb; } + + AlphaDTB * getDTBPtr() { return cpu->dtb; } +#else + Process *getProcessPtr() { return thread->process; } +#endif + + Status status() const { return thread->_status; } + + void setStatus(Status new_status); + + /// Set the status to Active. Optional delay indicates number of + /// cycles to wait before beginning execution. + void activate(int delay = 1); + + /// Set the status to Suspended. + void suspend(); + + /// Set the status to Unallocated. + void deallocate(); + + /// Set the status to Halted. + void halt(); + +#if FULL_SYSTEM + void dumpFuncProfile(); +#endif + + void takeOverFrom(ExecContext *old_context); + + void regStats(const std::string &name); + + void serialize(std::ostream &os); + void unserialize(Checkpoint *cp, const std::string §ion); + +#if FULL_SYSTEM + Event *getQuiesceEvent(); + + Tick readLastActivate(); + Tick readLastSuspend(); + + void profileClear(); + void profileSample(); +#endif + + int getThreadNum(); + + // Also somewhat obnoxious. Really only used for the TLB fault. + TheISA::MachInst getInst(); + + void copyArchRegs(ExecContext *xc); + + void clearArchRegs(); + + uint64_t readIntReg(int reg_idx); + + float readFloatRegSingle(int reg_idx); + + double readFloatRegDouble(int reg_idx); + + uint64_t readFloatRegInt(int reg_idx); + + void setIntReg(int reg_idx, uint64_t val); + + void setFloatRegSingle(int reg_idx, float val); + + void setFloatRegDouble(int reg_idx, double val); + + void setFloatRegInt(int reg_idx, uint64_t val); + + uint64_t readPC() { return thread->PC; } + void setPC(Addr val); + + uint64_t readNextPC() { return thread->nextPC; } + void setNextPC(Addr val); + + public: + // ISA stuff: + MiscReg readMiscReg(int misc_reg); + + MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault); + + Fault setMiscReg(int misc_reg, const MiscReg &val); + + Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val); + + unsigned readStCondFailures() + { return thread->storeCondFailures; } + + void setStCondFailures(unsigned sc_failures) + { thread->storeCondFailures = sc_failures; } + +#if FULL_SYSTEM + bool inPalMode() { return cpu->inPalMode(); } +#endif + + bool misspeculating() { return false; } + +#if !FULL_SYSTEM + TheISA::IntReg getSyscallArg(int i) + { return thread->renameTable[TheISA::ArgumentReg0 + i]->readIntResult(); } + + // used to shift args for indirect syscall + void setSyscallArg(int i, TheISA::IntReg val) + { thread->renameTable[TheISA::ArgumentReg0 + i]->setIntResult(i); } + + void setSyscallReturn(SyscallReturn return_value) + { cpu->setSyscallReturn(return_value, thread->tid); } + + Counter readFuncExeInst() { return thread->funcExeInst; } + + void setFuncExeInst(Counter new_val) + { thread->funcExeInst = new_val; } +#endif + }; + + // execution context proxy + OzoneXC xcProxy; + + typedef OzoneThreadState ImplState; + + private: + OzoneThreadState thread; +/* + // Squash event for when the XC needs to squash all inflight instructions. + struct XCSquashEvent : public Event + { + void process(); + const char *description(); + }; +*/ public: // main simulation loop (one cycle) void tick(); + std::set snList; private: struct TickEvent : public Event { - OoOCPU *cpu; + OzoneCPU *cpu; int width; - TickEvent(OoOCPU *c, int w); + TickEvent(OzoneCPU *c, int w); void process(); const char *description(); }; @@ -122,16 +286,14 @@ class OoOCPU : public BaseCPU enum Status { Running, Idle, - IcacheMiss, - IcacheMissComplete, - DcacheMissStall, SwitchedOut }; - private: Status _status; public: + bool checkInterrupts; + void post_interrupt(int int_num, int index); void zero_fill_64(Addr addr) { @@ -142,33 +304,24 @@ class OoOCPU : public BaseCPU } }; - struct Params : public BaseCPU::Params - { - MemInterface *icache_interface; - MemInterface *dcache_interface; - int width; -#if FULL_SYSTEM - AlphaITB *itb; - AlphaDTB *dtb; - FunctionalMemory *mem; -#else - Process *process; -#endif - int issueWidth; - }; + typedef typename Impl::Params Params; - OoOCPU(Params *params); + OzoneCPU(Params *params); - virtual ~OoOCPU(); + virtual ~OzoneCPU(); void init(); - private: - void copyFromXC(); - public: - // execution context - ExecContext *xc; + BaseCPU *getCpuPtr() { return this; } + + void setCpuId(int id) { cpuId = id; } + + int readCpuId() { return cpuId; } + +// FunctionalMemory *getMemPtr() { return mem; } + + int cpuId; void switchOut(); void takeOverFrom(BaseCPU *oldCPU); @@ -177,6 +330,16 @@ class OoOCPU : public BaseCPU Addr dbg_vtophys(Addr addr); bool interval_stats; + + AlphaITB *itb; + AlphaDTB *dtb; + System *system; + + // the following two fields are redundant, since we can always + // look them up through the system pointer, but we'll leave them + // here for now for convenience + MemoryController *memctrl; + PhysicalMemory *physmem; #endif // L1 instruction cache @@ -185,54 +348,18 @@ class OoOCPU : public BaseCPU // L1 data cache MemInterface *dcacheInterface; - FuncUnitPool *fuPool; - - // Refcounted pointer to the one memory request. - MemReqPtr cacheMemReq; - - class ICacheCompletionEvent : public Event - { - private: - OoOCPU *cpu; - - public: - ICacheCompletionEvent(OoOCPU *_cpu); - - virtual void process(); - virtual const char *description(); - }; - - // Will need to create a cache completion event upon any memory miss. - ICacheCompletionEvent iCacheCompletionEvent; - - class DCacheCompletionEvent; - - typedef typename - std::list::iterator DCacheCompEventIt; - - class DCacheCompletionEvent : public Event - { - private: - OoOCPU *cpu; - DynInstPtr inst; - DCacheCompEventIt dcceIt; - - public: - DCacheCompletionEvent(OoOCPU *_cpu, DynInstPtr &_inst, - DCacheCompEventIt &_dcceIt); - - virtual void process(); - virtual const char *description(); - }; - - friend class DCacheCompletionEvent; +#if !FULL_SYSTEM + PageTable *pTable; +#endif - protected: - std::list dCacheCompList; - DCacheCompEventIt dcceIt; + FrontEnd *frontEnd; + BackEnd *backEnd; private: Status status() const { return _status; } + void setStatus(Status new_status) { _status = new_status; } + + // Not sure what an activate() call on the CPU's proxy XC would mean... virtual void activateContext(int thread_num, int delay); virtual void suspendContext(int thread_num); @@ -244,17 +371,19 @@ class OoOCPU : public BaseCPU virtual void resetStats(); // number of simulated instructions + public: Counter numInst; Counter startNumInst; - Stats::Scalar<> numInsts; +// Stats::Scalar<> numInsts; virtual Counter totalInstructions() const { return numInst - startNumInst; } + private: // number of simulated memory references - Stats::Scalar<> numMemRefs; +// Stats::Scalar<> numMemRefs; // number of simulated loads Counter numLoad; @@ -263,27 +392,15 @@ class OoOCPU : public BaseCPU // number of idle cycles Stats::Average<> notIdleFraction; Stats::Formula idleFraction; - - // number of cycles stalled for I-cache misses - Stats::Scalar<> icacheStallCycles; - Counter lastIcacheStall; - - // number of cycles stalled for D-cache misses - Stats::Scalar<> dcacheStallCycles; - Counter lastDcacheStall; - - void processICacheCompletion(); - public: virtual void serialize(std::ostream &os); virtual void unserialize(Checkpoint *cp, const std::string §ion); + #if FULL_SYSTEM bool validInstAddr(Addr addr) { return true; } bool validDataAddr(Addr addr) { return true; } - int getInstAsid() { return xc->regs.instAsid(); } - int getDataAsid() { return xc->regs.dataAsid(); } Fault translateInstReq(MemReqPtr &req) { @@ -302,13 +419,13 @@ class OoOCPU : public BaseCPU #else bool validInstAddr(Addr addr) - { return xc->validInstAddr(addr); } + { return true; } bool validDataAddr(Addr addr) - { return xc->validDataAddr(addr); } + { return true; } - int getInstAsid() { return xc->asid; } - int getDataAsid() { return xc->asid; } + int getInstAsid() { return thread.asid; } + int getDataAsid() { return thread.asid; } Fault dummyTranslation(MemReqPtr &req) { @@ -321,27 +438,38 @@ class OoOCPU : public BaseCPU req->paddr = req->paddr | (Addr)req->asid << sizeof(Addr) * 8 - 16; return NoFault; } + + /** Translates instruction requestion in syscall emulation mode. */ Fault translateInstReq(MemReqPtr &req) { return dummyTranslation(req); } + + /** Translates data read request in syscall emulation mode. */ Fault translateDataReadReq(MemReqPtr &req) { return dummyTranslation(req); } + + /** Translates data write request in syscall emulation mode. */ Fault translateDataWriteReq(MemReqPtr &req) { return dummyTranslation(req); } - #endif - + /** CPU read function, forwards read to LSQ. */ template - Fault read(Addr addr, T &data, unsigned flags, DynInstPtr inst); + Fault read(MemReqPtr &req, T &data, int load_idx) + { + return backEnd->read(req, data, load_idx); + } + /** CPU write function, forwards write to LSQ. */ template - Fault write(T data, Addr addr, unsigned flags, - uint64_t *res, DynInstPtr inst); + Fault write(MemReqPtr &req, T &data, int store_idx) + { + return backEnd->write(req, data, store_idx); + } void prefetch(Addr addr, unsigned flags) { @@ -357,270 +485,38 @@ class OoOCPU : public BaseCPU Fault copy(Addr dest); - private: - bool executeInst(DynInstPtr &inst); - - void renameInst(DynInstPtr &inst); - - void addInst(DynInstPtr &inst); - - void commitHeadInst(); - - bool getOneInst(); - - Fault fetchCacheLine(); - - InstSeqNum getAndIncrementInstSeq(); - - bool ambigMemAddr; - - private: InstSeqNum globalSeqNum; - DynInstPtr renameTable[TheISA::TotalNumRegs]; - DynInstPtr commitTable[TheISA::TotalNumRegs]; - - // Might need a table of the shadow registers as well. -#if FULL_SYSTEM - DynInstPtr palShadowTable[TheISA::NumIntRegs]; -#endif - - public: - // The register accessor methods provide the index of the - // instruction's operand (e.g., 0 or 1), not the architectural - // register index, to simplify the implementation of register - // renaming. We find the architectural register index by indexing - // into the instruction's own operand index table. Note that a - // raw pointer to the StaticInst is provided instead of a - // ref-counted StaticInstPtr to redice overhead. This is fine as - // long as these methods don't copy the pointer into any long-term - // storage (which is pretty hard to imagine they would have reason - // to do). - - // In the OoO case these shouldn't read from the XC but rather from the - // rename table of DynInsts. Also these likely shouldn't be called very - // often, other than when adding things into the xc during say a syscall. - - uint64_t readIntReg(StaticInst *si, int idx) - { - return xc->readIntReg(si->srcRegIdx(idx)); - } - - float readFloatRegSingle(StaticInst *si, int idx) - { - int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag; - return xc->readFloatRegSingle(reg_idx); - } - - double readFloatRegDouble(StaticInst *si, int idx) - { - int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag; - return xc->readFloatRegDouble(reg_idx); - } - - uint64_t readFloatRegInt(StaticInst *si, int idx) - { - int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag; - return xc->readFloatRegInt(reg_idx); - } - - void setIntReg(StaticInst *si, int idx, uint64_t val) - { - xc->setIntReg(si->destRegIdx(idx), val); - } - - void setFloatRegSingle(StaticInst *si, int idx, float val) - { - int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag; - xc->setFloatRegSingle(reg_idx, val); - } - - void setFloatRegDouble(StaticInst *si, int idx, double val) - { - int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag; - xc->setFloatRegDouble(reg_idx, val); - } - - void setFloatRegInt(StaticInst *si, int idx, uint64_t val) - { - int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag; - xc->setFloatRegInt(reg_idx, val); - } - - uint64_t readPC() { return PC; } - void setNextPC(Addr val) { nextPC = val; } - - private: - Addr PC; - Addr nextPC; - - unsigned issueWidth; - - bool fetchRedirExcp; - bool fetchRedirBranch; - - /** Mask to get a cache block's address. */ - Addr cacheBlkMask; - - unsigned cacheBlkSize; - - Addr cacheBlkPC; - - /** The cache line being fetched. */ - uint8_t *cacheData; - - protected: - bool cacheBlkValid; - - private: - - // Align an address (typically a PC) to the start of an I-cache block. - // We fold in the PISA 64- to 32-bit conversion here as well. - Addr icacheBlockAlignPC(Addr addr) - { - addr = TheISA::realPCToFetchPC(addr); - return (addr & ~(cacheBlkMask)); - } - - unsigned instSize; - - // ROB tracking stuff. - DynInstPtr robHeadPtr; - DynInstPtr robTailPtr; - unsigned robSize; - unsigned robInsts; - - // List of outstanding EA instructions. - protected: - EAList eaList; - public: - void branchToTarget(Addr val) - { - if (!fetchRedirExcp) { - fetchRedirBranch = true; - PC = val; - } - } + void squashFromXC(); - // ISA stuff: - uint64_t readUniq() { return xc->readUniq(); } - void setUniq(uint64_t val) { xc->setUniq(val); } - - uint64_t readFpcr() { return xc->readFpcr(); } - void setFpcr(uint64_t val) { xc->setFpcr(val); } + // @todo: This can be a useful debug function. Implement it. + void dumpInsts() { frontEnd->dumpInsts(); } #if FULL_SYSTEM - uint64_t readIpr(int idx, Fault &fault) { return xc->readIpr(idx, fault); } - Fault setIpr(int idx, uint64_t val) { return xc->setIpr(idx, val); } - Fault hwrei() { return xc->hwrei(); } - int readIntrFlag() { return xc->readIntrFlag(); } - void setIntrFlag(int val) { xc->setIntrFlag(val); } - bool inPalMode() { return xc->inPalMode(); } - void trap(Fault fault) { fault->invoke(xc); } - bool simPalCheck(int palFunc) { return xc->simPalCheck(palFunc); } + Fault hwrei(); + int readIntrFlag() { return thread.regs.intrflag; } + void setIntrFlag(int val) { thread.regs.intrflag = val; } + bool inPalMode() { return AlphaISA::PcPAL(thread.PC); } + bool inPalMode(Addr pc) { return AlphaISA::PcPAL(pc); } + bool simPalCheck(int palFunc); #else - void syscall() { xc->syscall(); } -#endif - - ExecContext *xcBase() { return xc; } -}; - - -// precise architected memory state accessor macros -template -template -Fault -OoOCPU::read(Addr addr, T &data, unsigned flags, DynInstPtr inst) -{ - MemReqPtr readReq = new MemReq(); - readReq->xc = xc; - readReq->asid = 0; - readReq->data = new uint8_t[64]; - - readReq->reset(addr, sizeof(T), flags); - - // translate to physical address - This might be an ISA impl call - Fault fault = translateDataReadReq(readReq); - - // do functional access - if (fault == NoFault) - fault = xc->mem->read(readReq, data); -#if 0 - if (traceData) { - traceData->setAddr(addr); - if (fault == NoFault) - traceData->setData(data); - } -#endif - - // if we have a cache, do cache access too - if (fault == NoFault && dcacheInterface) { - readReq->cmd = Read; - readReq->completionEvent = NULL; - readReq->time = curTick; - /*MemAccessResult result = */dcacheInterface->access(readReq); - - if (dcacheInterface->doEvents()) { - readReq->completionEvent = new DCacheCompletionEvent(this, inst, - dcceIt); - } - } - - if (!dcacheInterface && (readReq->flags & UNCACHEABLE)) - recordEvent("Uncached Read"); - - return fault; -} - -template -template -Fault -OoOCPU::write(T data, Addr addr, unsigned flags, - uint64_t *res, DynInstPtr inst) -{ - MemReqPtr writeReq = new MemReq(); - writeReq->xc = xc; - writeReq->asid = 0; - writeReq->data = new uint8_t[64]; - -#if 0 - if (traceData) { - traceData->setAddr(addr); - traceData->setData(data); - } + void syscall(); + void setSyscallReturn(SyscallReturn return_value, int tid); #endif - writeReq->reset(addr, sizeof(T), flags); - - // translate to physical address - Fault fault = translateDataWriteReq(writeReq); - - // do functional access - if (fault == NoFault) - fault = xc->write(writeReq, data); - - if (fault == NoFault && dcacheInterface) { - writeReq->cmd = Write; - memcpy(writeReq->data,(uint8_t *)&data,writeReq->size); - writeReq->completionEvent = NULL; - writeReq->time = curTick; - /*MemAccessResult result = */dcacheInterface->access(writeReq); - - if (dcacheInterface->doEvents()) { - writeReq->completionEvent = new DCacheCompletionEvent(this, inst, - dcceIt); - } - } + ExecContext *xcBase() { return &xcProxy; } - if (res && (fault == NoFault)) - *res = writeReq->result; - - if (!dcacheInterface && (writeReq->flags & UNCACHEABLE)) - recordEvent("Uncached Write"); - - return fault; -} + bool decoupledFrontEnd; + struct CommStruct { + InstSeqNum doneSeqNum; + InstSeqNum nonSpecSeqNum; + bool uncached; + unsigned lqIdx; + bool stall; + }; + TimeBuffer comm; +}; -#endif // __CPU_OOO_CPU_OOO_CPU_HH__ +#endif // __CPU_OZONE_CPU_HH__ diff --git a/cpu/ozone/cpu_builder.cc b/cpu/ozone/cpu_builder.cc new file mode 100644 index 000000000..0146dd1bd --- /dev/null +++ b/cpu/ozone/cpu_builder.cc @@ -0,0 +1,818 @@ + +#include + +#include "cpu/inst_seq.hh" +#include "cpu/ozone/cpu.hh" +#include "cpu/ozone/ozone_impl.hh" +#include "cpu/ozone/simple_impl.hh" +#include "cpu/ozone/simple_params.hh" +#include "mem/cache/base_cache.hh" +#include "sim/builder.hh" +#include "sim/process.hh" +#include "sim/sim_object.hh" + +class DerivOzoneCPU : public OzoneCPU +{ + public: + DerivOzoneCPU(SimpleParams *p) + : OzoneCPU(p) + { } +}; + +class SimpleOzoneCPU : public OzoneCPU +{ + public: + SimpleOzoneCPU(SimpleParams *p) + : OzoneCPU(p) + { } +}; + + +//////////////////////////////////////////////////////////////////////// +// +// OzoneCPU Simulation Object +// + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivOzoneCPU) + + Param clock; + Param numThreads; + +#if FULL_SYSTEM +SimObjectParam system; +Param cpu_id; +SimObjectParam itb; +SimObjectParam dtb; +#else +SimObjectVectorParam workload; +//SimObjectParam page_table; +#endif // FULL_SYSTEM + +SimObjectParam mem; + +Param max_insts_any_thread; +Param max_insts_all_threads; +Param max_loads_any_thread; +Param max_loads_all_threads; + +SimObjectParam icache; +SimObjectParam dcache; + +Param cachePorts; +Param width; +Param frontEndWidth; +Param backEndWidth; +Param backEndSquashLatency; +Param backEndLatency; +Param maxInstBufferSize; +Param numPhysicalRegs; + +Param decodeToFetchDelay; +Param renameToFetchDelay; +Param iewToFetchDelay; +Param commitToFetchDelay; +Param fetchWidth; + +Param renameToDecodeDelay; +Param iewToDecodeDelay; +Param commitToDecodeDelay; +Param fetchToDecodeDelay; +Param decodeWidth; + +Param iewToRenameDelay; +Param commitToRenameDelay; +Param decodeToRenameDelay; +Param renameWidth; + +Param commitToIEWDelay; +Param renameToIEWDelay; +Param issueToExecuteDelay; +Param issueWidth; +Param executeWidth; +Param executeIntWidth; +Param executeFloatWidth; +Param executeBranchWidth; +Param executeMemoryWidth; + +Param iewToCommitDelay; +Param renameToROBDelay; +Param commitWidth; +Param squashWidth; + +Param localPredictorSize; +Param localCtrBits; +Param localHistoryTableSize; +Param localHistoryBits; +Param globalPredictorSize; +Param globalCtrBits; +Param globalHistoryBits; +Param choicePredictorSize; +Param choiceCtrBits; + +Param BTBEntries; +Param BTBTagSize; + +Param RASSize; + +Param LQEntries; +Param SQEntries; +Param LFSTSize; +Param SSITSize; + +Param numPhysIntRegs; +Param numPhysFloatRegs; +Param numIQEntries; +Param numROBEntries; + +Param decoupledFrontEnd; +Param dispatchWidth; +Param wbWidth; + +Param smtNumFetchingThreads; +Param smtFetchPolicy; +Param smtLSQPolicy; +Param smtLSQThreshold; +Param smtIQPolicy; +Param smtIQThreshold; +Param smtROBPolicy; +Param smtROBThreshold; +Param smtCommitPolicy; + +Param instShiftAmt; + +Param defer_registration; + +Param function_trace; +Param function_trace_start; + +END_DECLARE_SIM_OBJECT_PARAMS(DerivOzoneCPU) + +BEGIN_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU) + + INIT_PARAM(clock, "clock speed"), + INIT_PARAM(numThreads, "number of HW thread contexts"), + +#if FULL_SYSTEM + INIT_PARAM(system, "System object"), + INIT_PARAM(cpu_id, "processor ID"), + INIT_PARAM(itb, "Instruction translation buffer"), + INIT_PARAM(dtb, "Data translation buffer"), +#else + INIT_PARAM(workload, "Processes to run"), +// INIT_PARAM(page_table, "Page table"), +#endif // FULL_SYSTEM + + INIT_PARAM_DFLT(mem, "Memory", NULL), + + INIT_PARAM_DFLT(max_insts_any_thread, + "Terminate when any thread reaches this inst count", + 0), + INIT_PARAM_DFLT(max_insts_all_threads, + "Terminate when all threads have reached" + "this inst count", + 0), + INIT_PARAM_DFLT(max_loads_any_thread, + "Terminate when any thread reaches this load count", + 0), + INIT_PARAM_DFLT(max_loads_all_threads, + "Terminate when all threads have reached this load" + "count", + 0), + + INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL), + INIT_PARAM_DFLT(dcache, "L1 data cache", NULL), + + INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200), + INIT_PARAM_DFLT(width, "Width", 1), + INIT_PARAM_DFLT(frontEndWidth, "Front end width", 1), + INIT_PARAM_DFLT(backEndWidth, "Back end width", 1), + INIT_PARAM_DFLT(backEndSquashLatency, "Back end squash latency", 1), + INIT_PARAM_DFLT(backEndLatency, "Back end latency", 1), + INIT_PARAM_DFLT(maxInstBufferSize, "Maximum instruction buffer size", 16), + INIT_PARAM(numPhysicalRegs, "Number of physical registers"), + + INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"), + INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"), + INIT_PARAM(iewToFetchDelay, "Issue/Execute/Writeback to fetch" + "delay"), + INIT_PARAM(commitToFetchDelay, "Commit to fetch delay"), + INIT_PARAM(fetchWidth, "Fetch width"), + INIT_PARAM(renameToDecodeDelay, "Rename to decode delay"), + INIT_PARAM(iewToDecodeDelay, "Issue/Execute/Writeback to decode" + "delay"), + INIT_PARAM(commitToDecodeDelay, "Commit to decode delay"), + INIT_PARAM(fetchToDecodeDelay, "Fetch to decode delay"), + INIT_PARAM(decodeWidth, "Decode width"), + + INIT_PARAM(iewToRenameDelay, "Issue/Execute/Writeback to rename" + "delay"), + INIT_PARAM(commitToRenameDelay, "Commit to rename delay"), + INIT_PARAM(decodeToRenameDelay, "Decode to rename delay"), + INIT_PARAM(renameWidth, "Rename width"), + + INIT_PARAM(commitToIEWDelay, "Commit to " + "Issue/Execute/Writeback delay"), + INIT_PARAM(renameToIEWDelay, "Rename to " + "Issue/Execute/Writeback delay"), + INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal" + "to the IEW stage)"), + INIT_PARAM(issueWidth, "Issue width"), + INIT_PARAM(executeWidth, "Execute width"), + INIT_PARAM(executeIntWidth, "Integer execute width"), + INIT_PARAM(executeFloatWidth, "Floating point execute width"), + INIT_PARAM(executeBranchWidth, "Branch execute width"), + INIT_PARAM(executeMemoryWidth, "Memory execute width"), + + INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit " + "delay"), + INIT_PARAM(renameToROBDelay, "Rename to reorder buffer delay"), + INIT_PARAM(commitWidth, "Commit width"), + INIT_PARAM(squashWidth, "Squash width"), + + INIT_PARAM(localPredictorSize, "Size of local predictor"), + INIT_PARAM(localCtrBits, "Bits per counter"), + INIT_PARAM(localHistoryTableSize, "Size of local history table"), + INIT_PARAM(localHistoryBits, "Bits for the local history"), + INIT_PARAM(globalPredictorSize, "Size of global predictor"), + INIT_PARAM(globalCtrBits, "Bits per counter"), + INIT_PARAM(globalHistoryBits, "Bits of history"), + INIT_PARAM(choicePredictorSize, "Size of choice predictor"), + INIT_PARAM(choiceCtrBits, "Bits of choice counters"), + + INIT_PARAM(BTBEntries, "Number of BTB entries"), + INIT_PARAM(BTBTagSize, "Size of the BTB tags, in bits"), + + INIT_PARAM(RASSize, "RAS size"), + + INIT_PARAM(LQEntries, "Number of load queue entries"), + INIT_PARAM(SQEntries, "Number of store queue entries"), + INIT_PARAM(LFSTSize, "Last fetched store table size"), + INIT_PARAM(SSITSize, "Store set ID table size"), + + INIT_PARAM(numPhysIntRegs, "Number of physical integer registers"), + INIT_PARAM(numPhysFloatRegs, "Number of physical floating point " + "registers"), + INIT_PARAM(numIQEntries, "Number of instruction queue entries"), + INIT_PARAM(numROBEntries, "Number of reorder buffer entries"), + + INIT_PARAM_DFLT(decoupledFrontEnd, "Decoupled front end", true), + INIT_PARAM_DFLT(dispatchWidth, "Dispatch width", 0), + INIT_PARAM_DFLT(wbWidth, "Writeback width", 0), + + INIT_PARAM_DFLT(smtNumFetchingThreads, "SMT Number of Fetching Threads", 1), + INIT_PARAM_DFLT(smtFetchPolicy, "SMT Fetch Policy", "SingleThread"), + INIT_PARAM_DFLT(smtLSQPolicy, "SMT LSQ Sharing Policy", "Partitioned"), + INIT_PARAM_DFLT(smtLSQThreshold,"SMT LSQ Threshold", 100), + INIT_PARAM_DFLT(smtIQPolicy, "SMT IQ Policy", "Partitioned"), + INIT_PARAM_DFLT(smtIQThreshold, "SMT IQ Threshold", 100), + INIT_PARAM_DFLT(smtROBPolicy, "SMT ROB Sharing Policy", "Partitioned"), + INIT_PARAM_DFLT(smtROBThreshold,"SMT ROB Threshold", 100), + INIT_PARAM_DFLT(smtCommitPolicy,"SMT Commit Fetch Policy", "RoundRobin"), + + INIT_PARAM(instShiftAmt, "Number of bits to shift instructions by"), + INIT_PARAM(defer_registration, "defer system registration (for sampling)"), + + INIT_PARAM(function_trace, "Enable function trace"), + INIT_PARAM(function_trace_start, "Cycle to start function trace") + +END_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU) + +CREATE_SIM_OBJECT(DerivOzoneCPU) +{ + DerivOzoneCPU *cpu; + +#if FULL_SYSTEM + // Full-system only supports a single thread for the moment. + int actual_num_threads = 1; +#else + // In non-full-system mode, we infer the number of threads from + // the workload if it's not explicitly specified. + int actual_num_threads = + numThreads.isValid() ? numThreads : workload.size(); + + if (workload.size() == 0) { + fatal("Must specify at least one workload!"); + } + +#endif + + SimpleParams *params = new SimpleParams; + + params->clock = clock; + + params->name = getInstanceName(); + params->numberOfThreads = actual_num_threads; + +#if FULL_SYSTEM + params->system = system; + params->cpu_id = cpu_id; + params->itb = itb; + params->dtb = dtb; +#else + params->workload = workload; +// params->pTable = page_table; +#endif // FULL_SYSTEM + + params->mem = mem; + + params->max_insts_any_thread = max_insts_any_thread; + params->max_insts_all_threads = max_insts_all_threads; + params->max_loads_any_thread = max_loads_any_thread; + params->max_loads_all_threads = max_loads_all_threads; + + // + // Caches + // + params->icacheInterface = icache ? icache->getInterface() : NULL; + params->dcacheInterface = dcache ? dcache->getInterface() : NULL; + params->cachePorts = cachePorts; + + params->width = width; + params->frontEndWidth = frontEndWidth; + params->backEndWidth = backEndWidth; + params->backEndSquashLatency = backEndSquashLatency; + params->backEndLatency = backEndLatency; + params->maxInstBufferSize = maxInstBufferSize; + params->numPhysicalRegs = numPhysIntRegs + numPhysFloatRegs; + + params->decodeToFetchDelay = decodeToFetchDelay; + params->renameToFetchDelay = renameToFetchDelay; + params->iewToFetchDelay = iewToFetchDelay; + params->commitToFetchDelay = commitToFetchDelay; + params->fetchWidth = fetchWidth; + + params->renameToDecodeDelay = renameToDecodeDelay; + params->iewToDecodeDelay = iewToDecodeDelay; + params->commitToDecodeDelay = commitToDecodeDelay; + params->fetchToDecodeDelay = fetchToDecodeDelay; + params->decodeWidth = decodeWidth; + + params->iewToRenameDelay = iewToRenameDelay; + params->commitToRenameDelay = commitToRenameDelay; + params->decodeToRenameDelay = decodeToRenameDelay; + params->renameWidth = renameWidth; + + params->commitToIEWDelay = commitToIEWDelay; + params->renameToIEWDelay = renameToIEWDelay; + params->issueToExecuteDelay = issueToExecuteDelay; + params->issueWidth = issueWidth; + params->executeWidth = executeWidth; + params->executeIntWidth = executeIntWidth; + params->executeFloatWidth = executeFloatWidth; + params->executeBranchWidth = executeBranchWidth; + params->executeMemoryWidth = executeMemoryWidth; + + params->iewToCommitDelay = iewToCommitDelay; + params->renameToROBDelay = renameToROBDelay; + params->commitWidth = commitWidth; + params->squashWidth = squashWidth; + + + params->localPredictorSize = localPredictorSize; + params->localCtrBits = localCtrBits; + params->localHistoryTableSize = localHistoryTableSize; + params->localHistoryBits = localHistoryBits; + params->globalPredictorSize = globalPredictorSize; + params->globalCtrBits = globalCtrBits; + params->globalHistoryBits = globalHistoryBits; + params->choicePredictorSize = choicePredictorSize; + params->choiceCtrBits = choiceCtrBits; + + params->BTBEntries = BTBEntries; + params->BTBTagSize = BTBTagSize; + + params->RASSize = RASSize; + + params->LQEntries = LQEntries; + params->SQEntries = SQEntries; + + params->SSITSize = SSITSize; + params->LFSTSize = LFSTSize; + + params->numPhysIntRegs = numPhysIntRegs; + params->numPhysFloatRegs = numPhysFloatRegs; + params->numIQEntries = numIQEntries; + params->numROBEntries = numROBEntries; + + params->decoupledFrontEnd = decoupledFrontEnd; + params->dispatchWidth = dispatchWidth; + params->wbWidth = wbWidth; + + params->smtNumFetchingThreads = smtNumFetchingThreads; + params->smtFetchPolicy = smtFetchPolicy; + params->smtIQPolicy = smtIQPolicy; + params->smtLSQPolicy = smtLSQPolicy; + params->smtLSQThreshold = smtLSQThreshold; + params->smtROBPolicy = smtROBPolicy; + params->smtROBThreshold = smtROBThreshold; + params->smtCommitPolicy = smtCommitPolicy; + + params->instShiftAmt = 2; + + params->deferRegistration = defer_registration; + + params->functionTrace = function_trace; + params->functionTraceStart = function_trace_start; + + cpu = new DerivOzoneCPU(params); + + return cpu; +} + +REGISTER_SIM_OBJECT("DerivOzoneCPU", DerivOzoneCPU) + + + +//////////////////////////////////////////////////////////////////////// +// +// OzoneCPU Simulation Object +// + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(SimpleOzoneCPU) + + Param clock; + Param numThreads; + +#if FULL_SYSTEM +SimObjectParam system; +Param cpu_id; +SimObjectParam itb; +SimObjectParam dtb; +#else +SimObjectVectorParam workload; +//SimObjectParam page_table; +#endif // FULL_SYSTEM + +SimObjectParam mem; + +Param max_insts_any_thread; +Param max_insts_all_threads; +Param max_loads_any_thread; +Param max_loads_all_threads; + +SimObjectParam icache; +SimObjectParam dcache; + +Param cachePorts; +Param width; +Param frontEndWidth; +Param backEndWidth; +Param backEndSquashLatency; +Param backEndLatency; +Param maxInstBufferSize; +Param numPhysicalRegs; + +Param decodeToFetchDelay; +Param renameToFetchDelay; +Param iewToFetchDelay; +Param commitToFetchDelay; +Param fetchWidth; + +Param renameToDecodeDelay; +Param iewToDecodeDelay; +Param commitToDecodeDelay; +Param fetchToDecodeDelay; +Param decodeWidth; + +Param iewToRenameDelay; +Param commitToRenameDelay; +Param decodeToRenameDelay; +Param renameWidth; + +Param commitToIEWDelay; +Param renameToIEWDelay; +Param issueToExecuteDelay; +Param issueWidth; +Param executeWidth; +Param executeIntWidth; +Param executeFloatWidth; +Param executeBranchWidth; +Param executeMemoryWidth; + +Param iewToCommitDelay; +Param renameToROBDelay; +Param commitWidth; +Param squashWidth; + +Param localPredictorSize; +Param localCtrBits; +Param localHistoryTableSize; +Param localHistoryBits; +Param globalPredictorSize; +Param globalCtrBits; +Param globalHistoryBits; +Param choicePredictorSize; +Param choiceCtrBits; + +Param BTBEntries; +Param BTBTagSize; + +Param RASSize; + +Param LQEntries; +Param SQEntries; +Param LFSTSize; +Param SSITSize; + +Param numPhysIntRegs; +Param numPhysFloatRegs; +Param numIQEntries; +Param numROBEntries; + +Param decoupledFrontEnd; +Param dispatchWidth; +Param wbWidth; + +Param smtNumFetchingThreads; +Param smtFetchPolicy; +Param smtLSQPolicy; +Param smtLSQThreshold; +Param smtIQPolicy; +Param smtIQThreshold; +Param smtROBPolicy; +Param smtROBThreshold; +Param smtCommitPolicy; + +Param instShiftAmt; + +Param defer_registration; + +Param function_trace; +Param function_trace_start; + +END_DECLARE_SIM_OBJECT_PARAMS(SimpleOzoneCPU) + +BEGIN_INIT_SIM_OBJECT_PARAMS(SimpleOzoneCPU) + + INIT_PARAM(clock, "clock speed"), + INIT_PARAM(numThreads, "number of HW thread contexts"), + +#if FULL_SYSTEM + INIT_PARAM(system, "System object"), + INIT_PARAM(cpu_id, "processor ID"), + INIT_PARAM(itb, "Instruction translation buffer"), + INIT_PARAM(dtb, "Data translation buffer"), +#else + INIT_PARAM(workload, "Processes to run"), +// INIT_PARAM(page_table, "Page table"), +#endif // FULL_SYSTEM + + INIT_PARAM_DFLT(mem, "Memory", NULL), + + INIT_PARAM_DFLT(max_insts_any_thread, + "Terminate when any thread reaches this inst count", + 0), + INIT_PARAM_DFLT(max_insts_all_threads, + "Terminate when all threads have reached" + "this inst count", + 0), + INIT_PARAM_DFLT(max_loads_any_thread, + "Terminate when any thread reaches this load count", + 0), + INIT_PARAM_DFLT(max_loads_all_threads, + "Terminate when all threads have reached this load" + "count", + 0), + + INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL), + INIT_PARAM_DFLT(dcache, "L1 data cache", NULL), + + INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200), + INIT_PARAM_DFLT(width, "Width", 1), + INIT_PARAM_DFLT(frontEndWidth, "Front end width", 1), + INIT_PARAM_DFLT(backEndWidth, "Back end width", 1), + INIT_PARAM_DFLT(backEndSquashLatency, "Back end squash latency", 1), + INIT_PARAM_DFLT(backEndLatency, "Back end latency", 1), + INIT_PARAM_DFLT(maxInstBufferSize, "Maximum instruction buffer size", 16), + INIT_PARAM(numPhysicalRegs, "Number of physical registers"), + + INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"), + INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"), + INIT_PARAM(iewToFetchDelay, "Issue/Execute/Writeback to fetch" + "delay"), + INIT_PARAM(commitToFetchDelay, "Commit to fetch delay"), + INIT_PARAM(fetchWidth, "Fetch width"), + INIT_PARAM(renameToDecodeDelay, "Rename to decode delay"), + INIT_PARAM(iewToDecodeDelay, "Issue/Execute/Writeback to decode" + "delay"), + INIT_PARAM(commitToDecodeDelay, "Commit to decode delay"), + INIT_PARAM(fetchToDecodeDelay, "Fetch to decode delay"), + INIT_PARAM(decodeWidth, "Decode width"), + + INIT_PARAM(iewToRenameDelay, "Issue/Execute/Writeback to rename" + "delay"), + INIT_PARAM(commitToRenameDelay, "Commit to rename delay"), + INIT_PARAM(decodeToRenameDelay, "Decode to rename delay"), + INIT_PARAM(renameWidth, "Rename width"), + + INIT_PARAM(commitToIEWDelay, "Commit to " + "Issue/Execute/Writeback delay"), + INIT_PARAM(renameToIEWDelay, "Rename to " + "Issue/Execute/Writeback delay"), + INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal" + "to the IEW stage)"), + INIT_PARAM(issueWidth, "Issue width"), + INIT_PARAM(executeWidth, "Execute width"), + INIT_PARAM(executeIntWidth, "Integer execute width"), + INIT_PARAM(executeFloatWidth, "Floating point execute width"), + INIT_PARAM(executeBranchWidth, "Branch execute width"), + INIT_PARAM(executeMemoryWidth, "Memory execute width"), + + INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit " + "delay"), + INIT_PARAM(renameToROBDelay, "Rename to reorder buffer delay"), + INIT_PARAM(commitWidth, "Commit width"), + INIT_PARAM(squashWidth, "Squash width"), + + INIT_PARAM(localPredictorSize, "Size of local predictor"), + INIT_PARAM(localCtrBits, "Bits per counter"), + INIT_PARAM(localHistoryTableSize, "Size of local history table"), + INIT_PARAM(localHistoryBits, "Bits for the local history"), + INIT_PARAM(globalPredictorSize, "Size of global predictor"), + INIT_PARAM(globalCtrBits, "Bits per counter"), + INIT_PARAM(globalHistoryBits, "Bits of history"), + INIT_PARAM(choicePredictorSize, "Size of choice predictor"), + INIT_PARAM(choiceCtrBits, "Bits of choice counters"), + + INIT_PARAM(BTBEntries, "Number of BTB entries"), + INIT_PARAM(BTBTagSize, "Size of the BTB tags, in bits"), + + INIT_PARAM(RASSize, "RAS size"), + + INIT_PARAM(LQEntries, "Number of load queue entries"), + INIT_PARAM(SQEntries, "Number of store queue entries"), + INIT_PARAM(LFSTSize, "Last fetched store table size"), + INIT_PARAM(SSITSize, "Store set ID table size"), + + INIT_PARAM(numPhysIntRegs, "Number of physical integer registers"), + INIT_PARAM(numPhysFloatRegs, "Number of physical floating point " + "registers"), + INIT_PARAM(numIQEntries, "Number of instruction queue entries"), + INIT_PARAM(numROBEntries, "Number of reorder buffer entries"), + + INIT_PARAM_DFLT(decoupledFrontEnd, "Decoupled front end", true), + INIT_PARAM_DFLT(dispatchWidth, "Dispatch width", 0), + INIT_PARAM_DFLT(wbWidth, "Writeback width", 0), + + INIT_PARAM_DFLT(smtNumFetchingThreads, "SMT Number of Fetching Threads", 1), + INIT_PARAM_DFLT(smtFetchPolicy, "SMT Fetch Policy", "SingleThread"), + INIT_PARAM_DFLT(smtLSQPolicy, "SMT LSQ Sharing Policy", "Partitioned"), + INIT_PARAM_DFLT(smtLSQThreshold,"SMT LSQ Threshold", 100), + INIT_PARAM_DFLT(smtIQPolicy, "SMT IQ Policy", "Partitioned"), + INIT_PARAM_DFLT(smtIQThreshold, "SMT IQ Threshold", 100), + INIT_PARAM_DFLT(smtROBPolicy, "SMT ROB Sharing Policy", "Partitioned"), + INIT_PARAM_DFLT(smtROBThreshold,"SMT ROB Threshold", 100), + INIT_PARAM_DFLT(smtCommitPolicy,"SMT Commit Fetch Policy", "RoundRobin"), + + INIT_PARAM(instShiftAmt, "Number of bits to shift instructions by"), + INIT_PARAM(defer_registration, "defer system registration (for sampling)"), + + INIT_PARAM(function_trace, "Enable function trace"), + INIT_PARAM(function_trace_start, "Cycle to start function trace") + +END_INIT_SIM_OBJECT_PARAMS(SimpleOzoneCPU) + +CREATE_SIM_OBJECT(SimpleOzoneCPU) +{ + SimpleOzoneCPU *cpu; + +#if FULL_SYSTEM + // Full-system only supports a single thread for the moment. + int actual_num_threads = 1; +#else + // In non-full-system mode, we infer the number of threads from + // the workload if it's not explicitly specified. + int actual_num_threads = + numThreads.isValid() ? numThreads : workload.size(); + + if (workload.size() == 0) { + fatal("Must specify at least one workload!"); + } + +#endif + + SimpleParams *params = new SimpleParams; + + params->clock = clock; + + params->name = getInstanceName(); + params->numberOfThreads = actual_num_threads; + +#if FULL_SYSTEM + params->system = system; + params->cpu_id = cpu_id; + params->itb = itb; + params->dtb = dtb; +#else + params->workload = workload; +// params->pTable = page_table; +#endif // FULL_SYSTEM + + params->mem = mem; + + params->max_insts_any_thread = max_insts_any_thread; + params->max_insts_all_threads = max_insts_all_threads; + params->max_loads_any_thread = max_loads_any_thread; + params->max_loads_all_threads = max_loads_all_threads; + + // + // Caches + // + params->icacheInterface = icache ? icache->getInterface() : NULL; + params->dcacheInterface = dcache ? dcache->getInterface() : NULL; + params->cachePorts = cachePorts; + + params->width = width; + params->frontEndWidth = frontEndWidth; + params->backEndWidth = backEndWidth; + params->backEndSquashLatency = backEndSquashLatency; + params->backEndLatency = backEndLatency; + params->maxInstBufferSize = maxInstBufferSize; + params->numPhysicalRegs = numPhysIntRegs + numPhysFloatRegs; + + params->decodeToFetchDelay = decodeToFetchDelay; + params->renameToFetchDelay = renameToFetchDelay; + params->iewToFetchDelay = iewToFetchDelay; + params->commitToFetchDelay = commitToFetchDelay; + params->fetchWidth = fetchWidth; + + params->renameToDecodeDelay = renameToDecodeDelay; + params->iewToDecodeDelay = iewToDecodeDelay; + params->commitToDecodeDelay = commitToDecodeDelay; + params->fetchToDecodeDelay = fetchToDecodeDelay; + params->decodeWidth = decodeWidth; + + params->iewToRenameDelay = iewToRenameDelay; + params->commitToRenameDelay = commitToRenameDelay; + params->decodeToRenameDelay = decodeToRenameDelay; + params->renameWidth = renameWidth; + + params->commitToIEWDelay = commitToIEWDelay; + params->renameToIEWDelay = renameToIEWDelay; + params->issueToExecuteDelay = issueToExecuteDelay; + params->issueWidth = issueWidth; + params->executeWidth = executeWidth; + params->executeIntWidth = executeIntWidth; + params->executeFloatWidth = executeFloatWidth; + params->executeBranchWidth = executeBranchWidth; + params->executeMemoryWidth = executeMemoryWidth; + + params->iewToCommitDelay = iewToCommitDelay; + params->renameToROBDelay = renameToROBDelay; + params->commitWidth = commitWidth; + params->squashWidth = squashWidth; + + + params->localPredictorSize = localPredictorSize; + params->localCtrBits = localCtrBits; + params->localHistoryTableSize = localHistoryTableSize; + params->localHistoryBits = localHistoryBits; + params->globalPredictorSize = globalPredictorSize; + params->globalCtrBits = globalCtrBits; + params->globalHistoryBits = globalHistoryBits; + params->choicePredictorSize = choicePredictorSize; + params->choiceCtrBits = choiceCtrBits; + + params->BTBEntries = BTBEntries; + params->BTBTagSize = BTBTagSize; + + params->RASSize = RASSize; + + params->LQEntries = LQEntries; + params->SQEntries = SQEntries; + + params->SSITSize = SSITSize; + params->LFSTSize = LFSTSize; + + params->numPhysIntRegs = numPhysIntRegs; + params->numPhysFloatRegs = numPhysFloatRegs; + params->numIQEntries = numIQEntries; + params->numROBEntries = numROBEntries; + + params->decoupledFrontEnd = decoupledFrontEnd; + params->dispatchWidth = dispatchWidth; + params->wbWidth = wbWidth; + + params->smtNumFetchingThreads = smtNumFetchingThreads; + params->smtFetchPolicy = smtFetchPolicy; + params->smtIQPolicy = smtIQPolicy; + params->smtLSQPolicy = smtLSQPolicy; + params->smtLSQThreshold = smtLSQThreshold; + params->smtROBPolicy = smtROBPolicy; + params->smtROBThreshold = smtROBThreshold; + params->smtCommitPolicy = smtCommitPolicy; + + params->instShiftAmt = 2; + + params->deferRegistration = defer_registration; + + params->functionTrace = function_trace; + params->functionTraceStart = function_trace_start; + + cpu = new SimpleOzoneCPU(params); + + return cpu; +} + +REGISTER_SIM_OBJECT("SimpleOzoneCPU", SimpleOzoneCPU) + diff --git a/cpu/ozone/cpu_impl.hh b/cpu/ozone/cpu_impl.hh index e7ed3cfe0..36ec30b2c 100644 --- a/cpu/ozone/cpu_impl.hh +++ b/cpu/ozone/cpu_impl.hh @@ -26,23 +26,1137 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef __CPU_OOO_CPU_OOO_IMPL_HH__ -#define __CPU_OOO_CPU_OOO_IMPL_HH__ +#include +#include -#include "arch/isa_traits.hh" +#include "arch/isa_traits.hh" // For MachInst +#include "base/trace.hh" +#include "config/full_system.hh" +#include "cpu/base.hh" +#include "cpu/exec_context.hh" +#include "cpu/exetrace.hh" +#include "cpu/ozone/cpu.hh" +#include "cpu/quiesce_event.hh" +#include "cpu/static_inst.hh" +#include "mem/base_mem.hh" +#include "mem/mem_interface.hh" +#include "sim/sim_object.hh" +#include "sim/stats.hh" + +#if FULL_SYSTEM +#include "arch/faults.hh" +#include "arch/alpha/osfpal.hh" +#include "arch/alpha/tlb.hh" +#include "arch/vtophys.hh" +#include "base/callback.hh" +#include "base/remote_gdb.hh" +#include "cpu/profile.hh" +#include "kern/kernel_stats.hh" +#include "mem/functional/memory_control.hh" +#include "mem/functional/physical.hh" +#include "sim/faults.hh" +#include "sim/sim_events.hh" +#include "sim/sim_exit.hh" +#include "sim/system.hh" +#else // !FULL_SYSTEM +#include "mem/functional/functional.hh" +#include "sim/process.hh" +#endif // FULL_SYSTEM + +using namespace TheISA; + +template +template +void +OzoneCPU::trace_data(T data) { + if (traceData) { + traceData->setData(data); + } +} + +template +OzoneCPU::TickEvent::TickEvent(OzoneCPU *c, int w) + : Event(&mainEventQueue, CPU_Tick_Pri), cpu(c), width(w) +{ +} + +template +void +OzoneCPU::TickEvent::process() +{ + cpu->tick(); +} + +template +const char * +OzoneCPU::TickEvent::description() +{ + return "OzoneCPU tick event"; +} +/* +template +OzoneCPU::ICacheCompletionEvent::ICacheCompletionEvent(OzoneCPU *_cpu) + : Event(&mainEventQueue), + cpu(_cpu) +{ +} + +template +void +OzoneCPU::ICacheCompletionEvent::process() +{ + cpu->processICacheCompletion(); +} + +template +const char * +OzoneCPU::ICacheCompletionEvent::description() +{ + return "OzoneCPU I-cache completion event"; +} + +template +OzoneCPU::DCacheCompletionEvent:: +DCacheCompletionEvent(OzoneCPU *_cpu, + DynInstPtr &_inst, + DCacheCompEventIt &_dcceIt) + : Event(&mainEventQueue), + cpu(_cpu), + inst(_inst), + dcceIt(_dcceIt) +{ + this->setFlags(Event::AutoDelete); +} + +template +void +OzoneCPU::DCacheCompletionEvent::process() +{ + inst->setCompleted(); + + // Maybe remove the EA from the list of addrs? + cpu->eaList.clearAddr(inst->seqNum, inst->getEA()); + cpu->dCacheCompList.erase(this->dcceIt); +} + +template +const char * +OzoneCPU::DCacheCompletionEvent::description() +{ + return "OzoneCPU D-cache completion event"; +} +*/ +template +OzoneCPU::OzoneCPU(Params *p) +#if FULL_SYSTEM + : BaseCPU(p), thread(this, 0, p->mem), tickEvent(this, p->width), +#else + : BaseCPU(p), thread(this, 0, p->workload[0], 0), tickEvent(this, p->width), +#endif + comm(5, 5) +{ + frontEnd = new FrontEnd(p); + backEnd = new BackEnd(p); + + _status = Idle; + thread.xcProxy = &xcProxy; + + thread.inSyscall = false; + + xcProxy.cpu = this; + xcProxy.thread = &thread; + + thread.setStatus(ExecContext::Suspended); +#if FULL_SYSTEM +// xc = new ExecContext(this, 0, p->system, p->itb, p->dtb, p->mem); + + /***** All thread state stuff *****/ + thread.cpu = this; + thread.tid = 0; + thread.mem = p->mem; + + thread.quiesceEvent = new EndQuiesceEvent(&xcProxy); + + system = p->system; + itb = p->itb; + dtb = p->dtb; + memctrl = p->system->memctrl; + physmem = p->system->physmem; + + if (p->profile) { + thread.profile = new FunctionProfile(p->system->kernelSymtab); + Callback *cb = + new MakeCallback(&xcProxy); + registerExitCallback(cb); + } + + // let's fill with a dummy node for now so we don't get a segfault + // on the first cycle when there's no node available. + static ProfileNode dummyNode; + thread.profileNode = &dummyNode; + thread.profilePC = 3; + +#else +// xc = new ExecContext(this, /* thread_num */ 0, p->workload[0], /* asid */ 0); + thread.cpu = this; + thread.tid = 0; + thread.process = p->workload[0]; +// thread.mem = thread.process->getMemory(); + thread.asid = 0; +#endif // !FULL_SYSTEM +/* + icacheInterface = p->icache_interface; + dcacheInterface = p->dcache_interface; + + cacheMemReq = new MemReq(); + cacheMemReq->xc = xc; + cacheMemReq->asid = 0; + cacheMemReq->data = new uint8_t[64]; +*/ + numInst = 0; + startNumInst = 0; +/* numLoad = 0; + startNumLoad = 0; + lastIcacheStall = 0; + lastDcacheStall = 0; + + issueWidth = p->issueWidth; +*/ + execContexts.push_back(&xcProxy); + + frontEnd->setCPU(this); + backEnd->setCPU(this); + + frontEnd->setXC(&xcProxy); + backEnd->setXC(&xcProxy); + + frontEnd->setThreadState(&thread); + backEnd->setThreadState(&thread); + + frontEnd->setCommBuffer(&comm); + backEnd->setCommBuffer(&comm); + + frontEnd->setBackEnd(backEnd); + backEnd->setFrontEnd(frontEnd); + + decoupledFrontEnd = p->decoupledFrontEnd; + + globalSeqNum = 1; + + checkInterrupts = false; +/* + fetchRedirBranch = true; + fetchRedirExcp = true; + + // Need to initialize the rename maps, and the head and tail pointers. + robHeadPtr = new DynInst(this); + robTailPtr = new DynInst(this); + + robHeadPtr->setNextInst(robTailPtr); +// robHeadPtr->setPrevInst(NULL); +// robTailPtr->setNextInst(NULL); + robTailPtr->setPrevInst(robHeadPtr); + + robHeadPtr->setCompleted(); + robTailPtr->setCompleted(); + + for (int i = 0; i < ISA::TotalNumRegs; ++i) { + renameTable[i] = new DynInst(this); + commitTable[i] = new DynInst(this); + + renameTable[i]->setCompleted(); + commitTable[i]->setCompleted(); + } + +#if FULL_SYSTEM + for (int i = 0; i < ISA::NumIntRegs; ++i) { + palShadowTable[i] = new DynInst(this); + palShadowTable[i]->setCompleted(); + } +#endif + + // Size of cache block. + cacheBlkSize = icacheInterface ? icacheInterface->getBlockSize() : 64; + + // Create mask to get rid of offset bits. + cacheBlkMask = (cacheBlkSize - 1); + + // Get the size of an instruction. + instSize = sizeof(MachInst); + + // Create space to store a cache line. + cacheData = new uint8_t[cacheBlkSize]; + + cacheBlkValid = false; +*/ + for (int i = 0; i < TheISA::TotalNumRegs; ++i) { + thread.renameTable[i] = new DynInst(this); + thread.renameTable[i]->setCompleted(); + } + + frontEnd->renameTable.copyFrom(thread.renameTable); + backEnd->renameTable.copyFrom(thread.renameTable); + +#if !FULL_SYSTEM + pTable = p->pTable; +#endif + + DPRINTF(OzoneCPU, "OzoneCPU: Created Ozone cpu object.\n"); +} + +template +OzoneCPU::~OzoneCPU() +{ +} +/* +template +void +OzoneCPU::copyFromXC() +{ + for (int i = 0; i < TheISA::TotalNumRegs; ++i) { + if (i < TheISA::NumIntRegs) { + renameTable[i]->setIntResult(xc->readIntReg(i)); + } else if (i < TheISA::NumFloatRegs) { + renameTable[i]->setDoubleResult(xc->readFloatRegDouble(i)); + } + } + + DPRINTF(OzoneCPU, "Func Exe inst is: %i\n", xc->func_exe_inst); + backEnd->funcExeInst = xc->func_exe_inst; +// PC = xc->readPC(); +// nextPC = xc->regs.npc; +} + +template +void +OzoneCPU::copyToXC() +{ + for (int i = 0; i < TheISA::TotalNumRegs; ++i) { + if (i < TheISA::NumIntRegs) { + xc->setIntReg(i, renameTable[i]->readIntResult()); + } else if (i < TheISA::NumFloatRegs) { + xc->setFloatRegDouble(i, renameTable[i]->readDoubleResult()); + } + } + + this->xc->regs.miscRegs.fpcr = this->regFile.miscRegs[tid].fpcr; + this->xc->regs.miscRegs.uniq = this->regFile.miscRegs[tid].uniq; + this->xc->regs.miscRegs.lock_flag = this->regFile.miscRegs[tid].lock_flag; + this->xc->regs.miscRegs.lock_addr = this->regFile.miscRegs[tid].lock_addr; + + xc->func_exe_inst = backEnd->funcExeInst; + xc->regs.pc = PC; + xc->regs.npc = nextPC; +} +*/ +template +void +OzoneCPU::switchOut() +{ + _status = SwitchedOut; + if (tickEvent.scheduled()) + tickEvent.squash(); +} + +template +void +OzoneCPU::takeOverFrom(BaseCPU *oldCPU) +{ + BaseCPU::takeOverFrom(oldCPU); + + assert(!tickEvent.scheduled()); + + // if any of this CPU's ExecContexts are active, mark the CPU as + // running and schedule its tick event. + for (int i = 0; i < execContexts.size(); ++i) { + ExecContext *xc = execContexts[i]; + if (xc->status() == ExecContext::Active && + _status != Running) { + _status = Running; + tickEvent.schedule(curTick); + } + } +} + +template +void +OzoneCPU::activateContext(int thread_num, int delay) +{ + // Eventually change this in SMT. + assert(thread_num == 0); +// assert(xcProxy); + + assert(_status == Idle); + notIdleFraction++; + scheduleTickEvent(delay); + _status = Running; + thread._status = ExecContext::Active; +} + +template +void +OzoneCPU::suspendContext(int thread_num) +{ + // Eventually change this in SMT. + assert(thread_num == 0); +// assert(xcProxy); + + assert(_status == Running); + notIdleFraction--; + unscheduleTickEvent(); + _status = Idle; +} + +template +void +OzoneCPU::deallocateContext(int thread_num) +{ + // for now, these are equivalent + suspendContext(thread_num); +} + +template +void +OzoneCPU::haltContext(int thread_num) +{ + // for now, these are equivalent + suspendContext(thread_num); +} + +template +void +OzoneCPU::regStats() +{ + using namespace Stats; + + BaseCPU::regStats(); + + thread.numInsts + .name(name() + ".num_insts") + .desc("Number of instructions executed") + ; + + thread.numMemRefs + .name(name() + ".num_refs") + .desc("Number of memory references") + ; + + notIdleFraction + .name(name() + ".not_idle_fraction") + .desc("Percentage of non-idle cycles") + ; + + idleFraction + .name(name() + ".idle_fraction") + .desc("Percentage of idle cycles") + ; + + idleFraction = constant(1.0) - notIdleFraction; + + frontEnd->regStats(); + backEnd->regStats(); +} + +template +void +OzoneCPU::resetStats() +{ + startNumInst = numInst; + notIdleFraction = (_status != Idle); +} + +template +void +OzoneCPU::init() +{ + BaseCPU::init(); +/* + copyFromXC(); + + // ALso copy over PC/nextPC. This isn't normally copied in "copyFromXC()" + // so that the XC doesn't mess up the PC when returning from a syscall. + PC = xc->readPC(); + nextPC = xc->regs.npc; +*/ + // Mark this as in syscall so it won't need to squash + thread.inSyscall = true; +#if FULL_SYSTEM + for (int i = 0; i < execContexts.size(); ++i) { + ExecContext *xc = execContexts[i]; + + // initialize CPU, including PC + TheISA::initCPU(xc, xc->readCpuId()); + } +#endif + frontEnd->renameTable.copyFrom(thread.renameTable); + backEnd->renameTable.copyFrom(thread.renameTable); + + thread.inSyscall = false; +} + +template +void +OzoneCPU::serialize(std::ostream &os) +{ + // At this point, all DCacheCompEvents should be processed. + + BaseCPU::serialize(os); + SERIALIZE_ENUM(_status); + nameOut(os, csprintf("%s.xc", name())); + xcProxy.serialize(os); + nameOut(os, csprintf("%s.tickEvent", name())); + tickEvent.serialize(os); +} + +template +void +OzoneCPU::unserialize(Checkpoint *cp, const std::string §ion) +{ + BaseCPU::unserialize(cp, section); + UNSERIALIZE_ENUM(_status); + xcProxy.unserialize(cp, csprintf("%s.xc", section)); + tickEvent.unserialize(cp, csprintf("%s.tickEvent", section)); +} + +template +Fault +OzoneCPU::copySrcTranslate(Addr src) +{ + panic("Copy not implemented!\n"); + return NoFault; +#if 0 + static bool no_warn = true; + int blk_size = (dcacheInterface) ? dcacheInterface->getBlockSize() : 64; + // Only support block sizes of 64 atm. + assert(blk_size == 64); + int offset = src & (blk_size - 1); + + // Make sure block doesn't span page + if (no_warn && + (src & TheISA::PageMask) != ((src + blk_size) & TheISA::PageMask) && + (src >> 40) != 0xfffffc) { + warn("Copied block source spans pages %x.", src); + no_warn = false; + } + + memReq->reset(src & ~(blk_size - 1), blk_size); + + // translate to physical address + Fault fault = xc->translateDataReadReq(memReq); + + assert(fault != Alignment_Fault); + + if (fault == NoFault) { + xc->copySrcAddr = src; + xc->copySrcPhysAddr = memReq->paddr + offset; + } else { + xc->copySrcAddr = 0; + xc->copySrcPhysAddr = 0; + } + return fault; +#endif +} + +template +Fault +OzoneCPU::copy(Addr dest) +{ + panic("Copy not implemented!\n"); + return NoFault; +#if 0 + static bool no_warn = true; + int blk_size = (dcacheInterface) ? dcacheInterface->getBlockSize() : 64; + // Only support block sizes of 64 atm. + assert(blk_size == 64); + uint8_t data[blk_size]; + //assert(xc->copySrcAddr); + int offset = dest & (blk_size - 1); + + // Make sure block doesn't span page + if (no_warn && + (dest & TheISA::PageMask) != ((dest + blk_size) & TheISA::PageMask) && + (dest >> 40) != 0xfffffc) { + no_warn = false; + warn("Copied block destination spans pages %x. ", dest); + } + + memReq->reset(dest & ~(blk_size -1), blk_size); + // translate to physical address + Fault fault = xc->translateDataWriteReq(memReq); + + assert(fault != Alignment_Fault); + + if (fault == NoFault) { + Addr dest_addr = memReq->paddr + offset; + // Need to read straight from memory since we have more than 8 bytes. + memReq->paddr = xc->copySrcPhysAddr; + xc->mem->read(memReq, data); + memReq->paddr = dest_addr; + xc->mem->write(memReq, data); + if (dcacheInterface) { + memReq->cmd = Copy; + memReq->completionEvent = NULL; + memReq->paddr = xc->copySrcPhysAddr; + memReq->dest = dest_addr; + memReq->size = 64; + memReq->time = curTick; + dcacheInterface->access(memReq); + } + } + return fault; +#endif +} + +#if FULL_SYSTEM +template +Addr +OzoneCPU::dbg_vtophys(Addr addr) +{ + return vtophys(&xcProxy, addr); +} +#endif // FULL_SYSTEM +/* +template +void +OzoneCPU::processICacheCompletion() +{ + switch (status()) { + case IcacheMiss: + DPRINTF(OzoneCPU, "OzoneCPU: Finished Icache miss.\n"); + + icacheStallCycles += curTick - lastIcacheStall; + _status = IcacheMissComplete; + cacheBlkValid = true; +// scheduleTickEvent(1); + break; + case SwitchedOut: + // If this CPU has been switched out due to sampling/warm-up, + // ignore any further status changes (e.g., due to cache + // misses outstanding at the time of the switch). + return; + default: + panic("OzoneCPU::processICacheCompletion: bad state"); + break; + } +} +*/ +#if FULL_SYSTEM +template +void +OzoneCPU::post_interrupt(int int_num, int index) +{ + BaseCPU::post_interrupt(int_num, index); + + if (thread._status == ExecContext::Suspended) { + DPRINTF(IPI,"Suspended Processor awoke\n"); +// thread.activate(); + // Hack for now. Otherwise might have to go through the xcProxy, or + // I need to figure out what's the right thing to call. + activateContext(thread.tid, 1); + } +} +#endif // FULL_SYSTEM + +/* start simulation, program loaded, processor precise state initialized */ +template +void +OzoneCPU::tick() +{ + DPRINTF(OzoneCPU, "\n\nOzoneCPU: Ticking cpu.\n"); + + thread.renameTable[ZeroReg]->setIntResult(0); + thread.renameTable[ZeroReg+TheISA::FP_Base_DepTag]-> + setDoubleResult(0.0); + + // General code flow: + // Check for any interrupts. Handle them if I do have one. + // Check if I have a need to fetch a new cache block. Either a bit could be + // set by functions indicating that I need to fetch a new block, or I could + // hang onto the last PC of the last cache block I fetched and compare the + // current PC to that. Setting a bit seems nicer but may be more error + // prone. + // Scan through the IQ to figure out if there's anything I can issue/execute + // Might need something close to the FU Pools to tell what instructions + // I can issue. How to handle loads and stores vs other insts? + // Extremely slow way: find first inst that can possibly issue; if it's a + // load or a store, then iterate through load/store queue. + // If I can't find instructions to execute and I've got room in the IQ + // (which is just a counter), then grab a few instructions out of the cache + // line buffer until I either run out or can execute up until my limit. + + numCycles++; + + traceData = NULL; + +// Fault fault = NoFault; + +#if 0 // FULL_SYSTEM + if (checkInterrupts && check_interrupts() && !inPalMode() && + status() != IcacheMissComplete) { + int ipl = 0; + int summary = 0; + checkInterrupts = false; + + if (readMiscReg(IPR_SIRR)) { + for (int i = INTLEVEL_SOFTWARE_MIN; + i < INTLEVEL_SOFTWARE_MAX; i++) { + if (readMiscReg(IPR_SIRR) & (ULL(1) << i)) { + // See table 4-19 of 21164 hardware reference + ipl = (i - INTLEVEL_SOFTWARE_MIN) + 1; + summary |= (ULL(1) << i); + } + } + } + + // Is this method so that if the interrupts are switched over from + // another CPU they'll still be handled? +// uint64_t interrupts = cpuXC->cpu->intr_status(); + uint64_t interrupts = intr_status(); + for (int i = INTLEVEL_EXTERNAL_MIN; + i < INTLEVEL_EXTERNAL_MAX; i++) { + if (interrupts & (ULL(1) << i)) { + // See table 4-19 of 21164 hardware reference + ipl = i; + summary |= (ULL(1) << i); + } + } + + if (readMiscReg(IPR_ASTRR)) + panic("asynchronous traps not implemented\n"); + + if (ipl && ipl > readMiscReg(IPR_IPLR)) { + setMiscReg(IPR_ISR, summary); + setMiscReg(IPR_INTID, ipl); + + Fault(new InterruptFault)->invoke(xc); + + DPRINTF(Flow, "Interrupt! IPLR=%d ipl=%d summary=%x\n", + readMiscReg(IPR_IPLR), ipl, summary); + } + } +#endif + + // Make call to ISA to ensure 0 register semantics...actually because the + // DynInsts will generally be the register file, this should only have to + // happen when the xc is actually written to (during a syscall or something) + // maintain $r0 semantics +// assert(renameTable[ZeroReg]->readIntResult() == 0); +#ifdef TARGET_ALPHA +// assert(renameTable[ZeroReg]->readDoubleResult() == 0); +#endif // TARGET_ALPHA + + comm.advance(); + frontEnd->tick(); + backEnd->tick(); + + // Do this here? For now the front end will control the PC. +// PC = nextPC; + + // check for instruction-count-based events + comInstEventQueue[0]->serviceEvents(numInst); + + if (!tickEvent.scheduled()) + tickEvent.schedule(curTick + 1); +} + +template +void +OzoneCPU::squashFromXC() +{ + thread.inSyscall = true; + backEnd->squashFromXC(); +} + +#if !FULL_SYSTEM +template +void +OzoneCPU::syscall() +{ + // Not sure this copy is needed, depending on how the XC proxy is made. + thread.renameTable.copyFrom(backEnd->renameTable); + + thread.inSyscall = true; + + thread.funcExeInst++; + + DPRINTF(OzoneCPU, "FuncExeInst: %i\n", thread.funcExeInst); + + thread.process->syscall(&xcProxy); + + thread.funcExeInst--; + + thread.inSyscall = false; + + frontEnd->renameTable.copyFrom(thread.renameTable); + backEnd->renameTable.copyFrom(thread.renameTable); +} + +template +void +OzoneCPU::setSyscallReturn(SyscallReturn return_value, int tid) +{ + // check for error condition. Alpha syscall convention is to + // indicate success/failure in reg a3 (r19) and put the + // return value itself in the standard return value reg (v0). + if (return_value.successful()) { + // no error + thread.renameTable[SyscallSuccessReg]->setIntResult(0); + thread.renameTable[ReturnValueReg]->setIntResult(return_value.value()); + } else { + // got an error, return details + thread.renameTable[SyscallSuccessReg]->setIntResult((IntReg) -1); + thread.renameTable[ReturnValueReg]->setIntResult(-return_value.value()); + } +} +#else +template +Fault +OzoneCPU::hwrei() +{ + // Need to move this to ISA code + // May also need to make this per thread + if (!inPalMode()) + return new UnimplementedOpcodeFault; + + thread.setNextPC(thread.readMiscReg(AlphaISA::IPR_EXC_ADDR)); + + // Not sure how to make a similar check in the Ozone model +// if (!misspeculating()) { + kernelStats->hwrei(); + + checkInterrupts = true; +// } + + // FIXME: XXX check for interrupts? XXX + return NoFault; +} + +template +bool +OzoneCPU::simPalCheck(int palFunc) +{ + // Need to move this to ISA code + // May also need to make this per thread + this->kernelStats->callpal(palFunc, &xcProxy); + + switch (palFunc) { + case PAL::halt: + haltContext(thread.tid); + if (--System::numSystemsRunning == 0) + new SimExitEvent("all cpus halted"); + break; + + case PAL::bpt: + case PAL::bugchk: + if (system->breakpoint()) + return false; + break; + } + + return true; +} +#endif + +template +BaseCPU * +OzoneCPU::OzoneXC::getCpuPtr() +{ + return cpu; +} + +template +void +OzoneCPU::OzoneXC::setCpuId(int id) +{ + cpu->cpuId = id; + thread->cpuId = id; +} + +template +void +OzoneCPU::OzoneXC::setStatus(Status new_status) +{ +// cpu->_status = new_status; + thread->_status = new_status; +} + +template +void +OzoneCPU::OzoneXC::activate(int delay) +{ + cpu->activateContext(thread->tid, delay); +} + +/// Set the status to Suspended. +template +void +OzoneCPU::OzoneXC::suspend() +{ + cpu->suspendContext(thread->tid); +} + +/// Set the status to Unallocated. +template +void +OzoneCPU::OzoneXC::deallocate() +{ + cpu->deallocateContext(thread->tid); +} + +/// Set the status to Halted. +template +void +OzoneCPU::OzoneXC::halt() +{ + cpu->haltContext(thread->tid); +} + +#if FULL_SYSTEM +template +void +OzoneCPU::OzoneXC::dumpFuncProfile() +{ } +#endif + +template +void +OzoneCPU::OzoneXC::takeOverFrom(ExecContext *old_context) +{ } + +template +void +OzoneCPU::OzoneXC::regStats(const std::string &name) +{ } + +template +void +OzoneCPU::OzoneXC::serialize(std::ostream &os) +{ } + +template +void +OzoneCPU::OzoneXC::unserialize(Checkpoint *cp, const std::string §ion) +{ } + +#if FULL_SYSTEM +template +Event * +OzoneCPU::OzoneXC::getQuiesceEvent() +{ + return thread->quiesceEvent; +} + +template +Tick +OzoneCPU::OzoneXC::readLastActivate() +{ + return thread->lastActivate; +} + +template +Tick +OzoneCPU::OzoneXC::readLastSuspend() +{ + return thread->lastSuspend; +} + +template +void +OzoneCPU::OzoneXC::profileClear() +{ + if (thread->profile) + thread->profile->clear(); +} + +template +void +OzoneCPU::OzoneXC::profileSample() +{ + if (thread->profile) + thread->profile->sample(thread->profileNode, thread->profilePC); +} +#endif + +template +int +OzoneCPU::OzoneXC::getThreadNum() +{ + return thread->tid; +} + +// Also somewhat obnoxious. Really only used for the TLB fault. +template +TheISA::MachInst +OzoneCPU::OzoneXC::getInst() +{ + return thread->inst; +} + +template +void +OzoneCPU::OzoneXC::copyArchRegs(ExecContext *xc) +{ + thread->PC = xc->readPC(); + thread->nextPC = xc->readNextPC(); + + cpu->frontEnd->setPC(thread->PC); + cpu->frontEnd->setNextPC(thread->nextPC); + + for (int i = 0; i < TheISA::TotalNumRegs; ++i) { + if (i < TheISA::FP_Base_DepTag) { + thread->renameTable[i]->setIntResult(xc->readIntReg(i)); + } else if (i < (TheISA::FP_Base_DepTag + TheISA::NumFloatRegs)) { + int fp_idx = i - TheISA::FP_Base_DepTag; + thread->renameTable[i]->setDoubleResult( + xc->readFloatRegDouble(fp_idx)); + } + } + +#if !FULL_SYSTEM + thread->funcExeInst = xc->readFuncExeInst(); +#endif + + // Need to copy the XC values into the current rename table, + // copy the misc regs. + thread->regs.miscRegs.copyMiscRegs(xc); +} + +template +void +OzoneCPU::OzoneXC::clearArchRegs() +{ + panic("Unimplemented!"); +} template -class OoOCPU; +uint64_t +OzoneCPU::OzoneXC::readIntReg(int reg_idx) +{ + return thread->renameTable[reg_idx]->readIntResult(); +} + +template +float +OzoneCPU::OzoneXC::readFloatRegSingle(int reg_idx) +{ + return thread->renameTable[reg_idx]->readFloatResult(); +} + +template +double +OzoneCPU::OzoneXC::readFloatRegDouble(int reg_idx) +{ + return thread->renameTable[reg_idx]->readDoubleResult(); +} + +template +uint64_t +OzoneCPU::OzoneXC::readFloatRegInt(int reg_idx) +{ + return thread->renameTable[reg_idx]->readIntResult(); +} + +template +void +OzoneCPU::OzoneXC::setIntReg(int reg_idx, uint64_t val) +{ + thread->renameTable[reg_idx]->setIntResult(val); + + if (!thread->inSyscall) { + cpu->squashFromXC(); + } +} + +template +void +OzoneCPU::OzoneXC::setFloatRegSingle(int reg_idx, float val) +{ + panic("Unimplemented!"); +} + +template +void +OzoneCPU::OzoneXC::setFloatRegDouble(int reg_idx, double val) +{ + thread->renameTable[reg_idx]->setDoubleResult(val); + + if (!thread->inSyscall) { + cpu->squashFromXC(); + } +} + +template +void +OzoneCPU::OzoneXC::setFloatRegInt(int reg_idx, uint64_t val) +{ + panic("Unimplemented!"); +} + +template +void +OzoneCPU::OzoneXC::setPC(Addr val) +{ + thread->PC = val; + cpu->frontEnd->setPC(val); + + if (!thread->inSyscall) { + cpu->squashFromXC(); + } +} + +template +void +OzoneCPU::OzoneXC::setNextPC(Addr val) +{ + thread->nextPC = val; + cpu->frontEnd->setNextPC(val); + + if (!thread->inSyscall) { + cpu->squashFromXC(); + } +} + +template +TheISA::MiscReg +OzoneCPU::OzoneXC::readMiscReg(int misc_reg) +{ + return thread->regs.miscRegs.readReg(misc_reg); +} + +template +TheISA::MiscReg +OzoneCPU::OzoneXC::readMiscRegWithEffect(int misc_reg, Fault &fault) +{ + return thread->regs.miscRegs.readRegWithEffect(misc_reg, + fault, this); +} + +template +Fault +OzoneCPU::OzoneXC::setMiscReg(int misc_reg, const MiscReg &val) +{ + // Needs to setup a squash event unless we're in syscall mode + Fault ret_fault = thread->regs.miscRegs.setReg(misc_reg, val); + + if (!thread->inSyscall) { + cpu->squashFromXC(); + } + + return ret_fault; +} template -class OoODynInst; +Fault +OzoneCPU::OzoneXC::setMiscRegWithEffect(int misc_reg, const MiscReg &val) +{ + // Needs to setup a squash event unless we're in syscall mode + Fault ret_fault = thread->regs.miscRegs.setRegWithEffect(misc_reg, val, + this); -struct OoOImpl { - typedef AlphaISA ISA; - typedef OoOCPU OoOCPU; - typedef OoOCPU FullCPU; - typedef OoODynInst DynInst; - typedef RefCountingPtr DynInstPtr; -}; + if (!thread->inSyscall) { + cpu->squashFromXC(); + } -#endif // __CPU_OOO_CPU_OOO_IMPL_HH__ + return ret_fault; +} diff --git a/cpu/ozone/dyn_inst.cc b/cpu/ozone/dyn_inst.cc new file mode 100644 index 000000000..3bf8b03ca --- /dev/null +++ b/cpu/ozone/dyn_inst.cc @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/ozone/dyn_inst_impl.hh" +#include "cpu/ozone/ozone_impl.hh" +#include "cpu/ozone/simple_impl.hh" + +template class OzoneDynInst; +template class OzoneDynInst; + diff --git a/cpu/ozone/dyn_inst.hh b/cpu/ozone/dyn_inst.hh new file mode 100644 index 000000000..4382af0fd --- /dev/null +++ b/cpu/ozone/dyn_inst.hh @@ -0,0 +1,261 @@ +/* + * Copyright (c) 2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_OZONE_DYN_INST_HH__ +#define __CPU_OZONE_DYN_INST_HH__ + +#include "arch/isa_traits.hh" +#include "config/full_system.hh" +#include "cpu/base_dyn_inst.hh" +#include "cpu/ozone/cpu.hh" // MUST include this +#include "cpu/inst_seq.hh" +#include "cpu/ozone/simple_impl.hh" // Would be nice to not have to include this +#include "cpu/ozone/ozone_impl.hh" + +#include +#include + +template +class OzoneDynInst : public BaseDynInst +{ + public: + // Typedefs + typedef typename Impl::FullCPU FullCPU; + + typedef typename FullCPU::ImplState ImplState; + + // Typedef for DynInstPtr. This is really just a RefCountingPtr. + typedef typename Impl::DynInstPtr DynInstPtr; + +// typedef typename Impl::BranchPred::BPredInfo BPredInfo; + + typedef TheISA::ExtMachInst ExtMachInst; + typedef TheISA::MachInst MachInst; + typedef TheISA::MiscReg MiscReg; + typedef typename std::list::iterator ListIt; + + // Note that this is duplicated from the BaseDynInst class; I'm simply not + // sure the enum would carry through so I could use it in array + // declarations in this class. + enum { + MaxInstSrcRegs = TheISA::MaxInstSrcRegs, + MaxInstDestRegs = TheISA::MaxInstDestRegs + }; + + OzoneDynInst(FullCPU *cpu); + + OzoneDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC, + InstSeqNum seq_num, FullCPU *cpu); + + OzoneDynInst(StaticInstPtr inst); + + ~OzoneDynInst(); + + void setSrcInst(DynInstPtr &newSrcInst, int regIdx) + { srcInsts[regIdx] = newSrcInst; } + + bool srcInstReady(int regIdx); + + void setPrevDestInst(DynInstPtr &oldDestInst, int regIdx) + { prevDestInst[regIdx] = oldDestInst; } + + DynInstPtr &getPrevDestInst(int regIdx) + { return prevDestInst[regIdx]; } + + void addDependent(DynInstPtr &dependent_inst); + + std::vector &getDependents() { return dependents; } + + void wakeDependents(); + +// void setBPredInfo(const BPredInfo &bp_info) { bpInfo = bp_info; } + +// BPredInfo &getBPredInfo() { return bpInfo; } + +// OzoneXC *thread; + + private: + void initInstPtrs(); + + std::vector dependents; + + /** The instruction that produces the value of the source registers. These + * may be NULL if the value has already been read from the source + * instruction. + */ + DynInstPtr srcInsts[MaxInstSrcRegs]; + + /** + * Previous rename instruction for this destination. + */ + DynInstPtr prevDestInst[MaxInstSrcRegs]; + +// BPredInfo bpInfo; + + public: + + Fault initiateAcc(); + + Fault completeAcc(); +/* + template + Fault read(Addr addr, T &data, unsigned flags); + + template + Fault write(T data, Addr addr, unsigned flags, uint64_t *res); +*/ + // The register accessor methods provide the index of the + // instruction's operand (e.g., 0 or 1), not the architectural + // register index, to simplify the implementation of register + // renaming. We find the architectural register index by indexing + // into the instruction's own operand index table. Note that a + // raw pointer to the StaticInst is provided instead of a + // ref-counted StaticInstPtr to redice overhead. This is fine as + // long as these methods don't copy the pointer into any long-term + // storage (which is pretty hard to imagine they would have reason + // to do). + + uint64_t readIntReg(const StaticInst *si, int idx) + { + return srcInsts[idx]->readIntResult(); + } + + float readFloatRegSingle(const StaticInst *si, int idx) + { + return srcInsts[idx]->readFloatResult(); + } + + double readFloatRegDouble(const StaticInst *si, int idx) + { + return srcInsts[idx]->readDoubleResult(); + } + + uint64_t readFloatRegInt(const StaticInst *si, int idx) + { + return srcInsts[idx]->readIntResult(); + } + + /** @todo: Make results into arrays so they can handle multiple dest + * registers. + */ + void setIntReg(const StaticInst *si, int idx, uint64_t val) + { + this->instResult.integer = val; + } + + void setFloatRegSingle(const StaticInst *si, int idx, float val) + { + this->instResult.fp = val; + } + + void setFloatRegDouble(const StaticInst *si, int idx, double val) + { + this->instResult.dbl = val; + } + + void setFloatRegInt(const StaticInst *si, int idx, uint64_t val) + { + this->instResult.integer = val; + } + + void setIntResult(uint64_t result) { this->instResult.integer = result; } + void setDoubleResult(double result) { this->instResult.dbl = result; } + + bool srcsReady(); + bool eaSrcsReady(); + + Fault execute(); + + Fault executeEAComp() + { return NoFault; } + + Fault executeMemAcc() + { return this->staticInst->memAccInst()->execute(this, this->traceData); } + + void clearDependents(); + + public: + // ISA stuff + MiscReg readMiscReg(int misc_reg); + + MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault); + + Fault setMiscReg(int misc_reg, const MiscReg &val); + + Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val); + +#if FULL_SYSTEM + Fault hwrei(); + int readIntrFlag(); + void setIntrFlag(int val); + bool inPalMode(); + void trap(Fault fault); + bool simPalCheck(int palFunc); +#else + void syscall(); +#endif + + ListIt iqIt; + bool iqItValid; +}; + +/* +template +template +inline Fault +OzoneDynInst::read(Addr addr, T &data, unsigned flags) +{ + Fault fault = this->cpu->read(addr, data, flags, this); + + if (this->traceData) { + this->traceData->setAddr(addr); + this->traceData->setData(data); + } + + return fault; +} + +template +template +inline Fault +OzoneDynInst::write(T data, Addr addr, unsigned flags, uint64_t *res) +{ + Fault fault = this->cpu->write(data, addr, flags, res, this); + + this->storeSize = sizeof(T); + this->storeData = data; + + if (this->traceData) { + this->traceData->setAddr(addr); + this->traceData->setData(data); + } + + return fault; +} +*/ +#endif // __CPU_OZONE_DYN_INST_HH__ diff --git a/cpu/ozone/dyn_inst_impl.hh b/cpu/ozone/dyn_inst_impl.hh new file mode 100644 index 000000000..2d86ced62 --- /dev/null +++ b/cpu/ozone/dyn_inst_impl.hh @@ -0,0 +1,286 @@ +/* + * Copyright (c) 2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arch/faults.hh" +#include "arch/isa_traits.hh" +#include "config/full_system.hh" +#include "cpu/ozone/dyn_inst.hh" +#include "kern/kernel_stats.hh" + +using namespace TheISA; + +template +OzoneDynInst::OzoneDynInst(FullCPU *cpu) + : BaseDynInst(0, 0, 0, 0, cpu) +{ + this->setCompleted(); + + initInstPtrs(); +} + +template +OzoneDynInst::OzoneDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC, + InstSeqNum seq_num, FullCPU *cpu) + : BaseDynInst(inst, PC, Pred_PC, seq_num, cpu) +{ + initInstPtrs(); +} + +template +OzoneDynInst::OzoneDynInst(StaticInstPtr _staticInst) + : BaseDynInst(_staticInst) +{ + initInstPtrs(); +} + +template +OzoneDynInst::~OzoneDynInst() +{ + DPRINTF(BE, "[sn:%lli] destructor called\n", this->seqNum); + for (int i = 0; i < this->numSrcRegs(); ++i) { + srcInsts[i] = NULL; + } + + for (int i = 0; i < this->numDestRegs(); ++i) { + prevDestInst[i] = NULL; + } + + dependents.clear(); +} + +template +Fault +OzoneDynInst::execute() +{ + // @todo: Pretty convoluted way to avoid squashing from happening when using + // the XC during an instruction's execution (specifically for instructions + // that have sideeffects that use the XC). Fix this. + bool in_syscall = this->thread->inSyscall; + this->thread->inSyscall = true; + + this->fault = this->staticInst->execute(this, this->traceData); + + this->thread->inSyscall = in_syscall; + + return this->fault; +} + +template +Fault +OzoneDynInst::initiateAcc() +{ + // @todo: Pretty convoluted way to avoid squashing from happening when using + // the XC during an instruction's execution (specifically for instructions + // that have sideeffects that use the XC). Fix this. + bool in_syscall = this->thread->inSyscall; + this->thread->inSyscall = true; + + this->fault = this->staticInst->initiateAcc(this, this->traceData); + + this->thread->inSyscall = in_syscall; + + return this->fault; +} + +template +Fault +OzoneDynInst::completeAcc() +{ + if (this->isLoad()) { + this->fault = this->staticInst->completeAcc(this->req->data, + this, + this->traceData); + } else if (this->isStore()) { + this->fault = this->staticInst->completeAcc((uint8_t*)&this->req->result, + this, + this->traceData); + } else { + panic("Unknown type!"); + } + + return this->fault; +} + +template +bool +OzoneDynInst::srcInstReady(int regIdx) +{ + return srcInsts[regIdx]->isCompleted(); +} + +template +void +OzoneDynInst::addDependent(DynInstPtr &dependent_inst) +{ + dependents.push_back(dependent_inst); +} + +template +void +OzoneDynInst::wakeDependents() +{ + for (int i = 0; i < dependents.size(); ++i) { + dependents[i]->markSrcRegReady(); + } +} + +template +void +OzoneDynInst::initInstPtrs() +{ + for (int i = 0; i < MaxInstSrcRegs; ++i) { + srcInsts[i] = NULL; + } + iqItValid = false; +} + +template +bool +OzoneDynInst::srcsReady() +{ + for (int i = 0; i < this->numSrcRegs(); ++i) { + if (!srcInsts[i]->isCompleted()) + return false; + } + + return true; +} + +template +bool +OzoneDynInst::eaSrcsReady() +{ + for (int i = 1; i < this->numSrcRegs(); ++i) { + if (!srcInsts[i]->isCompleted()) + return false; + } + + return true; +} + +template +void +OzoneDynInst::clearDependents() +{ + dependents.clear(); + for (int i = 0; i < this->numSrcRegs(); ++i) { + srcInsts[i] = NULL; + } + for (int i = 0; i < this->numDestRegs(); ++i) { + prevDestInst[i] = NULL; + } +} +template +MiscReg +OzoneDynInst::readMiscReg(int misc_reg) +{ + return this->thread->readMiscReg(misc_reg); +} + +template +MiscReg +OzoneDynInst::readMiscRegWithEffect(int misc_reg, Fault &fault) +{ + return this->thread->readMiscRegWithEffect(misc_reg, fault); +} + +template +Fault +OzoneDynInst::setMiscReg(int misc_reg, const MiscReg &val) +{ + return this->thread->setMiscReg(misc_reg, val); +} + +template +Fault +OzoneDynInst::setMiscRegWithEffect(int misc_reg, const MiscReg &val) +{ + return this->thread->setMiscRegWithEffect(misc_reg, val); +} + +#if FULL_SYSTEM + +template +Fault +OzoneDynInst::hwrei() +{ + if (!this->cpu->inPalMode(this->readPC())) + return new AlphaISA::UnimplementedOpcodeFault; + + this->setNextPC(this->thread->readMiscReg(AlphaISA::IPR_EXC_ADDR)); + + this->cpu->kernelStats->hwrei(); + + this->cpu->checkInterrupts = true; + + // FIXME: XXX check for interrupts? XXX + return NoFault; +} + +template +int +OzoneDynInst::readIntrFlag() +{ +return this->cpu->readIntrFlag(); +} + +template +void +OzoneDynInst::setIntrFlag(int val) +{ + this->cpu->setIntrFlag(val); +} + +template +bool +OzoneDynInst::inPalMode() +{ + return this->cpu->inPalMode(); +} + +template +void +OzoneDynInst::trap(Fault fault) +{ + fault->invoke(this->thread->getXCProxy()); +} + +template +bool +OzoneDynInst::simPalCheck(int palFunc) +{ + return this->cpu->simPalCheck(palFunc); +} +#else +template +void +OzoneDynInst::syscall() +{ + this->cpu->syscall(); +} +#endif diff --git a/cpu/ozone/front_end.cc b/cpu/ozone/front_end.cc new file mode 100644 index 000000000..a974d43cb --- /dev/null +++ b/cpu/ozone/front_end.cc @@ -0,0 +1,7 @@ + +#include "cpu/ozone/front_end_impl.hh" +#include "cpu/ozone/ozone_impl.hh" +#include "cpu/ozone/simple_impl.hh" + +template class FrontEnd; +template class FrontEnd; diff --git a/cpu/ozone/front_end.hh b/cpu/ozone/front_end.hh new file mode 100644 index 000000000..5e257b506 --- /dev/null +++ b/cpu/ozone/front_end.hh @@ -0,0 +1,242 @@ + +#ifndef __CPU_OZONE_FRONT_END_HH__ +#define __CPU_OZONE_FRONT_END_HH__ + +#include + +//#include "cpu/ozone/cpu.hh" +#include "cpu/inst_seq.hh" +#include "cpu/o3/bpred_unit.hh" +#include "cpu/ozone/rename_table.hh" +//#include "cpu/ozone/thread_state.hh" +#include "mem/mem_req.hh" +#include "sim/eventq.hh" +#include "sim/stats.hh" + +class ExecContext; +class MemInterface; +template +class OzoneThreadState; +class PageTable; +template +class TimeBuffer; + +template +class FrontEnd +{ + public: + typedef typename Impl::Params Params; + typedef typename Impl::DynInst DynInst; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::BackEnd BackEnd; + + typedef typename Impl::FullCPU::OzoneXC OzoneXC; + typedef typename Impl::FullCPU::CommStruct CommStruct; + + FrontEnd(Params *params); + + std::string name() const; + + void setCPU(FullCPU *cpu_ptr) + { cpu = cpu_ptr; } + + void setBackEnd(BackEnd *back_end_ptr) + { backEnd = back_end_ptr; } + + void setCommBuffer(TimeBuffer *_comm); + + void setXC(ExecContext *xc_ptr); + + void setThreadState(OzoneThreadState *thread_ptr) + { thread = thread_ptr; } + + void regStats(); + + void tick(); + Fault fetchCacheLine(); + void processInst(DynInstPtr &inst); + void squash(const InstSeqNum &squash_num, const Addr &next_PC, + const bool is_branch = false, const bool branch_taken = false); + DynInstPtr getInst(); + + void processCacheCompletion(); + + void addFreeRegs(int num_freed); + + bool isEmpty() { return instBuffer.empty(); } + + private: + bool updateStatus(); + + void checkBE(); + DynInstPtr getInstFromCacheline(); + void renameInst(DynInstPtr &inst); + // Returns true if we need to stop the front end this cycle + bool processBarriers(DynInstPtr &inst); + + void handleFault(Fault &fault); + + // Align an address (typically a PC) to the start of an I-cache block. + // We fold in the PISA 64- to 32-bit conversion here as well. + Addr icacheBlockAlignPC(Addr addr) + { + addr = TheISA::realPCToFetchPC(addr); + return (addr & ~(cacheBlkMask)); + } + + InstSeqNum getAndIncrementInstSeq() + { return cpu->globalSeqNum++; } + + public: + FullCPU *cpu; + + BackEnd *backEnd; + + ExecContext *xc; + + OzoneThreadState *thread; + + enum Status { + Running, + Idle, + IcacheMissStall, + IcacheMissComplete, + SerializeBlocked, + SerializeComplete, + RenameBlocked, + BEBlocked + }; + + Status status; + + private: + TimeBuffer *comm; + typename TimeBuffer::wire fromCommit; + + typedef typename Impl::BranchPred BranchPred; + + // Typedef for semi-opaque type that holds any information the branch + // predictor needs to update itself. Only two fields are used outside of + // branch predictor, nextPC and isTaken. +// typedef typename BranchPred::BPredInfo BPredInfo; + + BranchPred branchPred; + + class ICacheCompletionEvent : public Event + { + private: + FrontEnd *frontEnd; + + public: + ICacheCompletionEvent(FrontEnd *_fe); + + virtual void process(); + virtual const char *description(); + }; + + ICacheCompletionEvent cacheCompletionEvent; + + MemInterface *icacheInterface; + +#if !FULL_SYSTEM + PageTable *pTable; +#endif + + MemReqPtr memReq; + + /** Mask to get a cache block's address. */ + Addr cacheBlkMask; + + unsigned cacheBlkSize; + + Addr cacheBlkPC; + + /** The cache line being fetched. */ + uint8_t *cacheData; + + bool fetchCacheLineNextCycle; + + bool cacheBlkValid; + + public: + RenameTable renameTable; + + private: + Addr PC; + Addr nextPC; + + public: + void setPC(Addr val) { PC = val; } + void setNextPC(Addr val) { nextPC = val; } + + void dumpInsts(); + + private: + typedef typename std::deque InstBuff; + typedef typename InstBuff::iterator InstBuffIt; + + InstBuff instBuffer; + + int instBufferSize; + + int maxInstBufferSize; + + int width; + + int freeRegs; + + int numPhysRegs; + + bool serializeNext; + + DynInstPtr barrierInst; + + // number of idle cycles +/* + Stats::Average<> notIdleFraction; + Stats::Formula idleFraction; +*/ + // @todo: Consider making these vectors and tracking on a per thread basis. + /** Stat for total number of cycles stalled due to an icache miss. */ + Stats::Scalar<> icacheStallCycles; + /** Stat for total number of fetched instructions. */ + Stats::Scalar<> fetchedInsts; + Stats::Scalar<> fetchedBranches; + /** Stat for total number of predicted branches. */ + Stats::Scalar<> predictedBranches; + /** Stat for total number of cycles spent fetching. */ + Stats::Scalar<> fetchCycles; + + Stats::Scalar<> fetchIdleCycles; + /** Stat for total number of cycles spent squashing. */ + Stats::Scalar<> fetchSquashCycles; + /** Stat for total number of cycles spent blocked due to other stages in + * the pipeline. + */ + Stats::Scalar<> fetchBlockedCycles; + /** Stat for total number of fetched cache lines. */ + Stats::Scalar<> fetchedCacheLines; + /** Distribution of number of instructions fetched each cycle. */ + Stats::Distribution<> fetchNisnDist; +// Stats::Vector<> qfull_iq_occupancy; +// Stats::VectorDistribution<> qfull_iq_occ_dist_; + Stats::Formula idleRate; + Stats::Formula branchRate; + Stats::Formula fetchRate; + Stats::Scalar<> IFQCount; // cumulative IFQ occupancy + Stats::Formula IFQOccupancy; + Stats::Formula IFQLatency; + Stats::Scalar<> IFQFcount; // cumulative IFQ full count + Stats::Formula IFQFullRate; + + Stats::Scalar<> dispatchCountStat; + Stats::Scalar<> dispatchedSerializing; + Stats::Scalar<> dispatchedTempSerializing; + Stats::Scalar<> dispatchSerializeStallCycles; + Stats::Formula dispatchRate; + Stats::Formula regIntFull; + Stats::Formula regFpFull; +}; + +#endif // __CPU_OZONE_FRONT_END_HH__ diff --git a/cpu/ozone/front_end_impl.hh b/cpu/ozone/front_end_impl.hh new file mode 100644 index 000000000..0136d0ef0 --- /dev/null +++ b/cpu/ozone/front_end_impl.hh @@ -0,0 +1,798 @@ + +#include "arch/isa_traits.hh" +#include "base/statistics.hh" +#include "cpu/exec_context.hh" +#include "cpu/exetrace.hh" +#include "cpu/ozone/front_end.hh" +#include "mem/mem_interface.hh" +#include "sim/byte_swap.hh" + +using namespace TheISA; + +template +FrontEnd::FrontEnd(Params *params) + : branchPred(params), + cacheCompletionEvent(this), + icacheInterface(params->icacheInterface), + instBufferSize(0), + maxInstBufferSize(params->maxInstBufferSize), + width(params->frontEndWidth), + freeRegs(params->numPhysicalRegs), + numPhysRegs(params->numPhysicalRegs), + serializeNext(false) +{ + status = Idle; + + // Setup branch predictor. + + // Setup Memory Request + memReq = new MemReq(); + memReq->asid = 0; + memReq->data = new uint8_t[64]; + + // Size of cache block. + cacheBlkSize = icacheInterface ? icacheInterface->getBlockSize() : 64; + + assert(isPowerOf2(cacheBlkSize)); + + // Create mask to get rid of offset bits. + cacheBlkMask = (cacheBlkSize - 1); + + // Create space to store a cache line. + cacheData = new uint8_t[cacheBlkSize]; + + fetchCacheLineNextCycle = true; + + cacheBlkValid = false; + +#if !FULL_SYSTEM + pTable = params->pTable; +#endif +} + +template +std::string +FrontEnd::name() const +{ + return cpu->name() + ".frontend"; +} + +template +void +FrontEnd::setCommBuffer(TimeBuffer *_comm) +{ + comm = _comm; + // @todo: Hardcoded for now. Allow this to be set by a latency. + fromCommit = comm->getWire(-1); +} + +template +void +FrontEnd::setXC(ExecContext *xc_ptr) +{ + xc = xc_ptr; + memReq->xc = xc; +} + +template +void +FrontEnd::regStats() +{ + icacheStallCycles + .name(name() + ".icacheStallCycles") + .desc("Number of cycles fetch is stalled on an Icache miss") + .prereq(icacheStallCycles); + + fetchedInsts + .name(name() + ".fetchedInsts") + .desc("Number of instructions fetch has processed") + .prereq(fetchedInsts); + + fetchedBranches + .name(name() + ".fetchedBranches") + .desc("Number of fetched branches") + .prereq(fetchedBranches); + + predictedBranches + .name(name() + ".predictedBranches") + .desc("Number of branches that fetch has predicted taken") + .prereq(predictedBranches); + + fetchCycles + .name(name() + ".fetchCycles") + .desc("Number of cycles fetch has run and was not squashing or" + " blocked") + .prereq(fetchCycles); + + fetchIdleCycles + .name(name() + ".fetchIdleCycles") + .desc("Number of cycles fetch was idle") + .prereq(fetchIdleCycles); + + fetchSquashCycles + .name(name() + ".fetchSquashCycles") + .desc("Number of cycles fetch has spent squashing") + .prereq(fetchSquashCycles); + + fetchBlockedCycles + .name(name() + ".fetchBlockedCycles") + .desc("Number of cycles fetch has spent blocked") + .prereq(fetchBlockedCycles); + + fetchedCacheLines + .name(name() + ".fetchedCacheLines") + .desc("Number of cache lines fetched") + .prereq(fetchedCacheLines); + + fetchNisnDist + .init(/* base value */ 0, + /* last value */ width, + /* bucket size */ 1) + .name(name() + ".rateDist") + .desc("Number of instructions fetched each cycle (Total)") + .flags(Stats::pdf); + + idleRate + .name(name() + ".idleRate") + .desc("Percent of cycles fetch was idle") + .prereq(idleRate); + idleRate = fetchIdleCycles * 100 / cpu->numCycles; + + branchRate + .name(name() + ".branchRate") + .desc("Number of branch fetches per cycle") + .flags(Stats::total); + branchRate = fetchedBranches / cpu->numCycles; + + fetchRate + .name(name() + ".rate") + .desc("Number of inst fetches per cycle") + .flags(Stats::total); + fetchRate = fetchedInsts / cpu->numCycles; + + IFQCount + .name(name() + ".IFQ:count") + .desc("cumulative IFQ occupancy") + ; + + IFQFcount + .name(name() + ".IFQ:fullCount") + .desc("cumulative IFQ full count") + .flags(Stats::total) + ; + + IFQOccupancy + .name(name() + ".IFQ:occupancy") + .desc("avg IFQ occupancy (inst's)") + ; + IFQOccupancy = IFQCount / cpu->numCycles; + + IFQLatency + .name(name() + ".IFQ:latency") + .desc("avg IFQ occupant latency (cycle's)") + .flags(Stats::total) + ; + + IFQFullRate + .name(name() + ".IFQ:fullRate") + .desc("fraction of time (cycles) IFQ was full") + .flags(Stats::total); + ; + IFQFullRate = IFQFcount * Stats::constant(100) / cpu->numCycles; + + dispatchCountStat + .name(name() + ".DIS:count") + .desc("cumulative count of dispatched insts") + .flags(Stats::total) + ; + + dispatchedSerializing + .name(name() + ".DIS:serializingInsts") + .desc("count of serializing insts dispatched") + .flags(Stats::total) + ; + + dispatchedTempSerializing + .name(name() + ".DIS:tempSerializingInsts") + .desc("count of temporary serializing insts dispatched") + .flags(Stats::total) + ; + + dispatchSerializeStallCycles + .name(name() + ".DIS:serializeStallCycles") + .desc("count of cycles dispatch stalled for serializing inst") + .flags(Stats::total) + ; + + dispatchRate + .name(name() + ".DIS:rate") + .desc("dispatched insts per cycle") + .flags(Stats::total) + ; + dispatchRate = dispatchCountStat / cpu->numCycles; + + regIntFull + .name(name() + ".REG:int:full") + .desc("number of cycles where there were no INT registers") + ; + + regFpFull + .name(name() + ".REG:fp:full") + .desc("number of cycles where there were no FP registers") + ; + IFQLatency = IFQOccupancy / dispatchRate; + + branchPred.regStats(); +} + +template +void +FrontEnd::tick() +{ + // @todo: Maybe I want to just have direct communication... + if (fromCommit->doneSeqNum) { + branchPred.update(fromCommit->doneSeqNum, 0); + } + + IFQCount += instBufferSize; + IFQFcount += instBufferSize == maxInstBufferSize; + + // Fetch cache line + if (status == IcacheMissComplete) { + cacheBlkValid = true; + + status = Running; + if (barrierInst) + status = SerializeBlocked; + if (freeRegs <= 0) + status = RenameBlocked; + checkBE(); + } else if (status == IcacheMissStall) { + DPRINTF(FE, "Still in Icache miss stall.\n"); + icacheStallCycles++; + return; + } + + if (status == RenameBlocked || status == SerializeBlocked || + status == BEBlocked) { + // This might cause the front end to run even though it + // shouldn't, but this should only be a problem for one cycle. + // Also will cause a one cycle bubble between changing state + // and restarting. + DPRINTF(FE, "In blocked status.\n"); + + fetchBlockedCycles++; + + if (status == SerializeBlocked) { + dispatchSerializeStallCycles++; + } + updateStatus(); + return; + } else if (status != IcacheMissComplete) { + if (fetchCacheLineNextCycle) { + Fault fault = fetchCacheLine(); + if (fault != NoFault) { + handleFault(fault); + return; + } + fetchCacheLineNextCycle = false; + } + // If miss, stall until it returns. + if (status == IcacheMissStall) { + // Tell CPU to not tick me for now. + return; + } + } + + fetchCycles++; + + int num_inst = 0; + + // Otherwise loop and process instructions. + // One way to hack infinite width is to set width and maxInstBufferSize + // both really high. Inelegant, but probably will work. + while (num_inst < width && + instBufferSize < maxInstBufferSize) { + // Get instruction from cache line. + DynInstPtr inst = getInstFromCacheline(); + + if (!inst) { + // PC is no longer in the cache line, end fetch. + // Might want to check this at the end of the cycle so that + // there's no cycle lost to checking for a new cache line. + DPRINTF(FE, "Need to get new cache line\n"); + fetchCacheLineNextCycle = true; + break; + } + + // if (generalizeFetch) { + processInst(inst); + + if (status == SerializeBlocked) { + break; + } + + // Possibly push into a time buffer that estimates the front end + // latency + instBuffer.push_back(inst); + ++instBufferSize; + ++num_inst; + // } else { + // fetch(num_inst); + // decode(num_inst); + // rename(num_inst); + // } + + if (inst->predTaken()) { + // Start over with tick? + break; + } else if (freeRegs <= 0) { + DPRINTF(FE, "Ran out of free registers to rename to!\n"); + status = RenameBlocked; + break; + } else if (serializeNext) { + break; + } + } + + fetchNisnDist.sample(num_inst); + checkBE(); + + DPRINTF(FE, "Num insts processed: %i, Inst Buffer size: %i, Free " + "Regs %i\n", num_inst, instBufferSize, freeRegs); +} + +template +Fault +FrontEnd::fetchCacheLine() +{ + // Read a cache line, based on the current PC. +#if FULL_SYSTEM + // Flag to say whether or not address is physical addr. + unsigned flags = cpu->inPalMode() ? PHYSICAL : 0; +#else + unsigned flags = 0; +#endif // FULL_SYSTEM + Fault fault = NoFault; + + // Align the fetch PC so it's at the start of a cache block. + Addr fetch_PC = icacheBlockAlignPC(PC); + + DPRINTF(FE, "Fetching cache line starting at %#x.\n", fetch_PC); + + // Setup the memReq to do a read of the first isntruction's address. + // Set the appropriate read size and flags as well. + memReq->cmd = Read; + memReq->reset(fetch_PC, cacheBlkSize, flags); + + // Translate the instruction request. + fault = cpu->translateInstReq(memReq); + + // In the case of faults, the fetch stage may need to stall and wait + // on what caused the fetch (ITB or Icache miss). +// assert(fault == NoFault); + + // Now do the timing access to see whether or not the instruction + // exists within the cache. + if (icacheInterface && fault == NoFault) { + memReq->completionEvent = NULL; + + memReq->time = curTick; + + MemAccessResult res = icacheInterface->access(memReq); + + // If the cache missed then schedule an event to wake + // up this stage once the cache miss completes. + if (icacheInterface->doEvents() && res != MA_HIT) { + memReq->completionEvent = new ICacheCompletionEvent(this); + + status = IcacheMissStall; + + cacheBlkValid = false; + + DPRINTF(FE, "Cache miss.\n"); + } else { + DPRINTF(FE, "Cache hit.\n"); + + cacheBlkValid = true; + + memcpy(cacheData, memReq->data, memReq->size); + } + } + + // Note that this will set the cache block PC a bit earlier than it should + // be set. + cacheBlkPC = fetch_PC; + + ++fetchedCacheLines; + + DPRINTF(FE, "Done fetching cache line.\n"); + + return fault; +} + +template +void +FrontEnd::processInst(DynInstPtr &inst) +{ + if (processBarriers(inst)) { + return; + } + + Addr inst_PC = inst->readPC(); + +// BPredInfo bp_info = branchPred.lookup(inst_PC); + if (!inst->isControl()) { + inst->setPredTarg(inst->readNextPC()); + } else { + fetchedBranches++; + if (branchPred.predict(inst, inst_PC, inst->threadNumber)) { + predictedBranches++; + } + } + + Addr next_PC = inst->readPredTarg(); + + DPRINTF(FE, "[sn:%lli] Predicted and processed inst PC %#x, next PC " + "%#x\n", inst->seqNum, inst_PC, next_PC); + +// inst->setNextPC(next_PC); +// inst->setBPredInfo(bp_info); + + // Not sure where I should set this + PC = next_PC; + + renameInst(inst); +} + +template +bool +FrontEnd::processBarriers(DynInstPtr &inst) +{ + if (serializeNext) { + inst->setSerializeBefore(); + serializeNext = false; + } else if (!inst->isSerializing()) { + return false; + } + + if (inst->isSerializeBefore() && !inst->isSerializeHandled()) { + DPRINTF(FE, "Serialize before instruction encountered.\n"); + + if (!inst->isTempSerializeBefore()) { + dispatchedSerializing++; + inst->setSerializeHandled(); + } else { + dispatchedTempSerializing++; + } + + // Change status over to BarrierStall so that other stages know + // what this is blocked on. + status = SerializeBlocked; + + barrierInst = inst; + return true; + } else if (inst->isSerializeAfter() && !inst->isSerializeHandled()) { + DPRINTF(FE, "Serialize after instruction encountered.\n"); + + inst->setSerializeHandled(); + + dispatchedSerializing++; + + serializeNext = true; + return false; + } + return false; +} + +template +void +FrontEnd::handleFault(Fault &fault) +{ + DPRINTF(FE, "Fault at fetch, telling commit\n"); + backEnd->fetchFault(fault); + // We're blocked on the back end until it handles this fault. + status = BEBlocked; +} + +template +void +FrontEnd::squash(const InstSeqNum &squash_num, const Addr &next_PC, + const bool is_branch, const bool branch_taken) +{ + DPRINTF(FE, "Squashing from [sn:%lli], setting PC to %#x\n", + squash_num, next_PC); + + while (!instBuffer.empty() && + instBuffer.back()->seqNum > squash_num) { + DynInstPtr inst = instBuffer.back(); + + DPRINTF(FE, "Squashing instruction [sn:%lli] PC %#x\n", + inst->seqNum, inst->readPC()); + + inst->clearDependents(); + + instBuffer.pop_back(); + --instBufferSize; + + // Fix up branch predictor if necessary. +// branchPred.undo(inst->getBPredInfo()); + + freeRegs+= inst->numDestRegs(); + } + + // Copy over rename table from the back end. + renameTable.copyFrom(backEnd->renameTable); + + PC = next_PC; + + // Update BP with proper information. + if (is_branch) { + branchPred.squash(squash_num, next_PC, branch_taken, 0); + } else { + branchPred.squash(squash_num, 0); + } + + // Clear the icache miss if it's outstanding. + if (status == IcacheMissStall && icacheInterface) { + DPRINTF(FE, "Squashing outstanding Icache miss.\n"); + icacheInterface->squash(0); + } + + if (status == SerializeBlocked) { + assert(barrierInst->seqNum > squash_num); + barrierInst = NULL; + } + + // Unless this squash originated from the front end, we're probably + // in running mode now. + // Actually might want to make this latency dependent. + status = Running; + fetchCacheLineNextCycle = true; +} + +template +typename Impl::DynInstPtr +FrontEnd::getInst() +{ + if (instBufferSize == 0) { + return NULL; + } + + DynInstPtr inst = instBuffer.front(); + + instBuffer.pop_front(); + + --instBufferSize; + + dispatchCountStat++; + + return inst; +} + +template +void +FrontEnd::processCacheCompletion() +{ + DPRINTF(FE, "Processing cache completion\n"); + + // Do something here. + if (status != IcacheMissStall) { + DPRINTF(FE, "Previous fetch was squashed.\n"); + return; + } + + status = IcacheMissComplete; + +/* if (checkStall(tid)) { + fetchStatus[tid] = Blocked; + } else { + fetchStatus[tid] = IcacheMissComplete; + } +*/ + memcpy(cacheData, memReq->data, memReq->size); + + // Reset the completion event to NULL. + memReq->completionEvent = NULL; +} + +template +void +FrontEnd::addFreeRegs(int num_freed) +{ + if (status == RenameBlocked && freeRegs + num_freed > 0) { + status = Running; + } + + freeRegs+= num_freed; + + assert(freeRegs <= numPhysRegs); +} + +template +bool +FrontEnd::updateStatus() +{ +// bool rename_block = freeRegs <= 0; + bool serialize_block = !backEnd->robEmpty() || instBufferSize; + bool be_block = cpu->decoupledFrontEnd ? false : backEnd->isBlocked(); + bool ret_val = false; +/* + // Should already be handled through addFreeRegs function + if (status == RenameBlocked && !rename_block) { + status = Running; + ret_val = true; + } +*/ + + if (status == SerializeBlocked && !serialize_block) { + status = SerializeComplete; + ret_val = true; + } + + if (status == BEBlocked && !be_block) { + if (barrierInst) { + status = SerializeBlocked; + } else { + status = Running; + } + ret_val = true; + } + return ret_val; +} + +template +void +FrontEnd::checkBE() +{ + bool be_block = cpu->decoupledFrontEnd ? false : backEnd->isBlocked(); + if (be_block) { + if (status == Running || status == Idle) { + status = BEBlocked; + } + } +} + +template +typename Impl::DynInstPtr +FrontEnd::getInstFromCacheline() +{ + if (status == SerializeComplete) { + DynInstPtr inst = barrierInst; + status = Running; + barrierInst = NULL; + return inst; + } + + InstSeqNum inst_seq; + MachInst inst; + // @todo: Fix this magic number used here to handle word offset (and + // getting rid of PAL bit) + unsigned offset = (PC & cacheBlkMask) & ~3; + + // PC of inst is not in this cache block + if (PC >= (cacheBlkPC + cacheBlkSize) || PC < cacheBlkPC || !cacheBlkValid) { +// DPRINTF(OoOCPU, "OoOCPU: PC is not in this cache block\n"); +// DPRINTF(OoOCPU, "OoOCPU: PC: %#x, cacheBlkPC: %#x, cacheBlkValid: %i", +// PC, cacheBlkPC, cacheBlkValid); +// panic("Instruction not in cache line or cache line invalid!"); + return NULL; + } + + ////////////////////////// + // Fetch one instruction + ////////////////////////// + + // Get a sequence number. + inst_seq = getAndIncrementInstSeq(); + + // Make sure this is a valid index. + assert(offset <= cacheBlkSize - sizeof(MachInst)); + + // Get the instruction from the array of the cache line. + inst = htog(*reinterpret_cast(&cacheData[offset])); + + ExtMachInst decode_inst = TheISA::makeExtMI(inst, PC); + + // Create a new DynInst from the instruction fetched. + DynInstPtr instruction = new DynInst(decode_inst, PC, PC+sizeof(MachInst), + inst_seq, cpu); + + instruction->setState(thread); + + DPRINTF(FE, "Instruction [sn:%lli] created, with PC %#x\n%s\n", + inst_seq, instruction->readPC(), + instruction->staticInst->disassemble(PC)); + + instruction->traceData = + Trace::getInstRecord(curTick, xc, cpu, + instruction->staticInst, + instruction->readPC(), 0); + + // Increment stat of fetched instructions. + ++fetchedInsts; + + return instruction; +} + +template +void +FrontEnd::renameInst(DynInstPtr &inst) +{ + DynInstPtr src_inst = NULL; + int num_src_regs = inst->numSrcRegs(); + if (num_src_regs == 0) { + inst->setCanIssue(); + } else { + for (int i = 0; i < num_src_regs; ++i) { + src_inst = renameTable[inst->srcRegIdx(i)]; + + inst->setSrcInst(src_inst, i); + + DPRINTF(FE, "[sn:%lli]: Src reg %i is inst [sn:%lli]\n", + inst->seqNum, (int)inst->srcRegIdx(i), src_inst->seqNum); + + if (src_inst->isCompleted()) { + DPRINTF(FE, "Reg ready.\n"); + inst->markSrcRegReady(i); + } else { + DPRINTF(FE, "Adding to dependent list.\n"); + src_inst->addDependent(inst); + } + } + } + + for (int i = 0; i < inst->numDestRegs(); ++i) { + RegIndex idx = inst->destRegIdx(i); + + DPRINTF(FE, "Dest reg %i is now inst [sn:%lli], was previously " + "[sn:%lli]\n", + (int)inst->destRegIdx(i), inst->seqNum, + renameTable[idx]->seqNum); + + inst->setPrevDestInst(renameTable[idx], i); + + renameTable[idx] = inst; + --freeRegs; + } +} + +template +void +FrontEnd::dumpInsts() +{ + cprintf("instBuffer size: %i\n", instBuffer.size()); + + InstBuffIt buff_it = instBuffer.begin(); + + for (int num = 0; buff_it != instBuffer.end(); num++) { + cprintf("Instruction:%i\nPC:%#x\n[tid:%i]\n[sn:%lli]\nIssued:%i\n" + "Squashed:%i\n\n", + num, (*buff_it)->readPC(), (*buff_it)->threadNumber, + (*buff_it)->seqNum, (*buff_it)->isIssued(), + (*buff_it)->isSquashed()); + buff_it++; + } + +} + +template +FrontEnd::ICacheCompletionEvent::ICacheCompletionEvent(FrontEnd *fe) + : Event(&mainEventQueue, Delayed_Writeback_Pri), frontEnd(fe) +{ + this->setFlags(Event::AutoDelete); +} + +template +void +FrontEnd::ICacheCompletionEvent::process() +{ + frontEnd->processCacheCompletion(); +} + +template +const char * +FrontEnd::ICacheCompletionEvent::description() +{ + return "ICache completion event"; +} diff --git a/cpu/ozone/inorder_back_end.cc b/cpu/ozone/inorder_back_end.cc new file mode 100644 index 000000000..14db610d2 --- /dev/null +++ b/cpu/ozone/inorder_back_end.cc @@ -0,0 +1,5 @@ + +#include "cpu/ozone/inorder_back_end_impl.hh" +#include "cpu/ozone/simple_impl.hh" + +template class InorderBackEnd; diff --git a/cpu/ozone/inorder_back_end.hh b/cpu/ozone/inorder_back_end.hh new file mode 100644 index 000000000..e621f6c01 --- /dev/null +++ b/cpu/ozone/inorder_back_end.hh @@ -0,0 +1,417 @@ + +#ifndef __CPU_OZONE_INORDER_BACK_END_HH__ +#define __CPU_OZONE_INORDER_BACK_END_HH__ + +#include + +#include "arch/faults.hh" +#include "base/timebuf.hh" +#include "cpu/exec_context.hh" +#include "cpu/inst_seq.hh" +#include "cpu/ozone/rename_table.hh" +#include "cpu/ozone/thread_state.hh" +#include "mem/mem_interface.hh" +#include "mem/mem_req.hh" +#include "sim/eventq.hh" + +template +class InorderBackEnd +{ + public: + typedef typename Impl::Params Params; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::FrontEnd FrontEnd; + + typedef typename FullCPU::OzoneXC OzoneXC; + typedef typename Impl::FullCPU::CommStruct CommStruct; + + InorderBackEnd(Params *params); + + std::string name() const; + + void setCPU(FullCPU *cpu_ptr) + { cpu = cpu_ptr; } + + void setFrontEnd(FrontEnd *front_end_ptr) + { frontEnd = front_end_ptr; } + + void setCommBuffer(TimeBuffer *_comm) + { comm = _comm; } + + void setXC(ExecContext *xc_ptr); + + void setThreadState(OzoneThreadState *thread_ptr); + + void regStats() { } + +#if FULL_SYSTEM + void checkInterrupts(); +#endif + + void tick(); + void executeInsts(); + void squash(const InstSeqNum &squash_num, const Addr &next_PC); + + void squashFromXC(); + + bool robEmpty() { return instList.empty(); } + + bool isFull() { return false; } + bool isBlocked() { return status == DcacheMissStoreStall || + status == DcacheMissLoadStall || + interruptBlocked; } + + void fetchFault(Fault &fault); + + void dumpInsts(); + + private: + void handleFault(); + + void setSquashInfoFromXC(); + + bool squashPending; + InstSeqNum squashSeqNum; + Addr squashNextPC; + + Fault faultFromFetch; + + bool interruptBlocked; + + public: + template + Fault read(Addr addr, T &data, unsigned flags); + + template + Fault read(MemReqPtr &req, T &data, int load_idx); + + template + Fault write(T data, Addr addr, unsigned flags, uint64_t *res); + + template + Fault write(MemReqPtr &req, T &data, int store_idx); + + Addr readCommitPC() { return commitPC; } + + Addr commitPC; + + public: + FullCPU *cpu; + + FrontEnd *frontEnd; + + ExecContext *xc; + + OzoneThreadState *thread; + + RenameTable renameTable; + + protected: + enum Status { + Running, + Idle, + DcacheMissLoadStall, + DcacheMissStoreStall, + DcacheMissComplete, + Blocked + }; + + Status status; + + class DCacheCompletionEvent : public Event + { + private: + InorderBackEnd *be; + + public: + DCacheCompletionEvent(InorderBackEnd *_be); + + virtual void process(); + virtual const char *description(); + + DynInstPtr inst; + }; + + friend class DCacheCompletionEvent; + + DCacheCompletionEvent cacheCompletionEvent; + + MemInterface *dcacheInterface; + + MemReqPtr memReq; + + private: + typedef typename std::list::iterator InstListIt; + + std::list instList; + + // General back end width. Used if the more specific isn't given. + int width; + + int latency; + + int squashLatency; + + TimeBuffer numInstsToWB; + TimeBuffer::wire instsAdded; + TimeBuffer::wire instsToExecute; + + TimeBuffer *comm; + // number of cycles stalled for D-cache misses + Stats::Scalar<> dcacheStallCycles; + Counter lastDcacheStall; +}; + +template +template +Fault +InorderBackEnd::read(Addr addr, T &data, unsigned flags) +{ + memReq->reset(addr, sizeof(T), flags); + + // translate to physical address + Fault fault = cpu->translateDataReadReq(memReq); + + // if we have a cache, do cache access too + if (fault == NoFault && dcacheInterface) { + memReq->cmd = Read; + memReq->completionEvent = NULL; + memReq->time = curTick; + memReq->flags &= ~INST_READ; + MemAccessResult result = dcacheInterface->access(memReq); + + // Ugly hack to get an event scheduled *only* if the access is + // a miss. We really should add first-class support for this + // at some point. + if (result != MA_HIT) { + // Fix this hack for keeping funcExeInst correct with loads that + // are executed twice. + memReq->completionEvent = &cacheCompletionEvent; + lastDcacheStall = curTick; +// unscheduleTickEvent(); + status = DcacheMissLoadStall; + DPRINTF(IBE, "Dcache miss stall!\n"); + } else { + // do functional access + DPRINTF(IBE, "Dcache hit!\n"); + } + } +/* + if (!dcacheInterface && (memReq->flags & UNCACHEABLE)) + recordEvent("Uncached Read"); +*/ + return fault; +} +#if 0 +template +template +Fault +InorderBackEnd::read(MemReqPtr &req, T &data) +{ +#if FULL_SYSTEM && defined(TARGET_ALPHA) + if (req->flags & LOCKED) { + req->xc->setMiscReg(TheISA::Lock_Addr_DepTag, req->paddr); + req->xc->setMiscReg(TheISA::Lock_Flag_DepTag, true); + } +#endif + + Fault error; + error = thread->mem->read(req, data); + data = LittleEndianGuest::gtoh(data); + return error; +} +#endif + +template +template +Fault +InorderBackEnd::write(T data, Addr addr, unsigned flags, uint64_t *res) +{ + memReq->reset(addr, sizeof(T), flags); + + // translate to physical address + Fault fault = cpu->translateDataWriteReq(memReq); + + if (fault == NoFault && dcacheInterface) { + memReq->cmd = Write; +// memcpy(memReq->data,(uint8_t *)&data,memReq->size); + memReq->completionEvent = NULL; + memReq->time = curTick; + memReq->flags &= ~INST_READ; + MemAccessResult result = dcacheInterface->access(memReq); + + // Ugly hack to get an event scheduled *only* if the access is + // a miss. We really should add first-class support for this + // at some point. + if (result != MA_HIT) { + memReq->completionEvent = &cacheCompletionEvent; + lastDcacheStall = curTick; +// unscheduleTickEvent(); + status = DcacheMissStoreStall; + DPRINTF(IBE, "Dcache miss stall!\n"); + } else { + DPRINTF(IBE, "Dcache hit!\n"); + } + } + + if (res && (fault == NoFault)) + *res = memReq->result; +/* + if (!dcacheInterface && (memReq->flags & UNCACHEABLE)) + recordEvent("Uncached Write"); +*/ + return fault; +} +#if 0 +template +template +Fault +InorderBackEnd::write(MemReqPtr &req, T &data) +{ +#if FULL_SYSTEM && defined(TARGET_ALPHA) + ExecContext *xc; + + // If this is a store conditional, act appropriately + if (req->flags & LOCKED) { + xc = req->xc; + + if (req->flags & UNCACHEABLE) { + // Don't update result register (see stq_c in isa_desc) + req->result = 2; + xc->setStCondFailures(0);//Needed? [RGD] + } else { + bool lock_flag = xc->readMiscReg(TheISA::Lock_Flag_DepTag); + Addr lock_addr = xc->readMiscReg(TheISA::Lock_Addr_DepTag); + req->result = lock_flag; + if (!lock_flag || + ((lock_addr & ~0xf) != (req->paddr & ~0xf))) { + xc->setMiscReg(TheISA::Lock_Flag_DepTag, false); + xc->setStCondFailures(xc->readStCondFailures() + 1); + if (((xc->readStCondFailures()) % 100000) == 0) { + std::cerr << "Warning: " + << xc->readStCondFailures() + << " consecutive store conditional failures " + << "on cpu " << req->xc->readCpuId() + << std::endl; + } + return NoFault; + } + else xc->setStCondFailures(0); + } + } + + // Need to clear any locked flags on other proccessors for + // this address. Only do this for succsful Store Conditionals + // and all other stores (WH64?). Unsuccessful Store + // Conditionals would have returned above, and wouldn't fall + // through. + for (int i = 0; i < cpu->system->execContexts.size(); i++){ + xc = cpu->system->execContexts[i]; + if ((xc->readMiscReg(TheISA::Lock_Addr_DepTag) & ~0xf) == + (req->paddr & ~0xf)) { + xc->setMiscReg(TheISA::Lock_Flag_DepTag, false); + } + } + +#endif + return thread->mem->write(req, (T)LittleEndianGuest::htog(data)); +} +#endif + +template +template +Fault +InorderBackEnd::read(MemReqPtr &req, T &data, int load_idx) +{ +// panic("Unimplemented!"); +// memReq->reset(addr, sizeof(T), flags); + + // translate to physical address +// Fault fault = cpu->translateDataReadReq(req); + + // if we have a cache, do cache access too + if (dcacheInterface) { + req->cmd = Read; + req->completionEvent = NULL; + req->data = new uint8_t[64]; + req->time = curTick; + req->flags &= ~INST_READ; + MemAccessResult result = dcacheInterface->access(req); + + // Ugly hack to get an event scheduled *only* if the access is + // a miss. We really should add first-class support for this + // at some point. + if (result != MA_HIT) { + req->completionEvent = &cacheCompletionEvent; + lastDcacheStall = curTick; +// unscheduleTickEvent(); + status = DcacheMissLoadStall; + DPRINTF(IBE, "Dcache miss load stall!\n"); + } else { + DPRINTF(IBE, "Dcache hit!\n"); + + } + } + +/* + if (!dcacheInterface && (req->flags & UNCACHEABLE)) + recordEvent("Uncached Read"); +*/ + return NoFault; +} + +template +template +Fault +InorderBackEnd::write(MemReqPtr &req, T &data, int store_idx) +{ +// req->reset(addr, sizeof(T), flags); + + // translate to physical address +// Fault fault = cpu->translateDataWriteReq(req); + + if (dcacheInterface) { + req->cmd = Write; + req->data = new uint8_t[64]; + memcpy(req->data,(uint8_t *)&data,req->size); + req->completionEvent = NULL; + req->time = curTick; + req->flags &= ~INST_READ; + MemAccessResult result = dcacheInterface->access(req); + + // Ugly hack to get an event scheduled *only* if the access is + // a miss. We really should add first-class support for this + // at some point. + if (result != MA_HIT) { + req->completionEvent = &cacheCompletionEvent; + lastDcacheStall = curTick; +// unscheduleTickEvent(); + status = DcacheMissStoreStall; + DPRINTF(IBE, "Dcache miss store stall!\n"); + } else { + DPRINTF(IBE, "Dcache hit!\n"); + + } + } + + if (req->flags & LOCKED) { + if (req->flags & UNCACHEABLE) { + // Don't update result register (see stq_c in isa_desc) + req->result = 2; + } else { + req->result = 1; + } + } +/* + if (res && (fault == NoFault)) + *res = req->result; + */ +/* + if (!dcacheInterface && (req->flags & UNCACHEABLE)) + recordEvent("Uncached Write"); +*/ + return NoFault; +} + +#endif // __CPU_OZONE_INORDER_BACK_END_HH__ diff --git a/cpu/ozone/inorder_back_end_impl.hh b/cpu/ozone/inorder_back_end_impl.hh new file mode 100644 index 000000000..5a378ec76 --- /dev/null +++ b/cpu/ozone/inorder_back_end_impl.hh @@ -0,0 +1,519 @@ + +#include "arch/faults.hh" +#include "arch/isa_traits.hh" +#include "cpu/ozone/inorder_back_end.hh" +#include "cpu/ozone/thread_state.hh" + +using namespace TheISA; + +template +InorderBackEnd::InorderBackEnd(Params *params) + : squashPending(false), + squashSeqNum(0), + squashNextPC(0), + faultFromFetch(NoFault), + interruptBlocked(false), + cacheCompletionEvent(this), + dcacheInterface(params->dcacheInterface), + width(params->backEndWidth), + latency(params->backEndLatency), + squashLatency(params->backEndSquashLatency), + numInstsToWB(0, latency + 1) +{ + instsAdded = numInstsToWB.getWire(latency); + instsToExecute = numInstsToWB.getWire(0); + + memReq = new MemReq; + memReq->data = new uint8_t[64]; + status = Running; +} + +template +std::string +InorderBackEnd::name() const +{ + return cpu->name() + ".inorderbackend"; +} + +template +void +InorderBackEnd::setXC(ExecContext *xc_ptr) +{ + xc = xc_ptr; + memReq->xc = xc; +} + +template +void +InorderBackEnd::setThreadState(OzoneThreadState *thread_ptr) +{ + thread = thread_ptr; + thread->setFuncExeInst(0); +} + +#if FULL_SYSTEM +template +void +InorderBackEnd::checkInterrupts() +{ + //Check if there are any outstanding interrupts + //Handle the interrupts + int ipl = 0; + int summary = 0; + + cpu->checkInterrupts = false; + + if (thread->readMiscReg(IPR_ASTRR)) + panic("asynchronous traps not implemented\n"); + + if (thread->readMiscReg(IPR_SIRR)) { + for (int i = INTLEVEL_SOFTWARE_MIN; + i < INTLEVEL_SOFTWARE_MAX; i++) { + if (thread->readMiscReg(IPR_SIRR) & (ULL(1) << i)) { + // See table 4-19 of the 21164 hardware reference + ipl = (i - INTLEVEL_SOFTWARE_MIN) + 1; + summary |= (ULL(1) << i); + } + } + } + + uint64_t interrupts = cpu->intr_status(); + + if (interrupts) { + for (int i = INTLEVEL_EXTERNAL_MIN; + i < INTLEVEL_EXTERNAL_MAX; i++) { + if (interrupts & (ULL(1) << i)) { + // See table 4-19 of the 21164 hardware reference + ipl = i; + summary |= (ULL(1) << i); + } + } + } + + if (ipl && ipl > thread->readMiscReg(IPR_IPLR)) { + thread->inSyscall = true; + + thread->setMiscReg(IPR_ISR, summary); + thread->setMiscReg(IPR_INTID, ipl); + Fault(new InterruptFault)->invoke(xc); + DPRINTF(Flow, "Interrupt! IPLR=%d ipl=%d summary=%x\n", + thread->readMiscReg(IPR_IPLR), ipl, summary); + + // May need to go 1 inst prior + squashPending = true; + + thread->inSyscall = false; + + setSquashInfoFromXC(); + } +} +#endif + +template +void +InorderBackEnd::tick() +{ + // Squash due to an external source + // Not sure if this or an interrupt has higher priority + if (squashPending) { + squash(squashSeqNum, squashNextPC); + return; + } + + // if (interrupt) then set thread PC, stall front end, record that + // I'm waiting for it to drain. (for now just squash) +#if FULL_SYSTEM + if (interruptBlocked || + (cpu->checkInterrupts && + cpu->check_interrupts() && + !cpu->inPalMode())) { + if (!robEmpty()) { + interruptBlocked = true; + } else if (robEmpty() && cpu->inPalMode()) { + // Will need to let the front end continue a bit until + // we're out of pal mode. Hopefully we never get into an + // infinite loop... + interruptBlocked = false; + } else { + interruptBlocked = false; + checkInterrupts(); + return; + } + } +#endif + + if (status != DcacheMissLoadStall && + status != DcacheMissStoreStall) { + for (int i = 0; i < width && (*instsAdded) < width; ++i) { + DynInstPtr inst = frontEnd->getInst(); + + if (!inst) + break; + + instList.push_back(inst); + + (*instsAdded)++; + } + +#if FULL_SYSTEM + if (faultFromFetch && robEmpty() && frontEnd->isEmpty()) { + handleFault(); + } else { + executeInsts(); + } +#else + executeInsts(); +#endif + } +} + +template +void +InorderBackEnd::executeInsts() +{ + bool completed_last_inst = true; + int insts_to_execute = *instsToExecute; + int freed_regs = 0; + + while (insts_to_execute > 0) { + assert(!instList.empty()); + DynInstPtr inst = instList.front(); + + commitPC = inst->readPC(); + + thread->setPC(commitPC); + thread->setNextPC(inst->readNextPC()); + +#if FULL_SYSTEM + int count = 0; + Addr oldpc; + do { + if (count == 0) + assert(!thread->inSyscall && !thread->trapPending); + oldpc = thread->readPC(); + cpu->system->pcEventQueue.service( + thread->getXCProxy()); + count++; + } while (oldpc != thread->readPC()); + if (count > 1) { + DPRINTF(IBE, "PC skip function event, stopping commit\n"); + completed_last_inst = false; + squashPending = true; + break; + } +#endif + + Fault inst_fault = NoFault; + + if (status == DcacheMissComplete) { + DPRINTF(IBE, "Completing inst [sn:%lli]\n", inst->seqNum); + status = Running; + } else if (inst->isMemRef() && status != DcacheMissComplete && + (!inst->isDataPrefetch() && !inst->isInstPrefetch())) { + DPRINTF(IBE, "Initiating mem op inst [sn:%lli] PC: %#x\n", + inst->seqNum, inst->readPC()); + + cacheCompletionEvent.inst = inst; + inst_fault = inst->initiateAcc(); + if (inst_fault == NoFault && + status != DcacheMissLoadStall && + status != DcacheMissStoreStall) { + inst_fault = inst->completeAcc(); + } + ++thread->funcExeInst; + } else { + DPRINTF(IBE, "Executing inst [sn:%lli] PC: %#x\n", + inst->seqNum, inst->readPC()); + inst_fault = inst->execute(); + ++thread->funcExeInst; + } + + // Will need to be able to break this loop in case the load + // misses. Split access/complete ops would be useful here + // with writeback events. + if (status == DcacheMissLoadStall) { + *instsToExecute = insts_to_execute; + + completed_last_inst = false; + break; + } else if (status == DcacheMissStoreStall) { + // Figure out how to fix this hack. Probably have DcacheMissLoad + // vs DcacheMissStore. + *instsToExecute = insts_to_execute; + completed_last_inst = false; +/* + instList.pop_front(); + --insts_to_execute; + if (inst->traceData) { + inst->traceData->finalize(); + } +*/ + + // Don't really need to stop for a store stall as long as + // the memory system is able to handle store forwarding + // and such. Breaking out might help avoid the cache + // interface becoming blocked. + break; + } + + inst->setExecuted(); + inst->setCompleted(); + inst->setCanCommit(); + + instList.pop_front(); + + --insts_to_execute; + --(*instsToExecute); + + if (inst->traceData) { + inst->traceData->finalize(); + inst->traceData = NULL; + } + + if (inst_fault != NoFault) { +#if FULL_SYSTEM + DPRINTF(IBE, "Inst [sn:%lli] PC %#x has a fault\n", + inst->seqNum, inst->readPC()); + + assert(!thread->inSyscall); + + thread->inSyscall = true; + + // Hack for now; DTB will sometimes need the machine instruction + // for when faults happen. So we will set it here, prior to the + // DTB possibly needing it for this translation. + thread->setInst( + static_cast(inst->staticInst->machInst)); + + // Consider holding onto the trap and waiting until the trap event + // happens for this to be executed. + inst_fault->invoke(xc); + + // Exit state update mode to avoid accidental updating. + thread->inSyscall = false; + + squashPending = true; + + // Generate trap squash event. +// generateTrapEvent(tid); + completed_last_inst = false; + break; +#else // !FULL_SYSTEM + panic("fault (%d) detected @ PC %08p", inst_fault, + inst->PC); +#endif // FULL_SYSTEM + } + + for (int i = 0; i < inst->numDestRegs(); ++i) { + renameTable[inst->destRegIdx(i)] = inst; + thread->renameTable[inst->destRegIdx(i)] = inst; + ++freed_regs; + } + + inst->clearDependents(); + + comm->access(0)->doneSeqNum = inst->seqNum; + + if (inst->mispredicted()) { + squash(inst->seqNum, inst->readNextPC()); + + thread->setNextPC(inst->readNextPC()); + + break; + } else if (squashPending) { + // Something external happened that caused the CPU to squash. + // Break out of commit and handle the squash next cycle. + break; + } + // If it didn't mispredict, then it executed fine. Send back its + // registers and BP info? What about insts that may still have + // latency, like loads? Probably can send back the information after + // it is completed. + + // keep an instruction count + cpu->numInst++; + thread->numInsts++; + } + + frontEnd->addFreeRegs(freed_regs); + + assert(insts_to_execute >= 0); + + // Should only advance this if I have executed all instructions. + if (insts_to_execute == 0) { + numInstsToWB.advance(); + } + + // Should I set the PC to the next PC here? What do I set next PC to? + if (completed_last_inst) { + thread->setPC(thread->readNextPC()); + thread->setNextPC(thread->readPC() + sizeof(MachInst)); + } + + if (squashPending) { + setSquashInfoFromXC(); + } +} + +template +void +InorderBackEnd::handleFault() +{ + DPRINTF(Commit, "Handling fault from fetch\n"); + + assert(!thread->inSyscall); + + thread->inSyscall = true; + + // Consider holding onto the trap and waiting until the trap event + // happens for this to be executed. + faultFromFetch->invoke(xc); + + // Exit state update mode to avoid accidental updating. + thread->inSyscall = false; + + squashPending = true; + + setSquashInfoFromXC(); +} + +template +void +InorderBackEnd::squash(const InstSeqNum &squash_num, const Addr &next_PC) +{ + DPRINTF(IBE, "Squashing from [sn:%lli], setting PC to %#x\n", + squash_num, next_PC); + + InstListIt squash_it = --(instList.end()); + + int freed_regs = 0; + + while (!instList.empty() && (*squash_it)->seqNum > squash_num) { + DynInstPtr inst = *squash_it; + + DPRINTF(IBE, "Squashing instruction PC %#x, [sn:%lli].\n", + inst->readPC(), + inst->seqNum); + + // May cause problems with misc regs + freed_regs+= inst->numDestRegs(); + inst->clearDependents(); + squash_it--; + instList.pop_back(); + } + + frontEnd->addFreeRegs(freed_regs); + + for (int i = 0; i < latency+1; ++i) { + numInstsToWB.advance(); + } + + squashPending = false; + + // Probably want to make sure that this squash is the one that set the + // thread into inSyscall mode. + thread->inSyscall = false; + + // Tell front end to squash, reset PC to new one. + frontEnd->squash(squash_num, next_PC); + + faultFromFetch = NULL; +} + +template +void +InorderBackEnd::squashFromXC() +{ + // Record that I need to squash + squashPending = true; + + thread->inSyscall = true; +} + +template +void +InorderBackEnd::setSquashInfoFromXC() +{ + // Need to handle the case of the instList being empty. In that case + // probably any number works, except maybe with stores in the store buffer. + squashSeqNum = instList.empty() ? 0 : instList.front()->seqNum - 1; + + squashNextPC = thread->PC; +} + +template +void +InorderBackEnd::fetchFault(Fault &fault) +{ + faultFromFetch = fault; +} + +template +void +InorderBackEnd::dumpInsts() +{ + int num = 0; + int valid_num = 0; + + InstListIt inst_list_it = instList.begin(); + + cprintf("Inst list size: %i\n", instList.size()); + + while (inst_list_it != instList.end()) + { + cprintf("Instruction:%i\n", + num); + if (!(*inst_list_it)->isSquashed()) { + if (!(*inst_list_it)->isIssued()) { + ++valid_num; + cprintf("Count:%i\n", valid_num); + } else if ((*inst_list_it)->isMemRef() && + !(*inst_list_it)->memOpDone) { + // Loads that have not been marked as executed still count + // towards the total instructions. + ++valid_num; + cprintf("Count:%i\n", valid_num); + } + } + + cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" + "Issued:%i\nSquashed:%i\n", + (*inst_list_it)->readPC(), + (*inst_list_it)->seqNum, + (*inst_list_it)->threadNumber, + (*inst_list_it)->isIssued(), + (*inst_list_it)->isSquashed()); + + if ((*inst_list_it)->isMemRef()) { + cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); + } + + cprintf("\n"); + + inst_list_it++; + ++num; + } +} + +template +InorderBackEnd::DCacheCompletionEvent::DCacheCompletionEvent( + InorderBackEnd *_be) + : Event(&mainEventQueue, CPU_Tick_Pri), be(_be) +{ +// this->setFlags(Event::AutoDelete); +} + +template +void +InorderBackEnd::DCacheCompletionEvent::process() +{ + inst->completeAcc(); + be->status = DcacheMissComplete; +} + +template +const char * +InorderBackEnd::DCacheCompletionEvent::description() +{ + return "DCache completion event"; +} diff --git a/cpu/ozone/inst_queue.cc b/cpu/ozone/inst_queue.cc new file mode 100644 index 000000000..9c61602d9 --- /dev/null +++ b/cpu/ozone/inst_queue.cc @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/ozone/dyn_inst.hh" +#include "cpu/ozone/ozone_impl.hh" +#include "cpu/ozone/simple_impl.hh" +#include "cpu/ozone/inst_queue_impl.hh" + +// Force instantiation of InstructionQueue. +template class InstQueue; +template class InstQueue; diff --git a/cpu/ozone/inst_queue.hh b/cpu/ozone/inst_queue.hh new file mode 100644 index 000000000..2cbbb7987 --- /dev/null +++ b/cpu/ozone/inst_queue.hh @@ -0,0 +1,506 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_OZONE_INST_QUEUE_HH__ +#define __CPU_OZONE_INST_QUEUE_HH__ + +#include +#include +#include +#include + +#include "base/statistics.hh" +#include "base/timebuf.hh" +#include "cpu/inst_seq.hh" +#include "sim/host.hh" + +class FUPool; +class MemInterface; + +/** + * A standard instruction queue class. It holds ready instructions, in + * order, in seperate priority queues to facilitate the scheduling of + * instructions. The IQ uses a separate linked list to track dependencies. + * Similar to the rename map and the free list, it expects that + * floating point registers have their indices start after the integer + * registers (ie with 96 int and 96 fp registers, regs 0-95 are integer + * and 96-191 are fp). This remains true even for both logical and + * physical register indices. The IQ depends on the memory dependence unit to + * track when memory operations are ready in terms of ordering; register + * dependencies are tracked normally. Right now the IQ also handles the + * execution timing; this is mainly to allow back-to-back scheduling without + * requiring IEW to be able to peek into the IQ. At the end of the execution + * latency, the instruction is put into the queue to execute, where it will + * have the execute() function called on it. + * @todo: Make IQ able to handle multiple FU pools. + */ +template +class InstQueue +{ + public: + //Typedefs from the Impl. + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::Params Params; + typedef typename Impl::IssueStruct IssueStruct; +/* + typedef typename Impl::CPUPol::IEW IEW; + typedef typename Impl::CPUPol::MemDepUnit MemDepUnit; + typedef typename Impl::CPUPol::IssueStruct IssueStruct; + typedef typename Impl::CPUPol::TimeStruct TimeStruct; +*/ + // Typedef of iterator through the list of instructions. + typedef typename std::list::iterator ListIt; + + friend class Impl::FullCPU; +#if 0 + /** FU completion event class. */ + class FUCompletion : public Event { + private: + /** Executing instruction. */ + DynInstPtr inst; + + /** Index of the FU used for executing. */ + int fuIdx; + + /** Pointer back to the instruction queue. */ + InstQueue *iqPtr; + + public: + /** Construct a FU completion event. */ + FUCompletion(DynInstPtr &_inst, int fu_idx, + InstQueue *iq_ptr); + + virtual void process(); + virtual const char *description(); + }; +#endif + /** Constructs an IQ. */ + InstQueue(Params *params); + + /** Destructs the IQ. */ + ~InstQueue(); + + /** Returns the name of the IQ. */ + std::string name() const; + + /** Registers statistics. */ + void regStats(); + + /** Sets CPU pointer. */ + void setCPU(FullCPU *_cpu) { cpu = _cpu; } +#if 0 + /** Sets active threads list. */ + void setActiveThreads(list *at_ptr); + + /** Sets the IEW pointer. */ + void setIEW(IEW *iew_ptr) { iewStage = iew_ptr; } +#endif + /** Sets the timer buffer between issue and execute. */ + void setIssueToExecuteQueue(TimeBuffer *i2eQueue); +#if 0 + /** Sets the global time buffer. */ + void setTimeBuffer(TimeBuffer *tb_ptr); + + /** Number of entries needed for given amount of threads. */ + int entryAmount(int num_threads); + + /** Resets max entries for all threads. */ + void resetEntries(); +#endif + /** Returns total number of free entries. */ + unsigned numFreeEntries(); + + /** Returns number of free entries for a thread. */ + unsigned numFreeEntries(unsigned tid); + + /** Returns whether or not the IQ is full. */ + bool isFull(); + + /** Returns whether or not the IQ is full for a specific thread. */ + bool isFull(unsigned tid); + + /** Returns if there are any ready instructions in the IQ. */ + bool hasReadyInsts(); + + /** Inserts a new instruction into the IQ. */ + void insert(DynInstPtr &new_inst); + + /** Inserts a new, non-speculative instruction into the IQ. */ + void insertNonSpec(DynInstPtr &new_inst); +#if 0 + /** + * Advances the tail of the IQ, used if an instruction is not added to the + * IQ for scheduling. + * @todo: Rename this function. + */ + void advanceTail(DynInstPtr &inst); + + /** Process FU completion event. */ + void processFUCompletion(DynInstPtr &inst, int fu_idx); +#endif + /** + * Schedules ready instructions, adding the ready ones (oldest first) to + * the queue to execute. + */ + void scheduleReadyInsts(); + + /** Schedules a single specific non-speculative instruction. */ + void scheduleNonSpec(const InstSeqNum &inst); + + /** + * Commits all instructions up to and including the given sequence number, + * for a specific thread. + */ + void commit(const InstSeqNum &inst, unsigned tid = 0); + + /** Wakes all dependents of a completed instruction. */ + void wakeDependents(DynInstPtr &completed_inst); + + /** Adds a ready memory instruction to the ready list. */ + void addReadyMemInst(DynInstPtr &ready_inst); +#if 0 + /** + * Reschedules a memory instruction. It will be ready to issue once + * replayMemInst() is called. + */ + void rescheduleMemInst(DynInstPtr &resched_inst); + + /** Replays a memory instruction. It must be rescheduled first. */ + void replayMemInst(DynInstPtr &replay_inst); +#endif + /** Completes a memory operation. */ + void completeMemInst(DynInstPtr &completed_inst); +#if 0 + /** Indicates an ordering violation between a store and a load. */ + void violation(DynInstPtr &store, DynInstPtr &faulting_load); +#endif + /** + * Squashes instructions for a thread. Squashing information is obtained + * from the time buffer. + */ + void squash(unsigned tid); // Probably want the ISN + + /** Returns the number of used entries for a thread. */ + unsigned getCount(unsigned tid) { return count[tid]; }; + + /** Updates the number of free entries. */ + void updateFreeEntries(int num) { freeEntries += num; } + + /** Debug function to print all instructions. */ + void printInsts(); + + private: + /** Does the actual squashing. */ + void doSquash(unsigned tid); + + ///////////////////////// + // Various pointers + ///////////////////////// + + /** Pointer to the CPU. */ + FullCPU *cpu; + + /** Cache interface. */ + MemInterface *dcacheInterface; +#if 0 + /** Pointer to IEW stage. */ + IEW *iewStage; + + /** The memory dependence unit, which tracks/predicts memory dependences + * between instructions. + */ + MemDepUnit memDepUnit[Impl::MaxThreads]; +#endif + /** The queue to the execute stage. Issued instructions will be written + * into it. + */ + TimeBuffer *issueToExecuteQueue; +#if 0 + /** The backwards time buffer. */ + TimeBuffer *timeBuffer; + + /** Wire to read information from timebuffer. */ + typename TimeBuffer::wire fromCommit; + + /** Function unit pool. */ + FUPool *fuPool; +#endif + ////////////////////////////////////// + // Instruction lists, ready queues, and ordering + ////////////////////////////////////// + + /** List of all the instructions in the IQ (some of which may be issued). */ + std::list instList[Impl::MaxThreads]; + + /** + * Struct for comparing entries to be added to the priority queue. This + * gives reverse ordering to the instructions in terms of sequence + * numbers: the instructions with smaller sequence numbers (and hence + * are older) will be at the top of the priority queue. + */ + struct pqCompare { + bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const + { + return lhs->seqNum > rhs->seqNum; + } + }; + + /** + * Struct for an IQ entry. It includes the instruction and an iterator + * to the instruction's spot in the IQ. + */ + struct IQEntry { + DynInstPtr inst; + ListIt iqIt; + }; + + typedef std::priority_queue, pqCompare> + ReadyInstQueue; + + typedef std::map ReadyInstMap; + typedef typename std::map::iterator ReadyMapIt; + + /** List of ready instructions. + */ + ReadyInstQueue readyInsts; + + /** List of non-speculative instructions that will be scheduled + * once the IQ gets a signal from commit. While it's redundant to + * have the key be a part of the value (the sequence number is stored + * inside of DynInst), when these instructions are woken up only + * the sequence number will be available. Thus it is most efficient to be + * able to search by the sequence number alone. + */ + std::map nonSpecInsts; + + typedef typename std::map::iterator NonSpecMapIt; +#if 0 + /** Entry for the list age ordering by op class. */ + struct ListOrderEntry { + OpClass queueType; + InstSeqNum oldestInst; + }; + + /** List that contains the age order of the oldest instruction of each + * ready queue. Used to select the oldest instruction available + * among op classes. + */ + std::list listOrder; + + typedef typename std::list::iterator ListOrderIt; + + /** Tracks if each ready queue is on the age order list. */ + bool queueOnList[Num_OpClasses]; + + /** Iterators of each ready queue. Points to their spot in the age order + * list. + */ + ListOrderIt readyIt[Num_OpClasses]; + + /** Add an op class to the age order list. */ + void addToOrderList(OpClass op_class); + + /** + * Called when the oldest instruction has been removed from a ready queue; + * this places that ready queue into the proper spot in the age order list. + */ + void moveToYoungerInst(ListOrderIt age_order_it); +#endif + ////////////////////////////////////// + // Various parameters + ////////////////////////////////////// +#if 0 + /** IQ Resource Sharing Policy */ + enum IQPolicy { + Dynamic, + Partitioned, + Threshold + }; + + /** IQ sharing policy for SMT. */ + IQPolicy iqPolicy; +#endif + /** Number of Total Threads*/ + unsigned numThreads; +#if 0 + /** Pointer to list of active threads. */ + list *activeThreads; +#endif + /** Per Thread IQ count */ + unsigned count[Impl::MaxThreads]; + + /** Max IQ Entries Per Thread */ + unsigned maxEntries[Impl::MaxThreads]; + + /** Number of free IQ entries left. */ + unsigned freeEntries; + + /** The number of entries in the instruction queue. */ + unsigned numEntries; + + /** The total number of instructions that can be issued in one cycle. */ + unsigned totalWidth; +#if 0 + /** The number of physical registers in the CPU. */ + unsigned numPhysRegs; + + /** The number of physical integer registers in the CPU. */ + unsigned numPhysIntRegs; + + /** The number of floating point registers in the CPU. */ + unsigned numPhysFloatRegs; +#endif + /** Delay between commit stage and the IQ. + * @todo: Make there be a distinction between the delays within IEW. + */ + unsigned commitToIEWDelay; + + ////////////////////////////////// + // Variables needed for squashing + ////////////////////////////////// + + /** The sequence number of the squashed instruction. */ + InstSeqNum squashedSeqNum[Impl::MaxThreads]; + + /** Iterator that points to the last instruction that has been squashed. + * This will not be valid unless the IQ is in the process of squashing. + */ + ListIt squashIt[Impl::MaxThreads]; +#if 0 + /////////////////////////////////// + // Dependency graph stuff + /////////////////////////////////// + + class DependencyEntry + { + public: + DependencyEntry() + : inst(NULL), next(NULL) + { } + + DynInstPtr inst; + //Might want to include data about what arch. register the + //dependence is waiting on. + DependencyEntry *next; + + //This function, and perhaps this whole class, stand out a little + //bit as they don't fit a classification well. I want access + //to the underlying structure of the linked list, yet at + //the same time it feels like this should be something abstracted + //away. So for now it will sit here, within the IQ, until + //a better implementation is decided upon. + // This function probably shouldn't be within the entry... + void insert(DynInstPtr &new_inst); + + void remove(DynInstPtr &inst_to_remove); + + // Debug variable, remove when done testing. + static unsigned mem_alloc_counter; + }; + + /** Array of linked lists. Each linked list is a list of all the + * instructions that depend upon a given register. The actual + * register's index is used to index into the graph; ie all + * instructions in flight that are dependent upon r34 will be + * in the linked list of dependGraph[34]. + */ + DependencyEntry *dependGraph; + + /** A cache of the recently woken registers. It is 1 if the register + * has been woken up recently, and 0 if the register has been added + * to the dependency graph and has not yet received its value. It + * is basically a secondary scoreboard, and should pretty much mirror + * the scoreboard that exists in the rename map. + */ + vector regScoreboard; + + /** Adds an instruction to the dependency graph, as a producer. */ + bool addToDependents(DynInstPtr &new_inst); + + /** Adds an instruction to the dependency graph, as a consumer. */ + void createDependency(DynInstPtr &new_inst); +#endif + /** Moves an instruction to the ready queue if it is ready. */ + void addIfReady(DynInstPtr &inst); + + /** Debugging function to count how many entries are in the IQ. It does + * a linear walk through the instructions, so do not call this function + * during normal execution. + */ + int countInsts(); +#if 0 + /** Debugging function to dump out the dependency graph. + */ + void dumpDependGraph(); +#endif + /** Debugging function to dump all the list sizes, as well as print + * out the list of nonspeculative instructions. Should not be used + * in any other capacity, but it has no harmful sideaffects. + */ + void dumpLists(); + + /** Debugging function to dump out all instructions that are in the + * IQ. + */ + void dumpInsts(); + + /** Stat for number of instructions added. */ + Stats::Scalar<> iqInstsAdded; + /** Stat for number of non-speculative instructions added. */ + Stats::Scalar<> iqNonSpecInstsAdded; +// Stats::Scalar<> iqIntInstsAdded; + /** Stat for number of integer instructions issued. */ + Stats::Scalar<> iqIntInstsIssued; +// Stats::Scalar<> iqFloatInstsAdded; + /** Stat for number of floating point instructions issued. */ + Stats::Scalar<> iqFloatInstsIssued; +// Stats::Scalar<> iqBranchInstsAdded; + /** Stat for number of branch instructions issued. */ + Stats::Scalar<> iqBranchInstsIssued; +// Stats::Scalar<> iqMemInstsAdded; + /** Stat for number of memory instructions issued. */ + Stats::Scalar<> iqMemInstsIssued; +// Stats::Scalar<> iqMiscInstsAdded; + /** Stat for number of miscellaneous instructions issued. */ + Stats::Scalar<> iqMiscInstsIssued; + /** Stat for number of squashed instructions that were ready to issue. */ + Stats::Scalar<> iqSquashedInstsIssued; + /** Stat for number of squashed instructions examined when squashing. */ + Stats::Scalar<> iqSquashedInstsExamined; + /** Stat for number of squashed instruction operands examined when + * squashing. + */ + Stats::Scalar<> iqSquashedOperandsExamined; + /** Stat for number of non-speculative instructions removed due to a squash. + */ + Stats::Scalar<> iqSquashedNonSpecRemoved; + +}; + +#endif //__CPU_OZONE_INST_QUEUE_HH__ diff --git a/cpu/ozone/inst_queue_impl.hh b/cpu/ozone/inst_queue_impl.hh new file mode 100644 index 000000000..0523c68d6 --- /dev/null +++ b/cpu/ozone/inst_queue_impl.hh @@ -0,0 +1,1341 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// Todo: +// Current ordering allows for 0 cycle added-to-scheduled. Could maybe fake +// it; either do in reverse order, or have added instructions put into a +// different ready queue that, in scheduleRreadyInsts(), gets put onto the +// normal ready queue. This would however give only a one cycle delay, +// but probably is more flexible to actually add in a delay parameter than +// just running it backwards. + +#include + +#include "sim/root.hh" + +#include "cpu/ozone/inst_queue.hh" +#if 0 +template +InstQueue::FUCompletion::FUCompletion(DynInstPtr &_inst, + int fu_idx, + InstQueue *iq_ptr) + : Event(&mainEventQueue, Stat_Event_Pri), + inst(_inst), fuIdx(fu_idx), iqPtr(iq_ptr) +{ + this->setFlags(Event::AutoDelete); +} + +template +void +InstQueue::FUCompletion::process() +{ + iqPtr->processFUCompletion(inst, fuIdx); +} + + +template +const char * +InstQueue::FUCompletion::description() +{ + return "Functional unit completion event"; +} +#endif +template +InstQueue::InstQueue(Params *params) + : dcacheInterface(params->dcacheInterface), +// fuPool(params->fuPool), + numEntries(params->numIQEntries), + totalWidth(params->issueWidth), +// numPhysIntRegs(params->numPhysIntRegs), +// numPhysFloatRegs(params->numPhysFloatRegs), + commitToIEWDelay(params->commitToIEWDelay) +{ +// assert(fuPool); + +// numThreads = params->numberOfThreads; + numThreads = 1; + + //Initialize thread IQ counts + for (int i = 0; i smtIQPolicy; + + //Convert string to lowercase + std::transform(policy.begin(), policy.end(), policy.begin(), + (int(*)(int)) tolower); + + //Figure out resource sharing policy + if (policy == "dynamic") { + iqPolicy = Dynamic; + + //Set Max Entries to Total ROB Capacity + for (int i = 0; i < numThreads; i++) { + maxEntries[i] = numEntries; + } + + } else if (policy == "partitioned") { + iqPolicy = Partitioned; + + //@todo:make work if part_amt doesnt divide evenly. + int part_amt = numEntries / numThreads; + + //Divide ROB up evenly + for (int i = 0; i < numThreads; i++) { + maxEntries[i] = part_amt; + } + + DPRINTF(Fetch, "IQ sharing policy set to Partitioned:" + "%i entries per thread.\n",part_amt); + + } else if (policy == "threshold") { + iqPolicy = Threshold; + + double threshold = (double)params->smtIQThreshold / 100; + + int thresholdIQ = (int)((double)threshold * numEntries); + + //Divide up by threshold amount + for (int i = 0; i < numThreads; i++) { + maxEntries[i] = thresholdIQ; + } + + DPRINTF(Fetch, "IQ sharing policy set to Threshold:" + "%i entries per thread.\n",thresholdIQ); + } else { + assert(0 && "Invalid IQ Sharing Policy.Options Are:{Dynamic," + "Partitioned, Threshold}"); + } +*/ +} + +template +InstQueue::~InstQueue() +{ + // Clear the dependency graph +/* + DependencyEntry *curr; + DependencyEntry *prev; + + for (int i = 0; i < numPhysRegs; ++i) { + curr = dependGraph[i].next; + + while (curr) { + DependencyEntry::mem_alloc_counter--; + + prev = curr; + curr = prev->next; + prev->inst = NULL; + + delete prev; + } + + if (dependGraph[i].inst) { + dependGraph[i].inst = NULL; + } + + dependGraph[i].next = NULL; + } + + assert(DependencyEntry::mem_alloc_counter == 0); + + delete [] dependGraph; +*/ +} + +template +std::string +InstQueue::name() const +{ + return cpu->name() + ".iq"; +} + +template +void +InstQueue::regStats() +{ + iqInstsAdded + .name(name() + ".iqInstsAdded") + .desc("Number of instructions added to the IQ (excludes non-spec)") + .prereq(iqInstsAdded); + + iqNonSpecInstsAdded + .name(name() + ".iqNonSpecInstsAdded") + .desc("Number of non-speculative instructions added to the IQ") + .prereq(iqNonSpecInstsAdded); + +// iqIntInstsAdded; + + iqIntInstsIssued + .name(name() + ".iqIntInstsIssued") + .desc("Number of integer instructions issued") + .prereq(iqIntInstsIssued); + +// iqFloatInstsAdded; + + iqFloatInstsIssued + .name(name() + ".iqFloatInstsIssued") + .desc("Number of float instructions issued") + .prereq(iqFloatInstsIssued); + +// iqBranchInstsAdded; + + iqBranchInstsIssued + .name(name() + ".iqBranchInstsIssued") + .desc("Number of branch instructions issued") + .prereq(iqBranchInstsIssued); + +// iqMemInstsAdded; + + iqMemInstsIssued + .name(name() + ".iqMemInstsIssued") + .desc("Number of memory instructions issued") + .prereq(iqMemInstsIssued); + +// iqMiscInstsAdded; + + iqMiscInstsIssued + .name(name() + ".iqMiscInstsIssued") + .desc("Number of miscellaneous instructions issued") + .prereq(iqMiscInstsIssued); + + iqSquashedInstsIssued + .name(name() + ".iqSquashedInstsIssued") + .desc("Number of squashed instructions issued") + .prereq(iqSquashedInstsIssued); + + iqSquashedInstsExamined + .name(name() + ".iqSquashedInstsExamined") + .desc("Number of squashed instructions iterated over during squash;" + " mainly for profiling") + .prereq(iqSquashedInstsExamined); + + iqSquashedOperandsExamined + .name(name() + ".iqSquashedOperandsExamined") + .desc("Number of squashed operands that are examined and possibly " + "removed from graph") + .prereq(iqSquashedOperandsExamined); + + iqSquashedNonSpecRemoved + .name(name() + ".iqSquashedNonSpecRemoved") + .desc("Number of squashed non-spec instructions that were removed") + .prereq(iqSquashedNonSpecRemoved); +/* + for ( int i=0; i < numThreads; i++) { + // Tell mem dependence unit to reg stats as well. + memDepUnit[i].regStats(); + } +*/ +} +/* +template +void +InstQueue::setActiveThreads(list *at_ptr) +{ + DPRINTF(IQ, "Setting active threads list pointer.\n"); + activeThreads = at_ptr; +} +*/ +template +void +InstQueue::setIssueToExecuteQueue(TimeBuffer *i2e_ptr) +{ + DPRINTF(IQ, "Set the issue to execute queue.\n"); + issueToExecuteQueue = i2e_ptr; +} +/* +template +void +InstQueue::setTimeBuffer(TimeBuffer *tb_ptr) +{ + DPRINTF(IQ, "Set the time buffer.\n"); + timeBuffer = tb_ptr; + + fromCommit = timeBuffer->getWire(-commitToIEWDelay); +} + +template +int +InstQueue::entryAmount(int num_threads) +{ + if (iqPolicy == Partitioned) { + return numEntries / num_threads; + } else { + return 0; + } +} + + +template +void +InstQueue::resetEntries() +{ + if (iqPolicy != Dynamic || numThreads > 1) { + int active_threads = (*activeThreads).size(); + + list::iterator threads = (*activeThreads).begin(); + list::iterator list_end = (*activeThreads).end(); + + while (threads != list_end) { + if (iqPolicy == Partitioned) { + maxEntries[*threads++] = numEntries / active_threads; + } else if(iqPolicy == Threshold && active_threads == 1) { + maxEntries[*threads++] = numEntries; + } + } + } +} +*/ +template +unsigned +InstQueue::numFreeEntries() +{ + return freeEntries; +} + +template +unsigned +InstQueue::numFreeEntries(unsigned tid) +{ + return maxEntries[tid] - count[tid]; +} + +// Might want to do something more complex if it knows how many instructions +// will be issued this cycle. +template +bool +InstQueue::isFull() +{ + if (freeEntries == 0) { + return(true); + } else { + return(false); + } +} + +template +bool +InstQueue::isFull(unsigned tid) +{ + if (numFreeEntries(tid) == 0) { + return(true); + } else { + return(false); + } +} + +template +bool +InstQueue::hasReadyInsts() +{ +/* + if (!listOrder.empty()) { + return true; + } + + for (int i = 0; i < Num_OpClasses; ++i) { + if (!readyInsts[i].empty()) { + return true; + } + } + + return false; +*/ + return readyInsts.empty(); +} + +template +void +InstQueue::insert(DynInstPtr &new_inst) +{ + // Make sure the instruction is valid + assert(new_inst); + + DPRINTF(IQ, "Adding instruction PC %#x to the IQ.\n", + new_inst->readPC()); + + // Check if there are any free entries. Panic if there are none. + // Might want to have this return a fault in the future instead of + // panicing. + assert(freeEntries != 0); + + instList[new_inst->threadNumber].push_back(new_inst); + + // Decrease the number of free entries. + --freeEntries; + + //Mark Instruction as in IQ +// new_inst->setInIQ(); +/* + // Look through its source registers (physical regs), and mark any + // dependencies. + addToDependents(new_inst); + + // Have this instruction set itself as the producer of its destination + // register(s). + createDependency(new_inst); +*/ + // If it's a memory instruction, add it to the memory dependency + // unit. +// if (new_inst->isMemRef()) { +// memDepUnit[new_inst->threadNumber].insert(new_inst); +// } else { + // If the instruction is ready then add it to the ready list. + addIfReady(new_inst); +// } + + ++iqInstsAdded; + + + //Update Thread IQ Count + count[new_inst->threadNumber]++; + + assert(freeEntries == (numEntries - countInsts())); +} + +template +void +InstQueue::insertNonSpec(DynInstPtr &new_inst) +{ + nonSpecInsts[new_inst->seqNum] = new_inst; + + // @todo: Clean up this code; can do it by setting inst as unable + // to issue, then calling normal insert on the inst. + + // Make sure the instruction is valid + assert(new_inst); + + DPRINTF(IQ, "Adding instruction PC %#x to the IQ.\n", + new_inst->readPC()); + + // Check if there are any free entries. Panic if there are none. + // Might want to have this return a fault in the future instead of + // panicing. + assert(freeEntries != 0); + + instList[new_inst->threadNumber].push_back(new_inst); + + // Decrease the number of free entries. + --freeEntries; + + //Mark Instruction as in IQ +// new_inst->setInIQ(); +/* + // Have this instruction set itself as the producer of its destination + // register(s). + createDependency(new_inst); + + // If it's a memory instruction, add it to the memory dependency + // unit. + if (new_inst->isMemRef()) { + memDepUnit[new_inst->threadNumber].insertNonSpec(new_inst); + } +*/ + ++iqNonSpecInstsAdded; + + //Update Thread IQ Count + count[new_inst->threadNumber]++; + + assert(freeEntries == (numEntries - countInsts())); +} +/* +template +void +InstQueue::advanceTail(DynInstPtr &inst) +{ + // Have this instruction set itself as the producer of its destination + // register(s). + createDependency(inst); +} + +template +void +InstQueue::addToOrderList(OpClass op_class) +{ + assert(!readyInsts[op_class].empty()); + + ListOrderEntry queue_entry; + + queue_entry.queueType = op_class; + + queue_entry.oldestInst = readyInsts[op_class].top()->seqNum; + + ListOrderIt list_it = listOrder.begin(); + ListOrderIt list_end_it = listOrder.end(); + + while (list_it != list_end_it) { + if ((*list_it).oldestInst > queue_entry.oldestInst) { + break; + } + + list_it++; + } + + readyIt[op_class] = listOrder.insert(list_it, queue_entry); + queueOnList[op_class] = true; +} + +template +void +InstQueue::moveToYoungerInst(ListOrderIt list_order_it) +{ + // Get iterator of next item on the list + // Delete the original iterator + // Determine if the next item is either the end of the list or younger + // than the new instruction. If so, then add in a new iterator right here. + // If not, then move along. + ListOrderEntry queue_entry; + OpClass op_class = (*list_order_it).queueType; + ListOrderIt next_it = list_order_it; + + ++next_it; + + queue_entry.queueType = op_class; + queue_entry.oldestInst = readyInsts[op_class].top()->seqNum; + + while (next_it != listOrder.end() && + (*next_it).oldestInst < queue_entry.oldestInst) { + ++next_it; + } + + readyIt[op_class] = listOrder.insert(next_it, queue_entry); +} + +template +void +InstQueue::processFUCompletion(DynInstPtr &inst, int fu_idx) +{ + // The CPU could have been sleeping until this op completed (*extremely* + // long latency op). Wake it if it was. This may be overkill. + iewStage->wakeCPU(); + + fuPool->freeUnit(fu_idx); + + int &size = issueToExecuteQueue->access(0)->size; + + issueToExecuteQueue->access(0)->insts[size++] = inst; +} +*/ +// @todo: Figure out a better way to remove the squashed items from the +// lists. Checking the top item of each list to see if it's squashed +// wastes time and forces jumps. +template +void +InstQueue::scheduleReadyInsts() +{ + DPRINTF(IQ, "Attempting to schedule ready instructions from " + "the IQ.\n"); + +// IssueStruct *i2e_info = issueToExecuteQueue->access(0); +/* + // Will need to reorder the list if either a queue is not on the list, + // or it has an older instruction than last time. + for (int i = 0; i < Num_OpClasses; ++i) { + if (!readyInsts[i].empty()) { + if (!queueOnList[i]) { + addToOrderList(OpClass(i)); + } else if (readyInsts[i].top()->seqNum < + (*readyIt[i]).oldestInst) { + listOrder.erase(readyIt[i]); + addToOrderList(OpClass(i)); + } + } + } + + // Have iterator to head of the list + // While I haven't exceeded bandwidth or reached the end of the list, + // Try to get a FU that can do what this op needs. + // If successful, change the oldestInst to the new top of the list, put + // the queue in the proper place in the list. + // Increment the iterator. + // This will avoid trying to schedule a certain op class if there are no + // FUs that handle it. + ListOrderIt order_it = listOrder.begin(); + ListOrderIt order_end_it = listOrder.end(); + int total_issued = 0; + int exec_queue_slot = i2e_info->size; + + while (exec_queue_slot < totalWidth && order_it != order_end_it) { + OpClass op_class = (*order_it).queueType; + + assert(!readyInsts[op_class].empty()); + + DynInstPtr issuing_inst = readyInsts[op_class].top(); + + assert(issuing_inst->seqNum == (*order_it).oldestInst); + + if (issuing_inst->isSquashed()) { + readyInsts[op_class].pop(); + + if (!readyInsts[op_class].empty()) { + moveToYoungerInst(order_it); + } else { + readyIt[op_class] = listOrder.end(); + queueOnList[op_class] = false; + } + + listOrder.erase(order_it++); + + ++iqSquashedInstsIssued; + + continue; + } + + int idx = fuPool->getUnit(op_class); + + if (idx != -1) { + int op_latency = fuPool->getOpLatency(op_class); + + if (op_latency == 1) { + i2e_info->insts[exec_queue_slot++] = issuing_inst; + i2e_info->size++; + + // Add the FU onto the list of FU's to be freed next cycle. + fuPool->freeUnit(idx); + } else { + int issue_latency = fuPool->getIssueLatency(op_class); + + if (issue_latency > 1) { + // Generate completion event for the FU + FUCompletion *execution = new FUCompletion(issuing_inst, + idx, this); + + execution->schedule(curTick + issue_latency - 1); + } else { + i2e_info->insts[exec_queue_slot++] = issuing_inst; + i2e_info->size++; + + // Add the FU onto the list of FU's to be freed next cycle. + fuPool->freeUnit(idx); + } + } + + DPRINTF(IQ, "Thread %i: Issuing instruction PC %#x " + "[sn:%lli]\n", + issuing_inst->threadNumber, issuing_inst->readPC(), + issuing_inst->seqNum); + + readyInsts[op_class].pop(); + + if (!readyInsts[op_class].empty()) { + moveToYoungerInst(order_it); + } else { + readyIt[op_class] = listOrder.end(); + queueOnList[op_class] = false; + } + + issuing_inst->setIssued(); + ++total_issued; + + if (!issuing_inst->isMemRef()) { + // Memory instructions can not be freed from the IQ until they + // complete. + ++freeEntries; + count[issuing_inst->threadNumber]--; + issuing_inst->removeInIQ(); + } else { + memDepUnit[issuing_inst->threadNumber].issue(issuing_inst); + } + + listOrder.erase(order_it++); + } else { + ++order_it; + } + } + + if (total_issued) { + cpu->activityThisCycle(); + } else { + DPRINTF(IQ, "Not able to schedule any instructions.\n"); + } +*/ +} + +template +void +InstQueue::scheduleNonSpec(const InstSeqNum &inst) +{ + DPRINTF(IQ, "Marking nonspeculative instruction with sequence " + "number %i as ready to execute.\n", inst); + + NonSpecMapIt inst_it = nonSpecInsts.find(inst); + + assert(inst_it != nonSpecInsts.end()); + +// unsigned tid = (*inst_it).second->threadNumber; + + // Mark this instruction as ready to issue. + (*inst_it).second->setCanIssue(); + + // Now schedule the instruction. +// if (!(*inst_it).second->isMemRef()) { + addIfReady((*inst_it).second); +// } else { +// memDepUnit[tid].nonSpecInstReady((*inst_it).second); +// } + + nonSpecInsts.erase(inst_it); +} + +template +void +InstQueue::commit(const InstSeqNum &inst, unsigned tid) +{ + /*Need to go through each thread??*/ + DPRINTF(IQ, "[tid:%i]: Committing instructions older than [sn:%i]\n", + tid,inst); + + ListIt iq_it = instList[tid].begin(); + + while (iq_it != instList[tid].end() && + (*iq_it)->seqNum <= inst) { + ++iq_it; + instList[tid].pop_front(); + } + + assert(freeEntries == (numEntries - countInsts())); +} + +template +void +InstQueue::wakeDependents(DynInstPtr &completed_inst) +{ + DPRINTF(IQ, "Waking dependents of completed instruction.\n"); + // Look at the physical destination register of the DynInst + // and look it up on the dependency graph. Then mark as ready + // any instructions within the instruction queue. +/* + DependencyEntry *curr; + DependencyEntry *prev; +*/ + // Tell the memory dependence unit to wake any dependents on this + // instruction if it is a memory instruction. Also complete the memory + // instruction at this point since we know it executed fine. + // @todo: Might want to rename "completeMemInst" to + // something that indicates that it won't need to be replayed, and call + // this earlier. Might not be a big deal. + if (completed_inst->isMemRef()) { +// memDepUnit[completed_inst->threadNumber].wakeDependents(completed_inst); + completeMemInst(completed_inst); + } + completed_inst->wakeDependents(); +/* + for (int dest_reg_idx = 0; + dest_reg_idx < completed_inst->numDestRegs(); + dest_reg_idx++) + { + PhysRegIndex dest_reg = + completed_inst->renamedDestRegIdx(dest_reg_idx); + + // Special case of uniq or control registers. They are not + // handled by the IQ and thus have no dependency graph entry. + // @todo Figure out a cleaner way to handle this. + if (dest_reg >= numPhysRegs) { + continue; + } + + DPRINTF(IQ, "Waking any dependents on register %i.\n", + (int) dest_reg); + + //Maybe abstract this part into a function. + //Go through the dependency chain, marking the registers as ready + //within the waiting instructions. + + curr = dependGraph[dest_reg].next; + + while (curr) { + DPRINTF(IQ, "Waking up a dependent instruction, PC%#x.\n", + curr->inst->readPC()); + + // Might want to give more information to the instruction + // so that it knows which of its source registers is ready. + // However that would mean that the dependency graph entries + // would need to hold the src_reg_idx. + curr->inst->markSrcRegReady(); + + addIfReady(curr->inst); + + DependencyEntry::mem_alloc_counter--; + + prev = curr; + curr = prev->next; + prev->inst = NULL; + + delete prev; + } + + // Reset the head node now that all of its dependents have been woken + // up. + dependGraph[dest_reg].next = NULL; + dependGraph[dest_reg].inst = NULL; + + // Mark the scoreboard as having that register ready. + regScoreboard[dest_reg] = true; + } +*/ +} + +template +void +InstQueue::addReadyMemInst(DynInstPtr &ready_inst) +{ + OpClass op_class = ready_inst->opClass(); + + readyInsts.push(ready_inst); + + DPRINTF(IQ, "Instruction is ready to issue, putting it onto " + "the ready list, PC %#x opclass:%i [sn:%lli].\n", + ready_inst->readPC(), op_class, ready_inst->seqNum); +} +/* +template +void +InstQueue::rescheduleMemInst(DynInstPtr &resched_inst) +{ + memDepUnit[resched_inst->threadNumber].reschedule(resched_inst); +} + +template +void +InstQueue::replayMemInst(DynInstPtr &replay_inst) +{ + memDepUnit[replay_inst->threadNumber].replay(replay_inst); +} +*/ +template +void +InstQueue::completeMemInst(DynInstPtr &completed_inst) +{ + int tid = completed_inst->threadNumber; + + DPRINTF(IQ, "Completing mem instruction PC:%#x [sn:%lli]\n", + completed_inst->readPC(), completed_inst->seqNum); + + ++freeEntries; + +// completed_inst->memOpDone = true; + +// memDepUnit[tid].completed(completed_inst); + + count[tid]--; +} +/* +template +void +InstQueue::violation(DynInstPtr &store, + DynInstPtr &faulting_load) +{ + memDepUnit[store->threadNumber].violation(store, faulting_load); +} +*/ +template +void +InstQueue::squash(unsigned tid) +{ + DPRINTF(IQ, "[tid:%i]: Starting to squash instructions in " + "the IQ.\n", tid); + + // Read instruction sequence number of last instruction out of the + // time buffer. +// squashedSeqNum[tid] = fromCommit->commitInfo[tid].doneSeqNum; + + // Setup the squash iterator to point to the tail. + squashIt[tid] = instList[tid].end(); + --squashIt[tid]; + + // Call doSquash if there are insts in the IQ + if (count[tid] > 0) { + doSquash(tid); + } + + // Also tell the memory dependence unit to squash. +// memDepUnit[tid].squash(squashedSeqNum[tid], tid); +} + +template +void +InstQueue::doSquash(unsigned tid) +{ + // Make sure the squashed sequence number is valid. + assert(squashedSeqNum[tid] != 0); + + DPRINTF(IQ, "[tid:%i]: Squashing until sequence number %i!\n", + tid, squashedSeqNum[tid]); + + // Squash any instructions younger than the squashed sequence number + // given. + while (squashIt[tid] != instList[tid].end() && + (*squashIt[tid])->seqNum > squashedSeqNum[tid]) { + + DynInstPtr squashed_inst = (*squashIt[tid]); + + // Only handle the instruction if it actually is in the IQ and + // hasn't already been squashed in the IQ. + if (squashed_inst->threadNumber != tid || + squashed_inst->isSquashedInIQ()) { + --squashIt[tid]; + continue; + } + + if (!squashed_inst->isIssued() || + (squashed_inst->isMemRef()/* && + !squashed_inst->memOpDone*/)) { + + // Remove the instruction from the dependency list. + if (!squashed_inst->isNonSpeculative()) { +/* + for (int src_reg_idx = 0; + src_reg_idx < squashed_inst->numSrcRegs(); + src_reg_idx++) + { + PhysRegIndex src_reg = + squashed_inst->renamedSrcRegIdx(src_reg_idx); + + // Only remove it from the dependency graph if it was + // placed there in the first place. + // HACK: This assumes that instructions woken up from the + // dependency chain aren't informed that a specific src + // register has become ready. This may not always be true + // in the future. + // Instead of doing a linked list traversal, we can just + // remove these squashed instructions either at issue time, + // or when the register is overwritten. The only downside + // to this is it leaves more room for error. + + if (!squashed_inst->isReadySrcRegIdx(src_reg_idx) && + src_reg < numPhysRegs) { + dependGraph[src_reg].remove(squashed_inst); + } + + + ++iqSquashedOperandsExamined; + } +*/ + // Might want to remove producers as well. + } else { + nonSpecInsts[squashed_inst->seqNum] = NULL; + + nonSpecInsts.erase(squashed_inst->seqNum); + + ++iqSquashedNonSpecRemoved; + } + + // Might want to also clear out the head of the dependency graph. + + // Mark it as squashed within the IQ. + squashed_inst->setSquashedInIQ(); + + // @todo: Remove this hack where several statuses are set so the + // inst will flow through the rest of the pipeline. + squashed_inst->setIssued(); + squashed_inst->setCanCommit(); +// squashed_inst->removeInIQ(); + + //Update Thread IQ Count + count[squashed_inst->threadNumber]--; + + ++freeEntries; + + if (numThreads > 1) { + DPRINTF(IQ, "[tid:%i]: Instruction PC %#x squashed.\n", + tid, squashed_inst->readPC()); + } else { + DPRINTF(IQ, "Instruction PC %#x squashed.\n", + squashed_inst->readPC()); + } + } + + --squashIt[tid]; + ++iqSquashedInstsExamined; + } +} +/* +template +void +InstQueue::DependencyEntry::insert(DynInstPtr &new_inst) +{ + //Add this new, dependent instruction at the head of the dependency + //chain. + + // First create the entry that will be added to the head of the + // dependency chain. + DependencyEntry *new_entry = new DependencyEntry; + new_entry->next = this->next; + new_entry->inst = new_inst; + + // Then actually add it to the chain. + this->next = new_entry; + + ++mem_alloc_counter; +} + +template +void +InstQueue::DependencyEntry::remove(DynInstPtr &inst_to_remove) +{ + DependencyEntry *prev = this; + DependencyEntry *curr = this->next; + + // Make sure curr isn't NULL. Because this instruction is being + // removed from a dependency list, it must have been placed there at + // an earlier time. The dependency chain should not be empty, + // unless the instruction dependent upon it is already ready. + if (curr == NULL) { + return; + } + + // Find the instruction to remove within the dependency linked list. + while (curr->inst != inst_to_remove) { + prev = curr; + curr = curr->next; + + assert(curr != NULL); + } + + // Now remove this instruction from the list. + prev->next = curr->next; + + --mem_alloc_counter; + + // Could push this off to the destructor of DependencyEntry + curr->inst = NULL; + + delete curr; +} + +template +bool +InstQueue::addToDependents(DynInstPtr &new_inst) +{ + // Loop through the instruction's source registers, adding + // them to the dependency list if they are not ready. + int8_t total_src_regs = new_inst->numSrcRegs(); + bool return_val = false; + + for (int src_reg_idx = 0; + src_reg_idx < total_src_regs; + src_reg_idx++) + { + // Only add it to the dependency graph if it's not ready. + if (!new_inst->isReadySrcRegIdx(src_reg_idx)) { + PhysRegIndex src_reg = new_inst->renamedSrcRegIdx(src_reg_idx); + + // Check the IQ's scoreboard to make sure the register + // hasn't become ready while the instruction was in flight + // between stages. Only if it really isn't ready should + // it be added to the dependency graph. + if (src_reg >= numPhysRegs) { + continue; + } else if (regScoreboard[src_reg] == false) { + DPRINTF(IQ, "Instruction PC %#x has src reg %i that " + "is being added to the dependency chain.\n", + new_inst->readPC(), src_reg); + + dependGraph[src_reg].insert(new_inst); + + // Change the return value to indicate that something + // was added to the dependency graph. + return_val = true; + } else { + DPRINTF(IQ, "Instruction PC %#x has src reg %i that " + "became ready before it reached the IQ.\n", + new_inst->readPC(), src_reg); + // Mark a register ready within the instruction. + new_inst->markSrcRegReady(); + } + } + } + + return return_val; +} + +template +void +InstQueue::createDependency(DynInstPtr &new_inst) +{ + //Actually nothing really needs to be marked when an + //instruction becomes the producer of a register's value, + //but for convenience a ptr to the producing instruction will + //be placed in the head node of the dependency links. + int8_t total_dest_regs = new_inst->numDestRegs(); + + for (int dest_reg_idx = 0; + dest_reg_idx < total_dest_regs; + dest_reg_idx++) + { + PhysRegIndex dest_reg = new_inst->renamedDestRegIdx(dest_reg_idx); + + // Instructions that use the misc regs will have a reg number + // higher than the normal physical registers. In this case these + // registers are not renamed, and there is no need to track + // dependencies as these instructions must be executed at commit. + if (dest_reg >= numPhysRegs) { + continue; + } + + if (dependGraph[dest_reg].next) { + dumpDependGraph(); + panic("Dependency graph %i not empty!", dest_reg); + } + + dependGraph[dest_reg].inst = new_inst; + + // Mark the scoreboard to say it's not yet ready. + regScoreboard[dest_reg] = false; + } +} +*/ +template +void +InstQueue::addIfReady(DynInstPtr &inst) +{ + //If the instruction now has all of its source registers + // available, then add it to the list of ready instructions. + if (inst->readyToIssue()) { + + //Add the instruction to the proper ready list. + if (inst->isMemRef()) { + + DPRINTF(IQ, "Checking if memory instruction can issue.\n"); + + // Message to the mem dependence unit that this instruction has + // its registers ready. + +// memDepUnit[inst->threadNumber].regsReady(inst); + + return; + } + + OpClass op_class = inst->opClass(); + + DPRINTF(IQ, "Instruction is ready to issue, putting it onto " + "the ready list, PC %#x opclass:%i [sn:%lli].\n", + inst->readPC(), op_class, inst->seqNum); + + readyInsts.push(inst); + } +} + +template +int +InstQueue::countInsts() +{ + //ksewell:This works but definitely could use a cleaner write + //with a more intuitive way of counting. Right now it's + //just brute force .... + +#if 0 + int total_insts = 0; + + for (int i = 0; i < numThreads; ++i) { + ListIt count_it = instList[i].begin(); + + while (count_it != instList[i].end()) { + if (!(*count_it)->isSquashed() && !(*count_it)->isSquashedInIQ()) { + if (!(*count_it)->isIssued()) { + ++total_insts; + } else if ((*count_it)->isMemRef() && + !(*count_it)->memOpDone) { + // Loads that have not been marked as executed still count + // towards the total instructions. + ++total_insts; + } + } + + ++count_it; + } + } + + return total_insts; +#else + return numEntries - freeEntries; +#endif +} +/* +template +void +InstQueue::dumpDependGraph() +{ + DependencyEntry *curr; + + for (int i = 0; i < numPhysRegs; ++i) + { + curr = &dependGraph[i]; + + if (curr->inst) { + cprintf("dependGraph[%i]: producer: %#x [sn:%lli] consumer: ", + i, curr->inst->readPC(), curr->inst->seqNum); + } else { + cprintf("dependGraph[%i]: No producer. consumer: ", i); + } + + while (curr->next != NULL) { + curr = curr->next; + + cprintf("%#x [sn:%lli] ", + curr->inst->readPC(), curr->inst->seqNum); + } + + cprintf("\n"); + } +} +*/ +template +void +InstQueue::dumpLists() +{ + for (int i = 0; i < Num_OpClasses; ++i) { + cprintf("Ready list %i size: %i\n", i, readyInsts.size()); + + cprintf("\n"); + } + + cprintf("Non speculative list size: %i\n", nonSpecInsts.size()); + + NonSpecMapIt non_spec_it = nonSpecInsts.begin(); + NonSpecMapIt non_spec_end_it = nonSpecInsts.end(); + + cprintf("Non speculative list: "); + + while (non_spec_it != non_spec_end_it) { + cprintf("%#x [sn:%lli]", (*non_spec_it).second->readPC(), + (*non_spec_it).second->seqNum); + ++non_spec_it; + } + + cprintf("\n"); +/* + ListOrderIt list_order_it = listOrder.begin(); + ListOrderIt list_order_end_it = listOrder.end(); + int i = 1; + + cprintf("List order: "); + + while (list_order_it != list_order_end_it) { + cprintf("%i OpClass:%i [sn:%lli] ", i, (*list_order_it).queueType, + (*list_order_it).oldestInst); + + ++list_order_it; + ++i; + } +*/ + cprintf("\n"); +} + + +template +void +InstQueue::dumpInsts() +{ + for (int i = 0; i < numThreads; ++i) { +// int num = 0; +// int valid_num = 0; +/* + ListIt inst_list_it = instList[i].begin(); + + while (inst_list_it != instList[i].end()) + { + cprintf("Instruction:%i\n", + num); + if (!(*inst_list_it)->isSquashed()) { + if (!(*inst_list_it)->isIssued()) { + ++valid_num; + cprintf("Count:%i\n", valid_num); + } else if ((*inst_list_it)->isMemRef() && + !(*inst_list_it)->memOpDone) { + // Loads that have not been marked as executed still count + // towards the total instructions. + ++valid_num; + cprintf("Count:%i\n", valid_num); + } + } + + cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" + "Issued:%i\nSquashed:%i\n", + (*inst_list_it)->readPC(), + (*inst_list_it)->seqNum, + (*inst_list_it)->threadNumber, + (*inst_list_it)->isIssued(), + (*inst_list_it)->isSquashed()); + + if ((*inst_list_it)->isMemRef()) { + cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); + } + + cprintf("\n"); + + inst_list_it++; + ++num; + } +*/ + } +} diff --git a/cpu/ozone/lsq_unit.cc b/cpu/ozone/lsq_unit.cc new file mode 100644 index 000000000..3ac51b87d --- /dev/null +++ b/cpu/ozone/lsq_unit.cc @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/ozone/ozone_impl.hh" +#include "cpu/ozone/lsq_unit_impl.hh" + +// Force the instantiation of LDSTQ for all the implementations we care about. +template class OzoneLSQ; + diff --git a/cpu/ozone/lsq_unit.hh b/cpu/ozone/lsq_unit.hh new file mode 100644 index 000000000..3c3e3988c --- /dev/null +++ b/cpu/ozone/lsq_unit.hh @@ -0,0 +1,632 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_OZONE_LSQ_UNIT_HH__ +#define __CPU_OZONE_LSQ_UNIT_HH__ + +#include +#include +#include + +#include "arch/faults.hh" +#include "arch/isa_traits.hh" +#include "config/full_system.hh" +#include "base/hashmap.hh" +#include "cpu/inst_seq.hh" +#include "mem/mem_interface.hh" +//#include "mem/page_table.hh" +#include "sim/sim_object.hh" + +class PageTable; + +/** + * Class that implements the actual LQ and SQ for each specific thread. + * Both are circular queues; load entries are freed upon committing, while + * store entries are freed once they writeback. The LSQUnit tracks if there + * are memory ordering violations, and also detects partial load to store + * forwarding cases (a store only has part of a load's data) that requires + * the load to wait until the store writes back. In the former case it + * holds onto the instruction until the dependence unit looks at it, and + * in the latter it stalls the LSQ until the store writes back. At that + * point the load is replayed. + */ +template +class OzoneLSQ { + public: + typedef typename Impl::Params Params; + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::BackEnd BackEnd; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::IssueStruct IssueStruct; + + typedef TheISA::IntReg IntReg; + + typedef typename std::map::iterator LdMapIt; + + private: + class StoreCompletionEvent : public Event { + public: + /** Constructs a store completion event. */ + StoreCompletionEvent(int store_idx, Event *wb_event, OzoneLSQ *lsq_ptr); + + /** Processes the store completion event. */ + void process(); + + /** Returns the description of this event. */ + const char *description(); + + private: + /** The store index of the store being written back. */ + int storeIdx; + /** The writeback event for the store. Needed for store + * conditionals. + */ + Event *wbEvent; + /** The pointer to the LSQ unit that issued the store. */ + OzoneLSQ *lsqPtr; + }; + + friend class StoreCompletionEvent; + + public: + /** Constructs an LSQ unit. init() must be called prior to use. */ + OzoneLSQ(); + + /** Initializes the LSQ unit with the specified number of entries. */ + void init(Params *params, unsigned maxLQEntries, + unsigned maxSQEntries, unsigned id); + + /** Returns the name of the LSQ unit. */ + std::string name() const; + + /** Sets the CPU pointer. */ + void setCPU(FullCPU *cpu_ptr) + { cpu = cpu_ptr; } + + /** Sets the back-end stage pointer. */ + void setBE(BackEnd *be_ptr) + { be = be_ptr; } + + /** Sets the page table pointer. */ + void setPageTable(PageTable *pt_ptr); + + /** Ticks the LSQ unit, which in this case only resets the number of + * used cache ports. + * @todo: Move the number of used ports up to the LSQ level so it can + * be shared by all LSQ units. + */ + void tick() { usedPorts = 0; } + + /** Inserts an instruction. */ + void insert(DynInstPtr &inst); + /** Inserts a load instruction. */ + void insertLoad(DynInstPtr &load_inst); + /** Inserts a store instruction. */ + void insertStore(DynInstPtr &store_inst); + + /** Executes a load instruction. */ + Fault executeLoad(DynInstPtr &inst); + + Fault executeLoad(int lq_idx); + /** Executes a store instruction. */ + Fault executeStore(DynInstPtr &inst); + + /** Commits the head load. */ + void commitLoad(); + /** Commits a specific load, given by the sequence number. */ + void commitLoad(InstSeqNum &inst); + /** Commits loads older than a specific sequence number. */ + void commitLoads(InstSeqNum &youngest_inst); + + /** Commits stores older than a specific sequence number. */ + void commitStores(InstSeqNum &youngest_inst); + + /** Writes back stores. */ + void writebackStores(); + + // @todo: Include stats in the LSQ unit. + //void regStats(); + + /** Clears all the entries in the LQ. */ + void clearLQ(); + + /** Clears all the entries in the SQ. */ + void clearSQ(); + + /** Resizes the LQ to a given size. */ + void resizeLQ(unsigned size); + + /** Resizes the SQ to a given size. */ + void resizeSQ(unsigned size); + + /** Squashes all instructions younger than a specific sequence number. */ + void squash(const InstSeqNum &squashed_num); + + /** Returns if there is a memory ordering violation. Value is reset upon + * call to getMemDepViolator(). + */ + bool violation() { return memDepViolator; } + + /** Returns the memory ordering violator. */ + DynInstPtr getMemDepViolator(); + + /** Returns if a load became blocked due to the memory system. It clears + * the bool's value upon this being called. + */ + inline bool loadBlocked(); + + /** Returns the number of free entries (min of free LQ and SQ entries). */ + unsigned numFreeEntries(); + + /** Returns the number of loads ready to execute. */ + int numLoadsReady(); + + /** Returns the number of loads in the LQ. */ + int numLoads() { return loads; } + + /** Returns the number of stores in the SQ. */ + int numStores() { return stores; } + + /** Returns if either the LQ or SQ is full. */ + bool isFull() { return lqFull() || sqFull(); } + + /** Returns if the LQ is full. */ + bool lqFull() { return loads >= (LQEntries - 1); } + + /** Returns if the SQ is full. */ + bool sqFull() { return stores >= (SQEntries - 1); } + + /** Debugging function to dump instructions in the LSQ. */ + void dumpInsts(); + + /** Returns the number of instructions in the LSQ. */ + unsigned getCount() { return loads + stores; } + + /** Returns if there are any stores to writeback. */ + bool hasStoresToWB() { return storesToWB; } + + /** Returns the number of stores to writeback. */ + int numStoresToWB() { return storesToWB; } + + /** Returns if the LSQ unit will writeback on this cycle. */ + bool willWB() { return storeQueue[storeWBIdx].canWB && + !storeQueue[storeWBIdx].completed && + !dcacheInterface->isBlocked(); } + + private: + /** Completes the store at the specified index. */ + void completeStore(int store_idx); + + /** Increments the given store index (circular queue). */ + inline void incrStIdx(int &store_idx); + /** Decrements the given store index (circular queue). */ + inline void decrStIdx(int &store_idx); + /** Increments the given load index (circular queue). */ + inline void incrLdIdx(int &load_idx); + /** Decrements the given load index (circular queue). */ + inline void decrLdIdx(int &load_idx); + + private: + /** Pointer to the CPU. */ + FullCPU *cpu; + + /** Pointer to the back-end stage. */ + BackEnd *be; + + /** Pointer to the D-cache. */ + MemInterface *dcacheInterface; + + /** Pointer to the page table. */ + PageTable *pTable; + + public: + struct SQEntry { + /** Constructs an empty store queue entry. */ + SQEntry() + : inst(NULL), req(NULL), size(0), data(0), + canWB(0), committed(0), completed(0) + { } + + /** Constructs a store queue entry for a given instruction. */ + SQEntry(DynInstPtr &_inst) + : inst(_inst), req(NULL), size(0), data(0), + canWB(0), committed(0), completed(0) + { } + + /** The store instruction. */ + DynInstPtr inst; + /** The memory request for the store. */ + MemReqPtr req; + /** The size of the store. */ + int size; + /** The store data. */ + IntReg data; + /** Whether or not the store can writeback. */ + bool canWB; + /** Whether or not the store is committed. */ + bool committed; + /** Whether or not the store is completed. */ + bool completed; + }; + + enum Status { + Running, + Idle, + DcacheMissStall, + DcacheMissSwitch + }; + + private: + /** The OzoneLSQ thread id. */ + unsigned lsqID; + + /** The status of the LSQ unit. */ + Status _status; + + /** The store queue. */ + std::vector storeQueue; + + /** The load queue. */ + std::vector loadQueue; + + // Consider making these 16 bits + /** The number of LQ entries. */ + unsigned LQEntries; + /** The number of SQ entries. */ + unsigned SQEntries; + + /** The number of load instructions in the LQ. */ + int loads; + /** The number of store instructions in the SQ (excludes those waiting to + * writeback). + */ + int stores; + /** The number of store instructions in the SQ waiting to writeback. */ + int storesToWB; + + /** The index of the head instruction in the LQ. */ + int loadHead; + /** The index of the tail instruction in the LQ. */ + int loadTail; + + /** The index of the head instruction in the SQ. */ + int storeHead; + /** The index of the first instruction that is ready to be written back, + * and has not yet been written back. + */ + int storeWBIdx; + /** The index of the tail instruction in the SQ. */ + int storeTail; + + /// @todo Consider moving to a more advanced model with write vs read ports + /** The number of cache ports available each cycle. */ + int cachePorts; + + /** The number of used cache ports in this cycle. */ + int usedPorts; + + //list mshrSeqNums; + + //Stats::Scalar<> dcacheStallCycles; + Counter lastDcacheStall; + + /** Wire to read information from the issue stage time queue. */ + typename TimeBuffer::wire fromIssue; + + // Make these per thread? + /** Whether or not the LSQ is stalled. */ + bool stalled; + /** The store that causes the stall due to partial store to load + * forwarding. + */ + InstSeqNum stallingStoreIsn; + /** The index of the above store. */ + int stallingLoadIdx; + + /** Whether or not a load is blocked due to the memory system. It is + * cleared when this value is checked via loadBlocked(). + */ + bool isLoadBlocked; + + /** The oldest faulting load instruction. */ + DynInstPtr loadFaultInst; + /** The oldest faulting store instruction. */ + DynInstPtr storeFaultInst; + + /** The oldest load that caused a memory ordering violation. */ + DynInstPtr memDepViolator; + + // Will also need how many read/write ports the Dcache has. Or keep track + // of that in stage that is one level up, and only call executeLoad/Store + // the appropriate number of times. + + public: + /** Executes the load at the given index. */ + template + Fault read(MemReqPtr &req, T &data, int load_idx); + + /** Executes the store at the given index. */ + template + Fault write(MemReqPtr &req, T &data, int store_idx); + + /** Returns the index of the head load instruction. */ + int getLoadHead() { return loadHead; } + /** Returns the sequence number of the head load instruction. */ + InstSeqNum getLoadHeadSeqNum() + { + if (loadQueue[loadHead]) { + return loadQueue[loadHead]->seqNum; + } else { + return 0; + } + + } + + /** Returns the index of the head store instruction. */ + int getStoreHead() { return storeHead; } + /** Returns the sequence number of the head store instruction. */ + InstSeqNum getStoreHeadSeqNum() + { + if (storeQueue[storeHead].inst) { + return storeQueue[storeHead].inst->seqNum; + } else { + return 0; + } + + } + + /** Returns whether or not the LSQ unit is stalled. */ + bool isStalled() { return stalled; } +}; + +template +template +Fault +OzoneLSQ::read(MemReqPtr &req, T &data, int load_idx) +{ + //Depending on issue2execute delay a squashed load could + //execute if it is found to be squashed in the same + //cycle it is scheduled to execute + assert(loadQueue[load_idx]); + + if (loadQueue[load_idx]->isExecuted()) { + panic("Should not reach this point with split ops!"); + + memcpy(&data,req->data,req->size); + + return NoFault; + } + + // Make sure this isn't an uncacheable access + // A bit of a hackish way to get uncached accesses to work only if they're + // at the head of the LSQ and are ready to commit (at the head of the ROB + // too). + // @todo: Fix uncached accesses. + if (req->flags & UNCACHEABLE && + (load_idx != loadHead || !loadQueue[load_idx]->readyToCommit())) { + + return TheISA::genMachineCheckFault(); + } + + // Check the SQ for any previous stores that might lead to forwarding + int store_idx = loadQueue[load_idx]->sqIdx; + + int store_size = 0; + + DPRINTF(OzoneLSQ, "Read called, load idx: %i, store idx: %i, " + "storeHead: %i addr: %#x\n", + load_idx, store_idx, storeHead, req->paddr); + + while (store_idx != -1) { + // End once we've reached the top of the LSQ + if (store_idx == storeWBIdx) { + break; + } + + // Move the index to one younger + if (--store_idx < 0) + store_idx += SQEntries; + + assert(storeQueue[store_idx].inst); + + store_size = storeQueue[store_idx].size; + + if (store_size == 0) + continue; + + // Check if the store data is within the lower and upper bounds of + // addresses that the request needs. + bool store_has_lower_limit = + req->vaddr >= storeQueue[store_idx].inst->effAddr; + bool store_has_upper_limit = + (req->vaddr + req->size) <= (storeQueue[store_idx].inst->effAddr + + store_size); + bool lower_load_has_store_part = + req->vaddr < (storeQueue[store_idx].inst->effAddr + + store_size); + bool upper_load_has_store_part = + (req->vaddr + req->size) > storeQueue[store_idx].inst->effAddr; + + // If the store's data has all of the data needed, we can forward. + if (store_has_lower_limit && store_has_upper_limit) { + + int shift_amt = req->vaddr & (store_size - 1); + // Assumes byte addressing + shift_amt = shift_amt << 3; + + // Cast this to type T? + data = storeQueue[store_idx].data >> shift_amt; + + req->cmd = Read; + assert(!req->completionEvent); + req->completionEvent = NULL; + req->time = curTick; + assert(!req->data); + req->data = new uint8_t[64]; + + memcpy(req->data, &data, req->size); + + DPRINTF(OzoneLSQ, "Forwarding from store idx %i to load to " + "addr %#x, data %#x\n", + store_idx, req->vaddr, *(req->data)); + + typename BackEnd::LdWritebackEvent *wb = + new typename BackEnd::LdWritebackEvent(loadQueue[load_idx], + be); + + // We'll say this has a 1 cycle load-store forwarding latency + // for now. + // FIXME - Need to make this a parameter. + wb->schedule(curTick); + + // Should keep track of stat for forwarded data + return NoFault; + } else if ((store_has_lower_limit && lower_load_has_store_part) || + (store_has_upper_limit && upper_load_has_store_part) || + (lower_load_has_store_part && upper_load_has_store_part)) { + // This is the partial store-load forwarding case where a store + // has only part of the load's data. + + // If it's already been written back, then don't worry about + // stalling on it. + if (storeQueue[store_idx].completed) { + continue; + } + + // Must stall load and force it to retry, so long as it's the oldest + // load that needs to do so. + if (!stalled || + (stalled && + loadQueue[load_idx]->seqNum < + loadQueue[stallingLoadIdx]->seqNum)) { + stalled = true; + stallingStoreIsn = storeQueue[store_idx].inst->seqNum; + stallingLoadIdx = load_idx; + } + + // Tell IQ/mem dep unit that this instruction will need to be + // rescheduled eventually + be->rescheduleMemInst(loadQueue[load_idx]); + + DPRINTF(OzoneLSQ, "Load-store forwarding mis-match. " + "Store idx %i to load addr %#x\n", + store_idx, req->vaddr); + + return NoFault; + } + } + + + // If there's no forwarding case, then go access memory + DynInstPtr inst = loadQueue[load_idx]; + + ++usedPorts; + + // if we have a cache, do cache access too + if (dcacheInterface) { + if (dcacheInterface->isBlocked()) { + isLoadBlocked = true; + // No fault occurred, even though the interface is blocked. + return NoFault; + } + + DPRINTF(OzoneLSQ, "D-cache: PC:%#x reading from paddr:%#x " + "vaddr:%#x flags:%i\n", + inst->readPC(), req->paddr, req->vaddr, req->flags); + + // Setup MemReq pointer + req->cmd = Read; + req->completionEvent = NULL; + req->time = curTick; + assert(!req->data); + req->data = new uint8_t[64]; + + assert(!req->completionEvent); + req->completionEvent = + new typename BackEnd::LdWritebackEvent(loadQueue[load_idx], be); + + // Do Cache Access + MemAccessResult result = dcacheInterface->access(req); + + // Ugly hack to get an event scheduled *only* if the access is + // a miss. We really should add first-class support for this + // at some point. + // @todo: Probably should support having no events + if (result != MA_HIT) { + DPRINTF(OzoneLSQ, "D-cache miss!\n"); + DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n", + inst->seqNum); + + lastDcacheStall = curTick; + + _status = DcacheMissStall; + + } else { +// DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n", +// inst->seqNum); + + DPRINTF(OzoneLSQ, "D-cache hit!\n"); + } + } else { + fatal("Must use D-cache with new memory system"); + } + + return NoFault; +} + +template +template +Fault +OzoneLSQ::write(MemReqPtr &req, T &data, int store_idx) +{ + assert(storeQueue[store_idx].inst); + + DPRINTF(OzoneLSQ, "Doing write to store idx %i, addr %#x data %#x" + " | storeHead:%i [sn:%i]\n", + store_idx, req->paddr, data, storeHead, + storeQueue[store_idx].inst->seqNum); + + storeQueue[store_idx].req = req; + storeQueue[store_idx].size = sizeof(T); + storeQueue[store_idx].data = data; + + // This function only writes the data to the store queue, so no fault + // can happen here. + return NoFault; +} + +template +inline bool +OzoneLSQ::loadBlocked() +{ + bool ret_val = isLoadBlocked; + isLoadBlocked = false; + return ret_val; +} + +#endif // __CPU_OZONE_LSQ_UNIT_HH__ diff --git a/cpu/ozone/lsq_unit_impl.hh b/cpu/ozone/lsq_unit_impl.hh new file mode 100644 index 000000000..6c7977250 --- /dev/null +++ b/cpu/ozone/lsq_unit_impl.hh @@ -0,0 +1,846 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arch/isa_traits.hh" +#include "base/str.hh" +#include "cpu/ozone/lsq_unit.hh" + +template +OzoneLSQ::StoreCompletionEvent::StoreCompletionEvent(int store_idx, + Event *wb_event, + OzoneLSQ *lsq_ptr) + : Event(&mainEventQueue), + storeIdx(store_idx), + wbEvent(wb_event), + lsqPtr(lsq_ptr) +{ + this->setFlags(Event::AutoDelete); +} + +template +void +OzoneLSQ::StoreCompletionEvent::process() +{ + DPRINTF(OzoneLSQ, "Cache miss complete for store idx:%i\n", storeIdx); + + //lsqPtr->removeMSHR(lsqPtr->storeQueue[storeIdx].inst->seqNum); + +// lsqPtr->cpu->wakeCPU(); + if (wbEvent) + wbEvent->process(); + lsqPtr->completeStore(storeIdx); +} + +template +const char * +OzoneLSQ::StoreCompletionEvent::description() +{ + return "LSQ store completion event"; +} + +template +OzoneLSQ::OzoneLSQ() + : loads(0), stores(0), storesToWB(0), stalled(false), isLoadBlocked(false) +{ +} + +template +void +OzoneLSQ::init(Params *params, unsigned maxLQEntries, + unsigned maxSQEntries, unsigned id) + +{ + DPRINTF(OzoneLSQ, "Creating OzoneLSQ%i object.\n",id); + + lsqID = id; + + LQEntries = maxLQEntries; + SQEntries = maxSQEntries; + + loadQueue.resize(LQEntries); + storeQueue.resize(SQEntries); + + + // May want to initialize these entries to NULL + + loadHead = loadTail = 0; + + storeHead = storeWBIdx = storeTail = 0; + + usedPorts = 0; + cachePorts = params->cachePorts; + + dcacheInterface = params->dcacheInterface; + + loadFaultInst = storeFaultInst = memDepViolator = NULL; +} + +template +std::string +OzoneLSQ::name() const +{ + return "lsqunit"; +} + +template +void +OzoneLSQ::clearLQ() +{ + loadQueue.clear(); +} + +template +void +OzoneLSQ::clearSQ() +{ + storeQueue.clear(); +} + +template +void +OzoneLSQ::setPageTable(PageTable *pt_ptr) +{ + DPRINTF(OzoneLSQ, "Setting the page table pointer.\n"); + pTable = pt_ptr; +} + +template +void +OzoneLSQ::resizeLQ(unsigned size) +{ + assert( size >= LQEntries); + + if (size > LQEntries) { + while (size > loadQueue.size()) { + DynInstPtr dummy; + loadQueue.push_back(dummy); + LQEntries++; + } + } else { + LQEntries = size; + } + +} + +template +void +OzoneLSQ::resizeSQ(unsigned size) +{ + if (size > SQEntries) { + while (size > storeQueue.size()) { + SQEntry dummy; + storeQueue.push_back(dummy); + SQEntries++; + } + } else { + SQEntries = size; + } +} + +template +void +OzoneLSQ::insert(DynInstPtr &inst) +{ + // Make sure we really have a memory reference. + assert(inst->isMemRef()); + + // Make sure it's one of the two classes of memory references. + assert(inst->isLoad() || inst->isStore()); + + if (inst->isLoad()) { + insertLoad(inst); + } else { + insertStore(inst); + } + +// inst->setInLSQ(); +} + +template +void +OzoneLSQ::insertLoad(DynInstPtr &load_inst) +{ + assert((loadTail + 1) % LQEntries != loadHead && loads < LQEntries); + + DPRINTF(OzoneLSQ, "Inserting load PC %#x, idx:%i [sn:%lli]\n", + load_inst->readPC(), loadTail, load_inst->seqNum); + + load_inst->lqIdx = loadTail; + + if (stores == 0) { + load_inst->sqIdx = -1; + } else { + load_inst->sqIdx = storeTail; + } + + loadQueue[loadTail] = load_inst; + + incrLdIdx(loadTail); + + ++loads; +} + +template +void +OzoneLSQ::insertStore(DynInstPtr &store_inst) +{ + // Make sure it is not full before inserting an instruction. + assert((storeTail + 1) % SQEntries != storeHead); + assert(stores < SQEntries); + + DPRINTF(OzoneLSQ, "Inserting store PC %#x, idx:%i [sn:%lli]\n", + store_inst->readPC(), storeTail, store_inst->seqNum); + + store_inst->sqIdx = storeTail; + store_inst->lqIdx = loadTail; + + storeQueue[storeTail] = SQEntry(store_inst); + + incrStIdx(storeTail); + + ++stores; + +} + +template +typename Impl::DynInstPtr +OzoneLSQ::getMemDepViolator() +{ + DynInstPtr temp = memDepViolator; + + memDepViolator = NULL; + + return temp; +} + +template +unsigned +OzoneLSQ::numFreeEntries() +{ + unsigned free_lq_entries = LQEntries - loads; + unsigned free_sq_entries = SQEntries - stores; + + // Both the LQ and SQ entries have an extra dummy entry to differentiate + // empty/full conditions. Subtract 1 from the free entries. + if (free_lq_entries < free_sq_entries) { + return free_lq_entries - 1; + } else { + return free_sq_entries - 1; + } +} + +template +int +OzoneLSQ::numLoadsReady() +{ + int load_idx = loadHead; + int retval = 0; + + while (load_idx != loadTail) { + assert(loadQueue[load_idx]); + + if (loadQueue[load_idx]->readyToIssue()) { + ++retval; + } + } + + return retval; +} + +#if 0 +template +Fault +OzoneLSQ::executeLoad() +{ + Fault load_fault = NoFault; + DynInstPtr load_inst; + + assert(readyLoads.size() != 0); + + // Execute a ready load. + LdMapIt ready_it = readyLoads.begin(); + + load_inst = (*ready_it).second; + + // Execute the instruction, which is held in the data portion of the + // iterator. + load_fault = load_inst->execute(); + + // If it executed successfully, then switch it over to the executed + // loads list. + if (load_fault == NoFault) { + executedLoads[load_inst->seqNum] = load_inst; + + readyLoads.erase(ready_it); + } else { + loadFaultInst = load_inst; + } + + return load_fault; +} +#endif + +template +Fault +OzoneLSQ::executeLoad(DynInstPtr &inst) +{ + // Execute a specific load. + Fault load_fault = NoFault; + + DPRINTF(OzoneLSQ, "Executing load PC %#x, [sn:%lli]\n", + inst->readPC(),inst->seqNum); + + // Make sure it's really in the list. + // Normally it should always be in the list. However, + /* due to a syscall it may not be the list. +#ifdef DEBUG + int i = loadHead; + while (1) { + if (i == loadTail && !find(inst)) { + assert(0 && "Load not in the queue!"); + } else if (loadQueue[i] == inst) { + break; + } + + i = i + 1; + if (i >= LQEntries) { + i = 0; + } + } +#endif // DEBUG*/ + + load_fault = inst->initiateAcc(); + + // Might want to make sure that I'm not overwriting a previously faulting + // instruction that hasn't been checked yet. + // Actually probably want the oldest faulting load + if (load_fault != NoFault) { + // Maybe just set it as can commit here, although that might cause + // some other problems with sending traps to the ROB too quickly. +// iewStage->instToCommit(inst); +// iewStage->activityThisCycle(); + } + + return load_fault; +} + +template +Fault +OzoneLSQ::executeLoad(int lq_idx) +{ + // Very hackish. Not sure the best way to check that this + // instruction is at the head of the ROB. I should have some sort + // of extra information here so that I'm not overloading the + // canCommit signal for 15 different things. + loadQueue[lq_idx]->setCanCommit(); + Fault ret_fault = executeLoad(loadQueue[lq_idx]); + loadQueue[lq_idx]->clearCanCommit(); + return ret_fault; +} + +template +Fault +OzoneLSQ::executeStore(DynInstPtr &store_inst) +{ + // Make sure that a store exists. + assert(stores != 0); + + int store_idx = store_inst->sqIdx; + + DPRINTF(OzoneLSQ, "Executing store PC %#x [sn:%lli]\n", + store_inst->readPC(), store_inst->seqNum); + + // Check the recently completed loads to see if any match this store's + // address. If so, then we have a memory ordering violation. + int load_idx = store_inst->lqIdx; + + Fault store_fault = store_inst->initiateAcc(); + + // Store size should now be available. Use it to get proper offset for + // addr comparisons. + int size = storeQueue[store_idx].size; + + if (size == 0) { + DPRINTF(OzoneLSQ,"Fault on Store PC %#x, [sn:%lli],Size = 0\n", + store_inst->readPC(),store_inst->seqNum); + + return store_fault; + } + + assert(store_fault == NoFault); + + if (!storeFaultInst) { + if (store_fault != NoFault) { + panic("Fault in a store instruction!"); + storeFaultInst = store_inst; + } else if (store_inst->isNonSpeculative()) { + // Nonspeculative accesses (namely store conditionals) + // need to set themselves as able to writeback if we + // haven't had a fault by here. + storeQueue[store_idx].canWB = true; + + ++storesToWB; + } + } + + if (!memDepViolator) { + while (load_idx != loadTail) { + // Actually should only check loads that have actually executed + // Might be safe because effAddr is set to InvalAddr when the + // dyn inst is created. + + // Must actually check all addrs in the proper size range + // Which is more correct than needs to be. What if for now we just + // assume all loads are quad-word loads, and do the addr based + // on that. + // @todo: Fix this, magic number being used here + if ((loadQueue[load_idx]->effAddr >> 8) == + (store_inst->effAddr >> 8)) { + // A load incorrectly passed this store. Squash and refetch. + // For now return a fault to show that it was unsuccessful. + memDepViolator = loadQueue[load_idx]; + + return TheISA::genMachineCheckFault(); + } + + incrLdIdx(load_idx); + } + + // If we've reached this point, there was no violation. + memDepViolator = NULL; + } + + return store_fault; +} + +template +void +OzoneLSQ::commitLoad() +{ + assert(loadQueue[loadHead]); + + DPRINTF(OzoneLSQ, "[sn:%lli] Committing head load instruction, PC %#x\n", + loadQueue[loadHead]->seqNum, loadQueue[loadHead]->readPC()); + + + loadQueue[loadHead] = NULL; + + incrLdIdx(loadHead); + + --loads; +} + +template +void +OzoneLSQ::commitLoad(InstSeqNum &inst) +{ + // Hopefully I don't use this function too much + panic("Don't use this function!"); + + int i = loadHead; + while (1) { + if (i == loadTail) { + assert(0 && "Load not in the queue!"); + } else if (loadQueue[i]->seqNum == inst) { + break; + } + + ++i; + if (i >= LQEntries) { + i = 0; + } + } + +// loadQueue[i]->removeInLSQ(); + loadQueue[i] = NULL; + --loads; +} + +template +void +OzoneLSQ::commitLoads(InstSeqNum &youngest_inst) +{ + assert(loads == 0 || loadQueue[loadHead]); + + while (loads != 0 && loadQueue[loadHead]->seqNum <= youngest_inst) { + commitLoad(); + } +} + +template +void +OzoneLSQ::commitStores(InstSeqNum &youngest_inst) +{ + assert(stores == 0 || storeQueue[storeHead].inst); + + int store_idx = storeHead; + + while (store_idx != storeTail) { + assert(storeQueue[store_idx].inst); + if (!storeQueue[store_idx].canWB) { + if (storeQueue[store_idx].inst->seqNum > youngest_inst) { + break; + } + DPRINTF(OzoneLSQ, "Marking store as able to write back, PC " + "%#x [sn:%lli]\n", + storeQueue[store_idx].inst->readPC(), + storeQueue[store_idx].inst->seqNum); + + storeQueue[store_idx].canWB = true; + +// --stores; + ++storesToWB; + } + + incrStIdx(store_idx); + } +} + +template +void +OzoneLSQ::writebackStores() +{ + while (storesToWB > 0 && + storeWBIdx != storeTail && + storeQueue[storeWBIdx].inst && + storeQueue[storeWBIdx].canWB && + usedPorts < cachePorts) { + + if (storeQueue[storeWBIdx].size == 0) { + completeStore(storeWBIdx); + + incrStIdx(storeWBIdx); + + continue; + } + + if (dcacheInterface && dcacheInterface->isBlocked()) { + DPRINTF(OzoneLSQ, "Unable to write back any more stores, cache" + " is blocked!\n"); + break; + } + + ++usedPorts; + + if (storeQueue[storeWBIdx].inst->isDataPrefetch()) { + incrStIdx(storeWBIdx); + + continue; + } + + assert(storeQueue[storeWBIdx].req); + assert(!storeQueue[storeWBIdx].committed); + + MemReqPtr req = storeQueue[storeWBIdx].req; + storeQueue[storeWBIdx].committed = true; + +// Fault fault = cpu->translateDataReadReq(req); + req->cmd = Write; + req->completionEvent = NULL; + req->time = curTick; + assert(!req->data); + req->data = new uint8_t[64]; + memcpy(req->data, (uint8_t *)&storeQueue[storeWBIdx].data, req->size); + + DPRINTF(OzoneLSQ, "D-Cache: Writing back store idx:%i PC:%#x " + "to Addr:%#x, data:%#x [sn:%lli]\n", + storeWBIdx,storeQueue[storeWBIdx].inst->readPC(), + req->paddr, *(req->data), + storeQueue[storeWBIdx].inst->seqNum); + +// if (fault != NoFault) { + //What should we do if there is a fault??? + //for now panic +// panic("Page Table Fault!!!!!\n"); +// } + + if (dcacheInterface) { + MemAccessResult result = dcacheInterface->access(req); + + //@todo temp fix for LL/SC (works fine for 1 CPU) + if (req->flags & LOCKED) { + req->result=1; + panic("LL/SC! oh no no support!!!"); + } + + if (isStalled() && + storeQueue[storeWBIdx].inst->seqNum == stallingStoreIsn) { + DPRINTF(OzoneLSQ, "Unstalling, stalling store [sn:%lli] " + "load idx:%i\n", + stallingStoreIsn, stallingLoadIdx); + stalled = false; + stallingStoreIsn = 0; + be->replayMemInst(loadQueue[stallingLoadIdx]); + } + + if (result != MA_HIT && dcacheInterface->doEvents()) { + Event *wb = NULL; +/* + typename IEW::LdWritebackEvent *wb = NULL; + if (req->flags & LOCKED) { + // Stx_C does not generate a system port transaction. + req->result=0; + wb = new typename IEW::LdWritebackEvent(storeQueue[storeWBIdx].inst, + iewStage); + } +*/ + DPRINTF(OzoneLSQ,"D-Cache Write Miss!\n"); + +// DPRINTF(Activity, "Active st accessing mem miss [sn:%lli]\n", +// storeQueue[storeWBIdx].inst->seqNum); + + // Will stores need their own kind of writeback events? + // Do stores even need writeback events? + assert(!req->completionEvent); + req->completionEvent = new + StoreCompletionEvent(storeWBIdx, wb, this); + + lastDcacheStall = curTick; + + _status = DcacheMissStall; + + //mshrSeqNums.push_back(storeQueue[storeWBIdx].inst->seqNum); + + //DPRINTF(OzoneLSQ, "Added MSHR. count = %i\n",mshrSeqNums.size()); + + // Increment stat here or something + } else { + DPRINTF(OzoneLSQ,"D-Cache: Write Hit on idx:%i !\n", + storeWBIdx); + +// DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n", +// storeQueue[storeWBIdx].inst->seqNum); + + if (req->flags & LOCKED) { + // Stx_C does not generate a system port transaction. + req->result=1; + typename BackEnd::LdWritebackEvent *wb = + new typename BackEnd::LdWritebackEvent(storeQueue[storeWBIdx].inst, + be); + wb->schedule(curTick); + } + + completeStore(storeWBIdx); + } + + incrStIdx(storeWBIdx); + } else { + panic("Must HAVE DCACHE!!!!!\n"); + } + } + + // Not sure this should set it to 0. + usedPorts = 0; + + assert(stores >= 0 && storesToWB >= 0); +} + +/*template +void +OzoneLSQ::removeMSHR(InstSeqNum seqNum) +{ + list::iterator mshr_it = find(mshrSeqNums.begin(), + mshrSeqNums.end(), + seqNum); + + if (mshr_it != mshrSeqNums.end()) { + mshrSeqNums.erase(mshr_it); + DPRINTF(OzoneLSQ, "Removing MSHR. count = %i\n",mshrSeqNums.size()); + } +}*/ + +template +void +OzoneLSQ::squash(const InstSeqNum &squashed_num) +{ + DPRINTF(OzoneLSQ, "Squashing until [sn:%lli]!" + "(Loads:%i Stores:%i)\n",squashed_num,loads,stores); + + int load_idx = loadTail; + decrLdIdx(load_idx); + + while (loads != 0 && loadQueue[load_idx]->seqNum > squashed_num) { + + // Clear the smart pointer to make sure it is decremented. + DPRINTF(OzoneLSQ,"Load Instruction PC %#x squashed, " + "[sn:%lli]\n", + loadQueue[load_idx]->readPC(), + loadQueue[load_idx]->seqNum); + + if (isStalled() && load_idx == stallingLoadIdx) { + stalled = false; + stallingStoreIsn = 0; + stallingLoadIdx = 0; + } + + loadQueue[load_idx]->squashed = true; + loadQueue[load_idx] = NULL; + --loads; + + // Inefficient! + loadTail = load_idx; + + decrLdIdx(load_idx); + } + + int store_idx = storeTail; + decrStIdx(store_idx); + + while (stores != 0 && storeQueue[store_idx].inst->seqNum > squashed_num) { + + // Clear the smart pointer to make sure it is decremented. + DPRINTF(OzoneLSQ,"Store Instruction PC %#x squashed, " + "idx:%i [sn:%lli]\n", + storeQueue[store_idx].inst->readPC(), + store_idx, storeQueue[store_idx].inst->seqNum); + + // I don't think this can happen. It should have been cleared by the + // stalling load. + if (isStalled() && + storeQueue[store_idx].inst->seqNum == stallingStoreIsn) { + panic("Is stalled should have been cleared by stalling load!\n"); + stalled = false; + stallingStoreIsn = 0; + } + + storeQueue[store_idx].inst->squashed = true; + storeQueue[store_idx].inst = NULL; + storeQueue[store_idx].canWB = 0; + + if (storeQueue[store_idx].req) { + assert(!storeQueue[store_idx].req->completionEvent); + } + storeQueue[store_idx].req = NULL; + --stores; + + // Inefficient! + storeTail = store_idx; + + decrStIdx(store_idx); + } +} + +template +void +OzoneLSQ::dumpInsts() +{ + cprintf("Load store queue: Dumping instructions.\n"); + cprintf("Load queue size: %i\n", loads); + cprintf("Load queue: "); + + int load_idx = loadHead; + + while (load_idx != loadTail && loadQueue[load_idx]) { + cprintf("[sn:%lli] %#x ", loadQueue[load_idx]->seqNum, + loadQueue[load_idx]->readPC()); + + incrLdIdx(load_idx); + } + + cprintf("\nStore queue size: %i\n", stores); + cprintf("Store queue: "); + + int store_idx = storeHead; + + while (store_idx != storeTail && storeQueue[store_idx].inst) { + cprintf("[sn:%lli] %#x ", storeQueue[store_idx].inst->seqNum, + storeQueue[store_idx].inst->readPC()); + + incrStIdx(store_idx); + } + + cprintf("\n"); +} + +template +void +OzoneLSQ::completeStore(int store_idx) +{ + assert(storeQueue[store_idx].inst); + storeQueue[store_idx].completed = true; + --storesToWB; + // A bit conservative because a store completion may not free up entries, + // but hopefully avoids two store completions in one cycle from making + // the CPU tick twice. +// cpu->activityThisCycle(); + + if (store_idx == storeHead) { + do { + incrStIdx(storeHead); + + --stores; + } while (storeQueue[storeHead].completed && + storeHead != storeTail); + +// be->updateLSQNextCycle = true; + } + + DPRINTF(OzoneLSQ, "Store head idx:%i\n", storeHead); + + if (isStalled() && + storeQueue[store_idx].inst->seqNum == stallingStoreIsn) { + DPRINTF(OzoneLSQ, "Unstalling, stalling store [sn:%lli] " + "load idx:%i\n", + stallingStoreIsn, stallingLoadIdx); + stalled = false; + stallingStoreIsn = 0; + be->replayMemInst(loadQueue[stallingLoadIdx]); + } +} + +template +inline void +OzoneLSQ::incrStIdx(int &store_idx) +{ + if (++store_idx >= SQEntries) + store_idx = 0; +} + +template +inline void +OzoneLSQ::decrStIdx(int &store_idx) +{ + if (--store_idx < 0) + store_idx += SQEntries; +} + +template +inline void +OzoneLSQ::incrLdIdx(int &load_idx) +{ + if (++load_idx >= LQEntries) + load_idx = 0; +} + +template +inline void +OzoneLSQ::decrLdIdx(int &load_idx) +{ + if (--load_idx < 0) + load_idx += LQEntries; +} diff --git a/cpu/ozone/null_predictor.hh b/cpu/ozone/null_predictor.hh new file mode 100644 index 000000000..d19e2cd1c --- /dev/null +++ b/cpu/ozone/null_predictor.hh @@ -0,0 +1,76 @@ + +#ifndef __CPU_OZONE_NULL_PREDICTOR_HH__ +#define __CPU_OZONE_NULL_PREDICTOR_HH__ + +#include "arch/isa_traits.hh" +#include "cpu/inst_seq.hh" + +template +class NullPredictor +{ + public: + typedef typename Impl::Params Params; + typedef typename Impl::DynInstPtr DynInstPtr; + + NullPredictor(Params *p) { } + + struct BPredInfo { + BPredInfo() + : PC(0), nextPC(0) + { } + + BPredInfo(const Addr &pc, const Addr &next_pc) + : PC(pc), nextPC(next_pc) + { } + + Addr PC; + Addr nextPC; + }; + + BPredInfo lookup(Addr &PC) { return BPredInfo(PC, PC+4); } + + void undo(BPredInfo &bp_info) { return; } + + /** + * Predicts whether or not the instruction is a taken branch, and the + * target of the branch if it is taken. + * @param inst The branch instruction. + * @param PC The predicted PC is passed back through this parameter. + * @param tid The thread id. + * @return Returns if the branch is taken or not. + */ + bool predict(DynInstPtr &inst, Addr &PC, unsigned tid) + { return false; } + + /** + * Tells the branch predictor to commit any updates until the given + * sequence number. + * @param done_sn The sequence number to commit any older updates up until. + * @param tid The thread id. + */ + void update(const InstSeqNum &done_sn, unsigned tid) { } + + /** + * Squashes all outstanding updates until a given sequence number. + * @param squashed_sn The sequence number to squash any younger updates up + * until. + * @param tid The thread id. + */ + void squash(const InstSeqNum &squashed_sn, unsigned tid) { } + + /** + * Squashes all outstanding updates until a given sequence number, and + * corrects that sn's update with the proper address and taken/not taken. + * @param squashed_sn The sequence number to squash any younger updates up + * until. + * @param corr_target The correct branch target. + * @param actually_taken The correct branch direction. + * @param tid The thread id. + */ + void squash(const InstSeqNum &squashed_sn, const Addr &corr_target, + bool actually_taken, unsigned tid) + { } + +}; + +#endif // __CPU_OZONE_NULL_PREDICTOR_HH__ diff --git a/cpu/ozone/ozone_impl.hh b/cpu/ozone/ozone_impl.hh new file mode 100644 index 000000000..a2c706c60 --- /dev/null +++ b/cpu/ozone/ozone_impl.hh @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_OZONE_OZONE_IMPL_HH__ +#define __CPU_OZONE_OZONE_IMPL_HH__ + +#include "arch/alpha/isa_traits.hh" +#include "cpu/o3/bpred_unit.hh" +#include "cpu/ozone/back_end.hh" +#include "cpu/ozone/front_end.hh" +#include "cpu/ozone/inst_queue.hh" +#include "cpu/ozone/lsq_unit.hh" +#include "cpu/ozone/null_predictor.hh" +#include "cpu/ozone/dyn_inst.hh" +#include "cpu/ozone/simple_params.hh" + +template +class OzoneCPU; + +template +class OzoneDynInst; + +struct OzoneImpl { + typedef SimpleParams Params; + typedef OzoneCPU OzoneCPU; + typedef OzoneCPU FullCPU; + + // Would like to put these into their own area. +// typedef NullPredictor BranchPred; + typedef TwobitBPredUnit BranchPred; + typedef FrontEnd FrontEnd; + // Will need IQ, LSQ eventually + typedef BackEnd BackEnd; + + typedef InstQueue InstQueue; + typedef OzoneLSQ LdstQueue; + + typedef OzoneDynInst DynInst; + typedef RefCountingPtr DynInstPtr; + + typedef uint64_t IssueStruct; + + enum { + MaxThreads = 1 + }; +}; + +#endif // __CPU_OZONE_OZONE_IMPL_HH__ diff --git a/cpu/ozone/rename_table.cc b/cpu/ozone/rename_table.cc new file mode 100644 index 000000000..fff41903e --- /dev/null +++ b/cpu/ozone/rename_table.cc @@ -0,0 +1,7 @@ + +#include "cpu/ozone/rename_table_impl.hh" +#include "cpu/ozone/ozone_impl.hh" +#include "cpu/ozone/simple_impl.hh" + +template class RenameTable; +template class RenameTable; diff --git a/cpu/ozone/rename_table.hh b/cpu/ozone/rename_table.hh new file mode 100644 index 000000000..afbf6ff32 --- /dev/null +++ b/cpu/ozone/rename_table.hh @@ -0,0 +1,25 @@ +#ifndef __CPU_OZONE_RENAME_TABLE_HH__ +#define __CPU_OZONE_RENAME_TABLE_HH__ + +#include "arch/isa_traits.hh" + +/** Rename table that holds the rename of each architectural register to + * producing DynInst. Needs to support copying from one table to another. + */ + +template +class RenameTable { + public: + typedef typename Impl::DynInstPtr DynInstPtr; + + RenameTable(); + + void copyFrom(const RenameTable &table_to_copy); + + DynInstPtr &operator [] (int index) + { return table[index]; } + + DynInstPtr table[TheISA::TotalNumRegs]; +}; + +#endif // __CPU_OZONE_RENAME_TABLE_HH__ diff --git a/cpu/ozone/rename_table_impl.hh b/cpu/ozone/rename_table_impl.hh new file mode 100644 index 000000000..86fc1cc55 --- /dev/null +++ b/cpu/ozone/rename_table_impl.hh @@ -0,0 +1,23 @@ + +#include // Not really sure what to include to get NULL +#include "cpu/ozone/rename_table.hh" + +template +RenameTable::RenameTable() +{ + // Actually should set these to dummy dyn insts that have the initial value + // and force their values to be initialized. This keeps everything the + // same. + for (int i = 0; i < TheISA::TotalNumRegs; ++i) { + table[i] = NULL; + } +} + +template +void +RenameTable::copyFrom(const RenameTable &table_to_copy) +{ + for (int i = 0; i < TheISA::TotalNumRegs; ++i) { + table[i] = table_to_copy.table[i]; + } +} diff --git a/cpu/ozone/simple_impl.hh b/cpu/ozone/simple_impl.hh new file mode 100644 index 000000000..961bf2ea9 --- /dev/null +++ b/cpu/ozone/simple_impl.hh @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_OZONE_SIMPLE_IMPL_HH__ +#define __CPU_OZONE_SIMPLE_IMPL_HH__ + +#include "arch/isa_traits.hh" +#include "cpu/o3/bpred_unit.hh" +#include "cpu/ozone/cpu.hh" +#include "cpu/ozone/front_end.hh" +#include "cpu/ozone/inorder_back_end.hh" +#include "cpu/ozone/null_predictor.hh" +#include "cpu/ozone/dyn_inst.hh" +#include "cpu/ozone/simple_params.hh" + +//template +//class OzoneCPU; + +template +class OzoneDynInst; + +struct SimpleImpl { + typedef SimpleParams Params; + typedef OzoneCPU OzoneCPU; + typedef OzoneCPU FullCPU; + + // Would like to put these into their own area. +// typedef NullPredictor BranchPred; + typedef TwobitBPredUnit BranchPred; + typedef FrontEnd FrontEnd; + // Will need IQ, LSQ eventually + typedef InorderBackEnd BackEnd; + + typedef OzoneDynInst DynInst; + typedef RefCountingPtr DynInstPtr; + + typedef uint64_t IssueStruct; + + enum { + MaxThreads = 1 + }; +}; + +#endif // __CPU_OZONE_SIMPLE_IMPL_HH__ diff --git a/cpu/ozone/simple_params.hh b/cpu/ozone/simple_params.hh new file mode 100644 index 000000000..e503654aa --- /dev/null +++ b/cpu/ozone/simple_params.hh @@ -0,0 +1,164 @@ + + +#ifndef __CPU_OZONE_SIMPLE_PARAMS_HH__ +#define __CPU_OZONE_SIMPLE_PARAMS_HH__ + +#include "cpu/ozone/cpu.hh" + +//Forward declarations +class AlphaDTB; +class AlphaITB; +class FUPool; +class FunctionalMemory; +class MemInterface; +class PageTable; +class Process; +class System; + +/** + * This file defines the parameters that will be used for the OzoneCPU. + * This must be defined externally so that the Impl can have a params class + * defined that it can pass to all of the individual stages. + */ + +class SimpleParams : public BaseCPU::Params +{ + public: + +#if FULL_SYSTEM + AlphaITB *itb; AlphaDTB *dtb; +#else + std::vector workload; +// Process *process; +#endif // FULL_SYSTEM + + //Page Table + PageTable *pTable; + + FunctionalMemory *mem; + + // + // Caches + // + MemInterface *icacheInterface; + MemInterface *dcacheInterface; + + unsigned cachePorts; + unsigned width; + unsigned frontEndWidth; + unsigned backEndWidth; + unsigned backEndSquashLatency; + unsigned backEndLatency; + unsigned maxInstBufferSize; + unsigned numPhysicalRegs; + // + // Fetch + // + unsigned decodeToFetchDelay; + unsigned renameToFetchDelay; + unsigned iewToFetchDelay; + unsigned commitToFetchDelay; + unsigned fetchWidth; + + // + // Decode + // + unsigned renameToDecodeDelay; + unsigned iewToDecodeDelay; + unsigned commitToDecodeDelay; + unsigned fetchToDecodeDelay; + unsigned decodeWidth; + + // + // Rename + // + unsigned iewToRenameDelay; + unsigned commitToRenameDelay; + unsigned decodeToRenameDelay; + unsigned renameWidth; + + // + // IEW + // + unsigned commitToIEWDelay; + unsigned renameToIEWDelay; + unsigned issueToExecuteDelay; + unsigned issueWidth; + unsigned executeWidth; + unsigned executeIntWidth; + unsigned executeFloatWidth; + unsigned executeBranchWidth; + unsigned executeMemoryWidth; + FUPool *fuPool; + + // + // Commit + // + unsigned iewToCommitDelay; + unsigned renameToROBDelay; + unsigned commitWidth; + unsigned squashWidth; + + // + // Branch predictor (BP & BTB) + // + unsigned localPredictorSize; + unsigned localCtrBits; + unsigned localHistoryTableSize; + unsigned localHistoryBits; + unsigned globalPredictorSize; + unsigned globalCtrBits; + unsigned globalHistoryBits; + unsigned choicePredictorSize; + unsigned choiceCtrBits; + + unsigned BTBEntries; + unsigned BTBTagSize; + + unsigned RASSize; + + // + // Load store queue + // + unsigned LQEntries; + unsigned SQEntries; + + // + // Memory dependence + // + unsigned SSITSize; + unsigned LFSTSize; + + // + // Miscellaneous + // + unsigned numPhysIntRegs; + unsigned numPhysFloatRegs; + unsigned numIQEntries; + unsigned numROBEntries; + + bool decoupledFrontEnd; + int dispatchWidth; + int wbWidth; + + //SMT Parameters + unsigned smtNumFetchingThreads; + + std::string smtFetchPolicy; + + std::string smtIQPolicy; + unsigned smtIQThreshold; + + std::string smtLSQPolicy; + unsigned smtLSQThreshold; + + std::string smtCommitPolicy; + + std::string smtROBPolicy; + unsigned smtROBThreshold; + + // Probably can get this from somewhere. + unsigned instShiftAmt; +}; + +#endif // __CPU_OZONE_SIMPLE_PARAMS_HH__ diff --git a/cpu/ozone/thread_state.hh b/cpu/ozone/thread_state.hh new file mode 100644 index 000000000..c6d23a63b --- /dev/null +++ b/cpu/ozone/thread_state.hh @@ -0,0 +1,171 @@ + +#ifndef __CPU_OZONE_THREAD_STATE_HH__ +#define __CPU_OZONE_THREAD_STATE_HH__ + +#include "arch/faults.hh" +#include "arch/isa_traits.hh" +#include "cpu/exec_context.hh" +#include "cpu/thread_state.hh" + +class Event; +class Process; + +#if FULL_SYSTEM +class EndQuiesceEvent; +class FunctionProfile; +class ProfileNode; +#else +class Process; +class FunctionalMemory; +#endif + +// Maybe this ozone thread state should only really have committed state? +// I need to think about why I'm using this and what it's useful for. Clearly +// has benefits for SMT; basically serves same use as CPUExecContext. +// Makes the ExecContext proxy easier. Gives organization/central access point +// to state of a thread that can be accessed normally (i.e. not in-flight +// stuff within a OoO processor). Does this need an XC proxy within it? +template +struct OzoneThreadState : public ThreadState { + typedef typename ExecContext::Status Status; + typedef typename Impl::FullCPU FullCPU; + typedef TheISA::MiscReg MiscReg; + +#if FULL_SYSTEM + OzoneThreadState(FullCPU *_cpu, int _thread_num, FunctionalMemory *_mem) + : ThreadState(-1, _thread_num, _mem), + inSyscall(0), trapPending(0) + { + memset(®s, 0, sizeof(TheISA::RegFile)); + } +#else + OzoneThreadState(FullCPU *_cpu, int _thread_num, Process *_process, int _asid) + : ThreadState(-1, _thread_num, NULL, _process, _asid), + cpu(_cpu), inSyscall(0), trapPending(0) + { + memset(®s, 0, sizeof(TheISA::RegFile)); + } + + OzoneThreadState(FullCPU *_cpu, int _thread_num, FunctionalMemory *_mem, + int _asid) + : ThreadState(-1, _thread_num, _mem, NULL, _asid), + cpu(_cpu), inSyscall(0), trapPending(0) + { + memset(®s, 0, sizeof(TheISA::RegFile)); + } +#endif + + Status _status; + + Status status() const { return _status; } + + void setStatus(Status new_status) { _status = new_status; } + + RenameTable renameTable; // Should I include backend and frontend + // tables here? For the ozone CPU, maybe, for the new full CPU, probably + // not...you wouldn't want threads just accessing the backend/frontend + // rename tables. + Addr PC; // What should these be set to? Probably the committed ones. + Addr nextPC; + + // Current instruction? + TheISA::MachInst inst; + + TheISA::RegFile regs; + // Front end? Back end? +// MemReqPtr memReq; + + typename Impl::FullCPU *cpu; + + bool inSyscall; + + bool trapPending; + + ExecContext *xcProxy; + + ExecContext *getXCProxy() { return xcProxy; } + +#if !FULL_SYSTEM + + Fault dummyTranslation(MemReqPtr &req) + { +#if 0 + assert((req->vaddr >> 48 & 0xffff) == 0); +#endif + + // put the asid in the upper 16 bits of the paddr + req->paddr = req->vaddr & ~((Addr)0xffff << sizeof(Addr) * 8 - 16); + req->paddr = req->paddr | (Addr)req->asid << sizeof(Addr) * 8 - 16; + return NoFault; + } + Fault translateInstReq(MemReqPtr &req) + { + return dummyTranslation(req); + } + Fault translateDataReadReq(MemReqPtr &req) + { + return dummyTranslation(req); + } + Fault translateDataWriteReq(MemReqPtr &req) + { + return dummyTranslation(req); + } +#else + Fault translateInstReq(MemReqPtr &req) + { + return cpu->itb->translate(req); + } + + Fault translateDataReadReq(MemReqPtr &req) + { + return cpu->dtb->translate(req, false); + } + + Fault translateDataWriteReq(MemReqPtr &req) + { + return cpu->dtb->translate(req, true); + } +#endif + + MiscReg readMiscReg(int misc_reg) + { + return regs.miscRegs.readReg(misc_reg); + } + + MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault) + { + return regs.miscRegs.readRegWithEffect(misc_reg, fault, xcProxy); + } + + Fault setMiscReg(int misc_reg, const MiscReg &val) + { + return regs.miscRegs.setReg(misc_reg, val); + } + + Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val) + { + return regs.miscRegs.setRegWithEffect(misc_reg, val, xcProxy); + } + + uint64_t readPC() + { return PC; } + + void setPC(uint64_t val) + { PC = val; } + + uint64_t readNextPC() + { return nextPC; } + + void setNextPC(uint64_t val) + { nextPC = val; } + + bool misspeculating() { return false; } + + void setInst(TheISA::MachInst _inst) { inst = _inst; } + + Counter readFuncExeInst() { return funcExeInst; } + + void setFuncExeInst(Counter new_val) { funcExeInst = new_val; } +}; + +#endif // __CPU_OZONE_THREAD_STATE_HH__ -- cgit v1.2.3 From 6b4396111ba26fd16c7cf0047c4cb3e13036c298 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Sat, 22 Apr 2006 19:10:39 -0400 Subject: Updates for OzoneCPU. cpu/static_inst.hh: Updates for new CPU, also include a classification of quiesce instructions. --HG-- extra : convert_revision : a34cd56da88fe57d7de24674fbb375bbf13f887f --- cpu/ozone/back_end.cc | 2 +- cpu/ozone/back_end.hh | 7 + cpu/ozone/back_end_impl.hh | 85 ++- cpu/ozone/cpu.hh | 20 +- cpu/ozone/cpu_builder.cc | 12 +- cpu/ozone/cpu_impl.hh | 54 +- cpu/ozone/front_end.hh | 4 + cpu/ozone/front_end_impl.hh | 13 +- cpu/ozone/inorder_back_end.hh | 1 + cpu/ozone/lsq_unit.hh | 9 +- cpu/ozone/lsq_unit_impl.hh | 4 +- cpu/ozone/lw_back_end.cc | 5 + cpu/ozone/lw_back_end.hh | 503 ++++++++++++++ cpu/ozone/lw_back_end_impl.hh | 1486 +++++++++++++++++++++++++++++++++++++++++ cpu/ozone/lw_lsq.cc | 34 + cpu/ozone/lw_lsq.hh | 649 ++++++++++++++++++ cpu/ozone/lw_lsq_impl.hh | 766 +++++++++++++++++++++ cpu/ozone/ozone_impl.hh | 6 +- 18 files changed, 3612 insertions(+), 48 deletions(-) create mode 100644 cpu/ozone/lw_back_end.cc create mode 100644 cpu/ozone/lw_back_end.hh create mode 100644 cpu/ozone/lw_back_end_impl.hh create mode 100644 cpu/ozone/lw_lsq.cc create mode 100644 cpu/ozone/lw_lsq.hh create mode 100644 cpu/ozone/lw_lsq_impl.hh (limited to 'cpu/ozone') diff --git a/cpu/ozone/back_end.cc b/cpu/ozone/back_end.cc index dbab5435e..cb014e4cc 100644 --- a/cpu/ozone/back_end.cc +++ b/cpu/ozone/back_end.cc @@ -2,4 +2,4 @@ #include "cpu/ozone/back_end_impl.hh" #include "cpu/ozone/ozone_impl.hh" -template class BackEnd; +//template class BackEnd; diff --git a/cpu/ozone/back_end.hh b/cpu/ozone/back_end.hh index 0713a0143..14b011ab8 100644 --- a/cpu/ozone/back_end.hh +++ b/cpu/ozone/back_end.hh @@ -125,6 +125,7 @@ class BackEnd InstList nonSpec; InstList replayList; ReadyInstQueue readyQueue; + public: int size; int numInsts; int width; @@ -321,6 +322,12 @@ class BackEnd int numROBEntries; int numInsts; + bool squashPending; + InstSeqNum squashSeqNum; + Addr squashNextPC; + + Fault faultFromFetch; + private: typedef typename std::list::iterator InstListIt; diff --git a/cpu/ozone/back_end_impl.hh b/cpu/ozone/back_end_impl.hh index 807afaf2e..0b0f04f59 100644 --- a/cpu/ozone/back_end_impl.hh +++ b/cpu/ozone/back_end_impl.hh @@ -100,6 +100,7 @@ BackEnd::InstQueue::insert(DynInstPtr &inst) numInsts++; inst_count[0]++; if (!inst->isNonSpeculative()) { + DPRINTF(BE, "Instruction [sn:%lli] added to IQ\n", inst->seqNum); if (inst->readyToIssue()) { toBeScheduled.push_front(inst); inst->iqIt = toBeScheduled.begin(); @@ -110,6 +111,7 @@ BackEnd::InstQueue::insert(DynInstPtr &inst) inst->iqItValid = true; } } else { + DPRINTF(BE, "Nonspeculative instruction [sn:%lli] added to IQ\n", inst->seqNum); nonSpec.push_front(inst); inst->iqIt = nonSpec.begin(); inst->iqItValid = true; @@ -159,6 +161,8 @@ BackEnd::InstQueue::scheduleNonSpec(const InstSeqNum &sn) */ DynInstPtr inst = nonSpec.back(); + DPRINTF(BE, "Nonspeculative instruction [sn:%lli] scheduled\n", inst->seqNum); + assert(inst->seqNum == sn); assert(find(NonSpec, inst->iqIt)); @@ -193,6 +197,7 @@ BackEnd::InstQueue::squash(const InstSeqNum &sn) InstListIt iq_end_it = iq.end(); while (iq_it != iq_end_it && (*iq_it)->seqNum > sn) { + DPRINTF(BE, "Instruction [sn:%lli] removed from IQ\n", (*iq_it)->seqNum); (*iq_it)->iqItValid = false; iq.erase(iq_it++); --numInsts; @@ -202,6 +207,7 @@ BackEnd::InstQueue::squash(const InstSeqNum &sn) iq_end_it = nonSpec.end(); while (iq_it != iq_end_it && (*iq_it)->seqNum > sn) { + DPRINTF(BE, "Instruction [sn:%lli] removed from IQ\n", (*iq_it)->seqNum); (*iq_it)->iqItValid = false; nonSpec.erase(iq_it++); --numInsts; @@ -212,6 +218,7 @@ BackEnd::InstQueue::squash(const InstSeqNum &sn) while (iq_it != iq_end_it) { if ((*iq_it)->seqNum > sn) { + DPRINTF(BE, "Instruction [sn:%lli] removed from IQ\n", (*iq_it)->seqNum); (*iq_it)->iqItValid = false; replayList.erase(iq_it++); --numInsts; @@ -243,20 +250,24 @@ BackEnd::InstQueue::wakeDependents(DynInstPtr &inst) std::vector &dependents = inst->getDependents(); int num_outputs = dependents.size(); + DPRINTF(BE, "Waking instruction [sn:%lli] dependents in IQ\n", inst->seqNum); + for (int i = 0; i < num_outputs; i++) { - DynInstPtr inst = dependents[i]; - inst->markSrcRegReady(); - if (inst->readyToIssue() && inst->iqItValid) { - if (inst->isNonSpeculative()) { - assert(find(NonSpec, inst->iqIt)); - nonSpec.erase(inst->iqIt); + DynInstPtr dep_inst = dependents[i]; + dep_inst->markSrcRegReady(); + DPRINTF(BE, "Marking source reg ready [sn:%lli] in IQ\n", dep_inst->seqNum); + + if (dep_inst->readyToIssue() && dep_inst->iqItValid) { + if (dep_inst->isNonSpeculative()) { + assert(find(NonSpec, dep_inst->iqIt)); + nonSpec.erase(dep_inst->iqIt); } else { - assert(find(IQ, inst->iqIt)); - iq.erase(inst->iqIt); + assert(find(IQ, dep_inst->iqIt)); + iq.erase(dep_inst->iqIt); } - toBeScheduled.push_front(inst); - inst->iqIt = toBeScheduled.begin(); + toBeScheduled.push_front(dep_inst); + dep_inst->iqIt = toBeScheduled.begin(); } } return num_outputs; @@ -266,6 +277,7 @@ template void BackEnd::InstQueue::rescheduleMemInst(DynInstPtr &inst) { + DPRINTF(BE, "Rescheduling memory instruction [sn:%lli]\n", inst->seqNum); assert(!inst->iqItValid); replayList.push_front(inst); inst->iqIt = replayList.begin(); @@ -277,11 +289,14 @@ template void BackEnd::InstQueue::replayMemInst(DynInstPtr &inst) { + DPRINTF(BE, "Replaying memory instruction [sn:%lli]\n", inst->seqNum); assert(find(ReplayList, inst->iqIt)); InstListIt iq_it = --replayList.end(); InstListIt iq_end_it = replayList.end(); while (iq_it != iq_end_it) { DynInstPtr rescheduled_inst = (*iq_it); + + DPRINTF(BE, "Memory instruction [sn:%lli] also replayed\n", inst->seqNum); replayList.erase(iq_it--); toBeScheduled.push_front(rescheduled_inst); rescheduled_inst->iqIt = toBeScheduled.begin(); @@ -952,6 +967,9 @@ BackEnd::tick() commitInsts(); + DPRINTF(BE, "IQ entries in use: %i, ROB entries in use: %i, LSQ loads: %i, LSQ stores: %i\n", + IQ.numInsts, numInsts, LSQ.numLoads(), LSQ.numStores()); + assert(numInsts == instList.size()); } @@ -1034,11 +1052,11 @@ BackEnd::dispatchInsts() // Get instruction from front of time buffer DynInstPtr inst = dispatch.front(); dispatch.pop_front(); + --dispatchSize; if (inst->isSquashed()) continue; - --dispatchSize; ++numInsts; instList.push_back(inst); @@ -1118,6 +1136,7 @@ template void BackEnd::checkDispatchStatus() { + DPRINTF(BE, "Checking dispatch status\n"); assert(dispatchStatus == Blocked); if (!IQ.isFull() && !LSQ.isFull() && !isFull()) { DPRINTF(BE, "Dispatch no longer blocked\n"); @@ -1526,6 +1545,24 @@ BackEnd::commitInst(int inst_num) // Write the done sequence number here. toIEW->doneSeqNum = inst->seqNum; +#if FULL_SYSTEM + int count = 0; + Addr oldpc; + do { + if (count == 0) + assert(!thread->inSyscall && !thread->trapPending); + oldpc = thread->readPC(); + cpu->system->pcEventQueue.service( + thread->getXCProxy()); + count++; + } while (oldpc != thread->readPC()); + if (count > 1) { + DPRINTF(BE, "PC skip function event, stopping commit\n"); +// completed_last_inst = false; +// squashPending = true; + return false; + } +#endif return true; } @@ -1566,7 +1603,11 @@ BackEnd::squash(const InstSeqNum &sn) while (insts_it != dispatch_end && (*insts_it)->seqNum > sn) { - DPRINTF(BE, "Squashing instruction PC %#x, [sn:%lli].\n", + if ((*insts_it)->isSquashed()) { + --insts_it; + continue; + } + DPRINTF(BE, "Squashing instruction on dispatch list PC %#x, [sn:%lli].\n", (*insts_it)->readPC(), (*insts_it)->seqNum); @@ -1576,9 +1617,12 @@ BackEnd::squash(const InstSeqNum &sn) (*insts_it)->setCanCommit(); + // Be careful with IPRs and such here for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) { - renameTable[(*insts_it)->destRegIdx(i)] = - (*insts_it)->getPrevDestInst(i); + DynInstPtr prev_dest = (*insts_it)->getPrevDestInst(i); + DPRINTF(BE, "Commit rename map setting register %i to [sn:%lli]\n", + (int)(*insts_it)->destRegIdx(i), prev_dest); + renameTable[(*insts_it)->destRegIdx(i)] = prev_dest; ++freed_regs; } @@ -1592,7 +1636,11 @@ BackEnd::squash(const InstSeqNum &sn) while (!instList.empty() && (*insts_it)->seqNum > sn) { - DPRINTF(BE, "Squashing instruction PC %#x, [sn:%lli].\n", + if ((*insts_it)->isSquashed()) { + --insts_it; + continue; + } + DPRINTF(BE, "Squashing instruction on inst list PC %#x, [sn:%lli].\n", (*insts_it)->readPC(), (*insts_it)->seqNum); @@ -1603,8 +1651,10 @@ BackEnd::squash(const InstSeqNum &sn) (*insts_it)->setCanCommit(); for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) { - renameTable[(*insts_it)->destRegIdx(i)] = - (*insts_it)->getPrevDestInst(i); + DynInstPtr prev_dest = (*insts_it)->getPrevDestInst(i); + DPRINTF(BE, "Commit rename map setting register %i to [sn:%lli]\n", + (int)(*insts_it)->destRegIdx(i), prev_dest); + renameTable[(*insts_it)->destRegIdx(i)] = prev_dest; ++freed_regs; } @@ -1649,6 +1699,7 @@ template void BackEnd::fetchFault(Fault &fault) { + faultFromFetch = fault; } template diff --git a/cpu/ozone/cpu.hh b/cpu/ozone/cpu.hh index 200ced265..17e0f5c42 100644 --- a/cpu/ozone/cpu.hh +++ b/cpu/ozone/cpu.hh @@ -42,6 +42,7 @@ #include "cpu/pc_event.hh" #include "cpu/static_inst.hh" #include "mem/mem_interface.hh" +#include "mem/page_table.hh" #include "sim/eventq.hh" // forward declarations @@ -427,34 +428,22 @@ class OzoneCPU : public BaseCPU int getInstAsid() { return thread.asid; } int getDataAsid() { return thread.asid; } - Fault dummyTranslation(MemReqPtr &req) - { -#if 0 - assert((req->vaddr >> 48 & 0xffff) == 0); -#endif - - // put the asid in the upper 16 bits of the paddr - req->paddr = req->vaddr & ~((Addr)0xffff << sizeof(Addr) * 8 - 16); - req->paddr = req->paddr | (Addr)req->asid << sizeof(Addr) * 8 - 16; - return NoFault; - } - /** Translates instruction requestion in syscall emulation mode. */ Fault translateInstReq(MemReqPtr &req) { - return dummyTranslation(req); + return this->pTable->translate(req); } /** Translates data read request in syscall emulation mode. */ Fault translateDataReadReq(MemReqPtr &req) { - return dummyTranslation(req); + return this->pTable->translate(req); } /** Translates data write request in syscall emulation mode. */ Fault translateDataWriteReq(MemReqPtr &req) { - return dummyTranslation(req); + return this->pTable->translate(req); } #endif /** CPU read function, forwards read to LSQ. */ @@ -500,6 +489,7 @@ class OzoneCPU : public BaseCPU bool inPalMode() { return AlphaISA::PcPAL(thread.PC); } bool inPalMode(Addr pc) { return AlphaISA::PcPAL(pc); } bool simPalCheck(int palFunc); + void processInterrupts(); #else void syscall(); void setSyscallReturn(SyscallReturn return_value, int tid); diff --git a/cpu/ozone/cpu_builder.cc b/cpu/ozone/cpu_builder.cc index 0146dd1bd..8ac6858b0 100644 --- a/cpu/ozone/cpu_builder.cc +++ b/cpu/ozone/cpu_builder.cc @@ -45,7 +45,7 @@ SimObjectParam itb; SimObjectParam dtb; #else SimObjectVectorParam workload; -//SimObjectParam page_table; +SimObjectParam page_table; #endif // FULL_SYSTEM SimObjectParam mem; @@ -159,7 +159,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU) INIT_PARAM(dtb, "Data translation buffer"), #else INIT_PARAM(workload, "Processes to run"), -// INIT_PARAM(page_table, "Page table"), + INIT_PARAM(page_table, "Page table"), #endif // FULL_SYSTEM INIT_PARAM_DFLT(mem, "Memory", NULL), @@ -310,7 +310,7 @@ CREATE_SIM_OBJECT(DerivOzoneCPU) params->dtb = dtb; #else params->workload = workload; -// params->pTable = page_table; + params->pTable = page_table; #endif // FULL_SYSTEM params->mem = mem; @@ -440,7 +440,7 @@ SimObjectParam itb; SimObjectParam dtb; #else SimObjectVectorParam workload; -//SimObjectParam page_table; +SimObjectParam page_table; #endif // FULL_SYSTEM SimObjectParam mem; @@ -554,7 +554,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(SimpleOzoneCPU) INIT_PARAM(dtb, "Data translation buffer"), #else INIT_PARAM(workload, "Processes to run"), -// INIT_PARAM(page_table, "Page table"), + INIT_PARAM(page_table, "Page table"), #endif // FULL_SYSTEM INIT_PARAM_DFLT(mem, "Memory", NULL), @@ -705,7 +705,7 @@ CREATE_SIM_OBJECT(SimpleOzoneCPU) params->dtb = dtb; #else params->workload = workload; -// params->pTable = page_table; + params->pTable = page_table; #endif // FULL_SYSTEM params->mem = mem; diff --git a/cpu/ozone/cpu_impl.hh b/cpu/ozone/cpu_impl.hh index 36ec30b2c..c205ad319 100644 --- a/cpu/ozone/cpu_impl.hh +++ b/cpu/ozone/cpu_impl.hh @@ -765,7 +765,7 @@ void OzoneCPU::squashFromXC() { thread.inSyscall = true; - backEnd->squashFromXC(); + backEnd->generateXCEvent(); } #if !FULL_SYSTEM @@ -832,6 +832,58 @@ OzoneCPU::hwrei() return NoFault; } +template +void +OzoneCPU::processInterrupts() +{ + // Check for interrupts here. For now can copy the code that + // exists within isa_fullsys_traits.hh. Also assume that thread 0 + // is the one that handles the interrupts. + + // Check if there are any outstanding interrupts + //Handle the interrupts + int ipl = 0; + int summary = 0; + + checkInterrupts = false; + + if (thread.readMiscReg(IPR_ASTRR)) + panic("asynchronous traps not implemented\n"); + + if (thread.readMiscReg(IPR_SIRR)) { + for (int i = INTLEVEL_SOFTWARE_MIN; + i < INTLEVEL_SOFTWARE_MAX; i++) { + if (thread.readMiscReg(IPR_SIRR) & (ULL(1) << i)) { + // See table 4-19 of the 21164 hardware reference + ipl = (i - INTLEVEL_SOFTWARE_MIN) + 1; + summary |= (ULL(1) << i); + } + } + } + + uint64_t interrupts = intr_status(); + + if (interrupts) { + for (int i = INTLEVEL_EXTERNAL_MIN; + i < INTLEVEL_EXTERNAL_MAX; i++) { + if (interrupts & (ULL(1) << i)) { + // See table 4-19 of the 21164 hardware reference + ipl = i; + summary |= (ULL(1) << i); + } + } + } + + if (ipl && ipl > thread.readMiscReg(IPR_IPLR)) { + thread.setMiscReg(IPR_ISR, summary); + thread.setMiscReg(IPR_INTID, ipl); + Fault fault = new InterruptFault; + fault->invoke(thread.getXCProxy()); + DPRINTF(Flow, "Interrupt! IPLR=%d ipl=%d summary=%x\n", + thread.readMiscReg(IPR_IPLR), ipl, summary); + } +} + template bool OzoneCPU::simPalCheck(int palFunc) diff --git a/cpu/ozone/front_end.hh b/cpu/ozone/front_end.hh index 5e257b506..251f4200c 100644 --- a/cpu/ozone/front_end.hh +++ b/cpu/ozone/front_end.hh @@ -76,6 +76,10 @@ class FrontEnd bool processBarriers(DynInstPtr &inst); void handleFault(Fault &fault); + public: + Fault getFault() { return fetchFault; } + private: + Fault fetchFault; // Align an address (typically a PC) to the start of an I-cache block. // We fold in the PISA 64- to 32-bit conversion here as well. diff --git a/cpu/ozone/front_end_impl.hh b/cpu/ozone/front_end_impl.hh index 0136d0ef0..af452fe95 100644 --- a/cpu/ozone/front_end_impl.hh +++ b/cpu/ozone/front_end_impl.hh @@ -48,6 +48,7 @@ FrontEnd::FrontEnd(Params *params) #if !FULL_SYSTEM pTable = params->pTable; #endif + fetchFault = NoFault; } template @@ -273,6 +274,7 @@ FrontEnd::tick() Fault fault = fetchCacheLine(); if (fault != NoFault) { handleFault(fault); + fetchFault = fault; return; } fetchCacheLineNextCycle = false; @@ -349,7 +351,7 @@ FrontEnd::fetchCacheLine() // Read a cache line, based on the current PC. #if FULL_SYSTEM // Flag to say whether or not address is physical addr. - unsigned flags = cpu->inPalMode() ? PHYSICAL : 0; + unsigned flags = cpu->inPalMode(PC) ? PHYSICAL : 0; #else unsigned flags = 0; #endif // FULL_SYSTEM @@ -503,6 +505,9 @@ FrontEnd::squash(const InstSeqNum &squash_num, const Addr &next_PC, DPRINTF(FE, "Squashing from [sn:%lli], setting PC to %#x\n", squash_num, next_PC); + if (fetchFault != NoFault) + fetchFault = NoFault; + while (!instBuffer.empty() && instBuffer.back()->seqNum > squash_num) { DynInstPtr inst = instBuffer.back(); @@ -604,9 +609,13 @@ FrontEnd::addFreeRegs(int num_freed) status = Running; } + DPRINTF(FE, "Adding %i freed registers\n", num_freed); + freeRegs+= num_freed; - assert(freeRegs <= numPhysRegs); +// assert(freeRegs <= numPhysRegs); + if (freeRegs > numPhysRegs) + freeRegs = numPhysRegs; } template diff --git a/cpu/ozone/inorder_back_end.hh b/cpu/ozone/inorder_back_end.hh index e621f6c01..6519b79e5 100644 --- a/cpu/ozone/inorder_back_end.hh +++ b/cpu/ozone/inorder_back_end.hh @@ -54,6 +54,7 @@ class InorderBackEnd void squash(const InstSeqNum &squash_num, const Addr &next_PC); void squashFromXC(); + void generateXCEvent() { } bool robEmpty() { return instList.empty(); } diff --git a/cpu/ozone/lsq_unit.hh b/cpu/ozone/lsq_unit.hh index 3c3e3988c..4b600af67 100644 --- a/cpu/ozone/lsq_unit.hh +++ b/cpu/ozone/lsq_unit.hh @@ -567,8 +567,11 @@ OzoneLSQ::read(MemReqPtr &req, T &data, int load_idx) req->data = new uint8_t[64]; assert(!req->completionEvent); - req->completionEvent = - new typename BackEnd::LdWritebackEvent(loadQueue[load_idx], be); + typedef typename BackEnd::LdWritebackEvent LdWritebackEvent; + + LdWritebackEvent *wb = new LdWritebackEvent(loadQueue[load_idx], be); + + req->completionEvent = wb; // Do Cache Access MemAccessResult result = dcacheInterface->access(req); @@ -586,6 +589,8 @@ OzoneLSQ::read(MemReqPtr &req, T &data, int load_idx) _status = DcacheMissStall; + wb->setDcacheMiss(); + } else { // DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n", // inst->seqNum); diff --git a/cpu/ozone/lsq_unit_impl.hh b/cpu/ozone/lsq_unit_impl.hh index 6c7977250..726348d76 100644 --- a/cpu/ozone/lsq_unit_impl.hh +++ b/cpu/ozone/lsq_unit_impl.hh @@ -698,7 +698,7 @@ OzoneLSQ::squash(const InstSeqNum &squashed_num) stallingLoadIdx = 0; } - loadQueue[load_idx]->squashed = true; +// loadQueue[load_idx]->squashed = true; loadQueue[load_idx] = NULL; --loads; @@ -728,7 +728,7 @@ OzoneLSQ::squash(const InstSeqNum &squashed_num) stallingStoreIsn = 0; } - storeQueue[store_idx].inst->squashed = true; +// storeQueue[store_idx].inst->squashed = true; storeQueue[store_idx].inst = NULL; storeQueue[store_idx].canWB = 0; diff --git a/cpu/ozone/lw_back_end.cc b/cpu/ozone/lw_back_end.cc new file mode 100644 index 000000000..8e9a56ef5 --- /dev/null +++ b/cpu/ozone/lw_back_end.cc @@ -0,0 +1,5 @@ + +#include "cpu/ozone/lw_back_end_impl.hh" +#include "cpu/ozone/ozone_impl.hh" + +template class LWBackEnd; diff --git a/cpu/ozone/lw_back_end.hh b/cpu/ozone/lw_back_end.hh new file mode 100644 index 000000000..b89957aad --- /dev/null +++ b/cpu/ozone/lw_back_end.hh @@ -0,0 +1,503 @@ + +#ifndef __CPU_OZONE_LW_BACK_END_HH__ +#define __CPU_OZONE_LW_BACK_END_HH__ + +#include +#include +#include +#include + +#include "arch/faults.hh" +#include "base/timebuf.hh" +#include "cpu/inst_seq.hh" +#include "cpu/ozone/rename_table.hh" +#include "cpu/ozone/thread_state.hh" +#include "mem/functional/functional.hh" +#include "mem/mem_interface.hh" +#include "mem/mem_req.hh" +#include "sim/eventq.hh" + +class ExecContext; + +template +class OzoneThreadState; + +template +class LWBackEnd +{ + public: + typedef OzoneThreadState Thread; + + typedef typename Impl::Params Params; + typedef typename Impl::DynInst DynInst; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::FrontEnd FrontEnd; + typedef typename Impl::FullCPU::CommStruct CommStruct; + + struct SizeStruct { + int size; + }; + + typedef SizeStruct DispatchToIssue; + typedef SizeStruct IssueToExec; + typedef SizeStruct ExecToCommit; + typedef SizeStruct Writeback; + + TimeBuffer d2i; + typename TimeBuffer::wire instsToDispatch; + TimeBuffer i2e; + typename TimeBuffer::wire instsToExecute; + TimeBuffer e2c; + TimeBuffer numInstsToWB; + + TimeBuffer *comm; + typename TimeBuffer::wire toIEW; + typename TimeBuffer::wire fromCommit; + + class TrapEvent : public Event { + private: + LWBackEnd *be; + + public: + TrapEvent(LWBackEnd *_be); + + void process(); + const char *description(); + }; + + /** LdWriteback event for a load completion. */ + class LdWritebackEvent : public Event { + private: + /** Instruction that is writing back data to the register file. */ + DynInstPtr inst; + /** Pointer to IEW stage. */ + LWBackEnd *be; + + bool dcacheMiss; + + public: + /** Constructs a load writeback event. */ + LdWritebackEvent(DynInstPtr &_inst, LWBackEnd *be); + + /** Processes writeback event. */ + virtual void process(); + /** Returns the description of the writeback event. */ + virtual const char *description(); + + void setDcacheMiss() { dcacheMiss = true; be->addDcacheMiss(inst); } + }; + + LWBackEnd(Params *params); + + std::string name() const; + + void regStats(); + + void setCPU(FullCPU *cpu_ptr) + { cpu = cpu_ptr; } + + void setFrontEnd(FrontEnd *front_end_ptr) + { frontEnd = front_end_ptr; } + + void setXC(ExecContext *xc_ptr) + { xc = xc_ptr; } + + void setThreadState(Thread *thread_ptr) + { thread = thread_ptr; } + + void setCommBuffer(TimeBuffer *_comm); + + void tick(); + void squash(); + void generateXCEvent() { xcSquash = true; } + void squashFromXC(); + void squashFromTrap(); + void checkInterrupts(); + bool trapSquash; + bool xcSquash; + + template + Fault read(MemReqPtr &req, T &data, int load_idx); + + template + Fault write(MemReqPtr &req, T &data, int store_idx); + + Addr readCommitPC() { return commitPC; } + + Addr commitPC; + + bool robEmpty() { return instList.empty(); } + + bool isFull() { return numInsts >= numROBEntries; } + bool isBlocked() { return status == Blocked || dispatchStatus == Blocked; } + + void fetchFault(Fault &fault); + + int wakeDependents(DynInstPtr &inst); + + /** Tells memory dependence unit that a memory instruction needs to be + * rescheduled. It will re-execute once replayMemInst() is called. + */ + void rescheduleMemInst(DynInstPtr &inst); + + /** Re-executes all rescheduled memory instructions. */ + void replayMemInst(DynInstPtr &inst); + + /** Completes memory instruction. */ + void completeMemInst(DynInstPtr &inst) { } + + void addDcacheMiss(DynInstPtr &inst) + { + waitingMemOps.insert(inst->seqNum); + numWaitingMemOps++; + DPRINTF(BE, "Adding a Dcache miss mem op [sn:%lli], total %i\n", + inst->seqNum, numWaitingMemOps); + } + + void removeDcacheMiss(DynInstPtr &inst) + { + assert(waitingMemOps.find(inst->seqNum) != waitingMemOps.end()); + waitingMemOps.erase(inst->seqNum); + numWaitingMemOps--; + DPRINTF(BE, "Removing a Dcache miss mem op [sn:%lli], total %i\n", + inst->seqNum, numWaitingMemOps); + } + + void addWaitingMemOp(DynInstPtr &inst) + { + waitingMemOps.insert(inst->seqNum); + numWaitingMemOps++; + DPRINTF(BE, "Adding a waiting mem op [sn:%lli], total %i\n", + inst->seqNum, numWaitingMemOps); + } + + void removeWaitingMemOp(DynInstPtr &inst) + { + assert(waitingMemOps.find(inst->seqNum) != waitingMemOps.end()); + waitingMemOps.erase(inst->seqNum); + numWaitingMemOps--; + DPRINTF(BE, "Removing a waiting mem op [sn:%lli], total %i\n", + inst->seqNum, numWaitingMemOps); + } + + void instToCommit(DynInstPtr &inst); + + private: + void generateTrapEvent(Tick latency = 0); + void handleFault(Fault &fault, Tick latency = 0); + void updateStructures(); + void dispatchInsts(); + void dispatchStall(); + void checkDispatchStatus(); + void executeInsts(); + void commitInsts(); + void addToLSQ(DynInstPtr &inst); + void writebackInsts(); + bool commitInst(int inst_num); + void squash(const InstSeqNum &sn); + void squashDueToBranch(DynInstPtr &inst); + void squashDueToMemViolation(DynInstPtr &inst); + void squashDueToMemBlocked(DynInstPtr &inst); + void updateExeInstStats(DynInstPtr &inst); + void updateComInstStats(DynInstPtr &inst); + + public: + FullCPU *cpu; + + FrontEnd *frontEnd; + + ExecContext *xc; + + Thread *thread; + + enum Status { + Running, + Idle, + DcacheMissStall, + DcacheMissComplete, + Blocked, + TrapPending + }; + + Status status; + + Status dispatchStatus; + + Status commitStatus; + + Counter funcExeInst; + + private: +// typedef typename Impl::InstQueue InstQueue; + +// InstQueue IQ; + + typedef typename Impl::LdstQueue LdstQueue; + + LdstQueue LSQ; + public: + RenameTable commitRenameTable; + + RenameTable renameTable; + private: + class DCacheCompletionEvent : public Event + { + private: + LWBackEnd *be; + + public: + DCacheCompletionEvent(LWBackEnd *_be); + + virtual void process(); + virtual const char *description(); + }; + + friend class DCacheCompletionEvent; + + DCacheCompletionEvent cacheCompletionEvent; + + MemInterface *dcacheInterface; + + MemReqPtr memReq; + + // General back end width. Used if the more specific isn't given. + int width; + + // Dispatch width. + int dispatchWidth; + int numDispatchEntries; + int dispatchSize; + + int waitingInsts; + + int issueWidth; + + // Writeback width + int wbWidth; + + // Commit width + int commitWidth; + + /** Index into queue of instructions being written back. */ + unsigned wbNumInst; + + /** Cycle number within the queue of instructions being written + * back. Used in case there are too many instructions writing + * back at the current cycle and writesbacks need to be scheduled + * for the future. See comments in instToCommit(). + */ + unsigned wbCycle; + + int numROBEntries; + int numInsts; + + std::set waitingMemOps; + typedef std::set::iterator MemIt; + int numWaitingMemOps; + unsigned maxOutstandingMemOps; + + bool squashPending; + InstSeqNum squashSeqNum; + Addr squashNextPC; + + Fault faultFromFetch; + bool fetchHasFault; + + private: + struct pqCompare { + bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const + { + return lhs->seqNum > rhs->seqNum; + } + }; + + typedef typename std::priority_queue, pqCompare> ReadyInstQueue; + ReadyInstQueue exeList; + + typedef typename std::list::iterator InstListIt; + + std::list instList; + std::list waitingList; + std::list replayList; + std::list writeback; + + int latency; + + int squashLatency; + + bool exactFullStall; + + bool fetchRedirect[Impl::MaxThreads]; + + // number of cycles stalled for D-cache misses +/* Stats::Scalar<> dcacheStallCycles; + Counter lastDcacheStall; +*/ + Stats::Vector<> rob_cap_events; + Stats::Vector<> rob_cap_inst_count; + Stats::Vector<> iq_cap_events; + Stats::Vector<> iq_cap_inst_count; + // total number of instructions executed + Stats::Vector<> exe_inst; + Stats::Vector<> exe_swp; + Stats::Vector<> exe_nop; + Stats::Vector<> exe_refs; + Stats::Vector<> exe_loads; + Stats::Vector<> exe_branches; + + Stats::Vector<> issued_ops; + + // total number of loads forwaded from LSQ stores + Stats::Vector<> lsq_forw_loads; + + // total number of loads ignored due to invalid addresses + Stats::Vector<> inv_addr_loads; + + // total number of software prefetches ignored due to invalid addresses + Stats::Vector<> inv_addr_swpfs; + // ready loads blocked due to memory disambiguation + Stats::Vector<> lsq_blocked_loads; + + Stats::Scalar<> lsqInversion; + + Stats::Vector<> n_issued_dist; + Stats::VectorDistribution<> issue_delay_dist; + + Stats::VectorDistribution<> queue_res_dist; +/* + Stats::Vector<> stat_fu_busy; + Stats::Vector2d<> stat_fuBusy; + Stats::Vector<> dist_unissued; + Stats::Vector2d<> stat_issued_inst_type; + + Stats::Formula misspec_cnt; + Stats::Formula misspec_ipc; + Stats::Formula issue_rate; + Stats::Formula issue_stores; + Stats::Formula issue_op_rate; + Stats::Formula fu_busy_rate; + Stats::Formula commit_stores; + Stats::Formula commit_ipc; + Stats::Formula commit_ipb; + Stats::Formula lsq_inv_rate; +*/ + Stats::Vector<> writeback_count; + Stats::Vector<> producer_inst; + Stats::Vector<> consumer_inst; + Stats::Vector<> wb_penalized; + + Stats::Formula wb_rate; + Stats::Formula wb_fanout; + Stats::Formula wb_penalized_rate; + + // total number of instructions committed + Stats::Vector<> stat_com_inst; + Stats::Vector<> stat_com_swp; + Stats::Vector<> stat_com_refs; + Stats::Vector<> stat_com_loads; + Stats::Vector<> stat_com_membars; + Stats::Vector<> stat_com_branches; + + Stats::Distribution<> n_committed_dist; + + Stats::Scalar<> commit_eligible_samples; + Stats::Vector<> commit_eligible; + + Stats::Scalar<> ROB_fcount; + Stats::Formula ROB_full_rate; + + Stats::Vector<> ROB_count; // cumulative ROB occupancy + Stats::Formula ROB_occ_rate; + Stats::VectorDistribution<> ROB_occ_dist; + public: + void dumpInsts(); +}; + +template +template +Fault +LWBackEnd::read(MemReqPtr &req, T &data, int load_idx) +{ +/* memReq->reset(addr, sizeof(T), flags); + + // translate to physical address + Fault fault = cpu->translateDataReadReq(memReq); + + // if we have a cache, do cache access too + if (fault == NoFault && dcacheInterface) { + memReq->cmd = Read; + memReq->completionEvent = NULL; + memReq->time = curTick; + memReq->flags &= ~INST_READ; + MemAccessResult result = dcacheInterface->access(memReq); + + // Ugly hack to get an event scheduled *only* if the access is + // a miss. We really should add first-class support for this + // at some point. + if (result != MA_HIT && dcacheInterface->doEvents()) { + // Fix this hack for keeping funcExeInst correct with loads that + // are executed twice. + --funcExeInst; + + memReq->completionEvent = &cacheCompletionEvent; + lastDcacheStall = curTick; +// unscheduleTickEvent(); +// status = DcacheMissStall; + DPRINTF(OzoneCPU, "Dcache miss stall!\n"); + } else { + // do functional access + fault = thread->mem->read(memReq, data); + + } + } +*/ +/* + if (!dcacheInterface && (memReq->flags & UNCACHEABLE)) + recordEvent("Uncached Read"); +*/ + return LSQ.read(req, data, load_idx); +} + +template +template +Fault +LWBackEnd::write(MemReqPtr &req, T &data, int store_idx) +{ +/* + memReq->reset(addr, sizeof(T), flags); + + // translate to physical address + Fault fault = cpu->translateDataWriteReq(memReq); + + if (fault == NoFault && dcacheInterface) { + memReq->cmd = Write; + memcpy(memReq->data,(uint8_t *)&data,memReq->size); + memReq->completionEvent = NULL; + memReq->time = curTick; + memReq->flags &= ~INST_READ; + MemAccessResult result = dcacheInterface->access(memReq); + + // Ugly hack to get an event scheduled *only* if the access is + // a miss. We really should add first-class support for this + // at some point. + if (result != MA_HIT && dcacheInterface->doEvents()) { + memReq->completionEvent = &cacheCompletionEvent; + lastDcacheStall = curTick; +// unscheduleTickEvent(); +// status = DcacheMissStall; + DPRINTF(OzoneCPU, "Dcache miss stall!\n"); + } + } + + if (res && (fault == NoFault)) + *res = memReq->result; + */ +/* + if (!dcacheInterface && (memReq->flags & UNCACHEABLE)) + recordEvent("Uncached Write"); +*/ + return LSQ.write(req, data, store_idx); +} + +#endif // __CPU_OZONE_LW_BACK_END_HH__ diff --git a/cpu/ozone/lw_back_end_impl.hh b/cpu/ozone/lw_back_end_impl.hh new file mode 100644 index 000000000..115821787 --- /dev/null +++ b/cpu/ozone/lw_back_end_impl.hh @@ -0,0 +1,1486 @@ + +#include "encumbered/cpu/full/op_class.hh" +#include "cpu/ozone/lw_back_end.hh" + +template +void +LWBackEnd::generateTrapEvent(Tick latency) +{ + DPRINTF(BE, "Generating trap event\n"); + + TrapEvent *trap = new TrapEvent(this); + + trap->schedule(curTick + latency); + + thread->trapPending = true; +} + +template +int +LWBackEnd::wakeDependents(DynInstPtr &inst) +{ + assert(!inst->isSquashed()); + std::vector &dependents = inst->getDependents(); + int num_outputs = dependents.size(); + + DPRINTF(BE, "Waking instruction [sn:%lli] dependents in IQ\n", inst->seqNum); + + for (int i = 0; i < num_outputs; i++) { + DynInstPtr dep_inst = dependents[i]; + dep_inst->markSrcRegReady(); + DPRINTF(BE, "Marking source reg ready [sn:%lli] in IQ\n", dep_inst->seqNum); + + if (dep_inst->readyToIssue() && dep_inst->isInROB() && + !dep_inst->isNonSpeculative()) { + DPRINTF(BE, "Adding instruction to exeList [sn:%lli]\n", + dep_inst->seqNum); + exeList.push(dep_inst); + if (dep_inst->iqItValid) { + DPRINTF(BE, "Removing instruction from waiting list\n"); + waitingList.erase(dep_inst->iqIt); + waitingInsts--; + dep_inst->iqItValid = false; + assert(waitingInsts >= 0); + } + if (dep_inst->isMemRef()) { + removeWaitingMemOp(dep_inst); + DPRINTF(BE, "Issued a waiting mem op [sn:%lli]\n", + dep_inst->seqNum); + } + } + } + return num_outputs; +} + +template +void +LWBackEnd::rescheduleMemInst(DynInstPtr &inst) +{ + replayList.push_front(inst); +} + +template +LWBackEnd::TrapEvent::TrapEvent(LWBackEnd *_be) + : Event(&mainEventQueue, CPU_Tick_Pri), be(_be) +{ + this->setFlags(Event::AutoDelete); +} + +template +void +LWBackEnd::TrapEvent::process() +{ + be->trapSquash = true; +} + +template +const char * +LWBackEnd::TrapEvent::description() +{ + return "Trap event"; +} + +template +void +LWBackEnd::replayMemInst(DynInstPtr &inst) +{ + bool found_inst = false; + while (!replayList.empty()) { + exeList.push(replayList.front()); + if (replayList.front() == inst) { + found_inst = true; + } + replayList.pop_front(); + } + assert(found_inst); +} + +template +LWBackEnd::LdWritebackEvent::LdWritebackEvent(DynInstPtr &_inst, + LWBackEnd *_be) + : Event(&mainEventQueue), inst(_inst), be(_be), dcacheMiss(false) +{ + this->setFlags(Event::AutoDelete); +} + +template +void +LWBackEnd::LdWritebackEvent::process() +{ + DPRINTF(BE, "Load writeback event [sn:%lli]\n", inst->seqNum); +// DPRINTF(Activity, "Activity: Ld Writeback event [sn:%lli]\n", inst->seqNum); + + //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum); + +// iewStage->wakeCPU(); + + if (dcacheMiss) { + be->removeDcacheMiss(inst); + } + + if (inst->isSquashed()) { + inst = NULL; + return; + } + + if (!inst->isExecuted()) { + inst->setExecuted(); + + // Execute again to copy data to proper place. + inst->completeAcc(); + } + + // Need to insert instruction into queue to commit + be->instToCommit(inst); + + //wroteToTimeBuffer = true; +// iewStage->activityThisCycle(); + + inst = NULL; +} + +template +const char * +LWBackEnd::LdWritebackEvent::description() +{ + return "Load writeback event"; +} + + +template +LWBackEnd::DCacheCompletionEvent::DCacheCompletionEvent(LWBackEnd *_be) + : Event(&mainEventQueue, CPU_Tick_Pri), be(_be) +{ +} + +template +void +LWBackEnd::DCacheCompletionEvent::process() +{ +} + +template +const char * +LWBackEnd::DCacheCompletionEvent::description() +{ + return "Cache completion event"; +} + +template +LWBackEnd::LWBackEnd(Params *params) + : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(5, 5), + xcSquash(false), cacheCompletionEvent(this), + dcacheInterface(params->dcacheInterface), width(params->backEndWidth), + exactFullStall(true) +{ + numROBEntries = params->numROBEntries; + numInsts = 0; + numDispatchEntries = 32; + maxOutstandingMemOps = 4; + numWaitingMemOps = 0; + waitingInsts = 0; +// IQ.setBE(this); + LSQ.setBE(this); + + // Setup IQ and LSQ with their parameters here. + instsToDispatch = d2i.getWire(-1); + + instsToExecute = i2e.getWire(-1); + +// IQ.setIssueExecQueue(&i2e); + + dispatchWidth = params->dispatchWidth ? params->dispatchWidth : width; + issueWidth = params->issueWidth ? params->issueWidth : width; + wbWidth = params->wbWidth ? params->wbWidth : width; + commitWidth = params->commitWidth ? params->commitWidth : width; + + LSQ.init(params, params->LQEntries, params->SQEntries, 0); + + dispatchStatus = Running; +} + +template +std::string +LWBackEnd::name() const +{ + return cpu->name() + ".backend"; +} + +template +void +LWBackEnd::regStats() +{ + using namespace Stats; + rob_cap_events + .init(cpu->number_of_threads) + .name(name() + ".ROB:cap_events") + .desc("number of cycles where ROB cap was active") + .flags(total) + ; + + rob_cap_inst_count + .init(cpu->number_of_threads) + .name(name() + ".ROB:cap_inst") + .desc("number of instructions held up by ROB cap") + .flags(total) + ; + + iq_cap_events + .init(cpu->number_of_threads) + .name(name() +".IQ:cap_events" ) + .desc("number of cycles where IQ cap was active") + .flags(total) + ; + + iq_cap_inst_count + .init(cpu->number_of_threads) + .name(name() + ".IQ:cap_inst") + .desc("number of instructions held up by IQ cap") + .flags(total) + ; + + + exe_inst + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:count") + .desc("number of insts issued") + .flags(total) + ; + + exe_swp + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:swp") + .desc("number of swp insts issued") + .flags(total) + ; + + exe_nop + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:nop") + .desc("number of nop insts issued") + .flags(total) + ; + + exe_refs + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:refs") + .desc("number of memory reference insts issued") + .flags(total) + ; + + exe_loads + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:loads") + .desc("number of load insts issued") + .flags(total) + ; + + exe_branches + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:branches") + .desc("Number of branches issued") + .flags(total) + ; + + issued_ops + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:op_count") + .desc("number of insts issued") + .flags(total) + ; + +/* + for (int i=0; inumber_of_threads) + .name(name() + ".LSQ:forw_loads") + .desc("number of loads forwarded via LSQ") + .flags(total) + ; + + inv_addr_loads + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:addr_loads") + .desc("number of invalid-address loads") + .flags(total) + ; + + inv_addr_swpfs + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:addr_swpfs") + .desc("number of invalid-address SW prefetches") + .flags(total) + ; + + lsq_blocked_loads + .init(cpu->number_of_threads) + .name(name() + ".LSQ:blocked_loads") + .desc("number of ready loads not issued due to memory disambiguation") + .flags(total) + ; + + lsqInversion + .name(name() + ".ISSUE:lsq_invert") + .desc("Number of times LSQ instruction issued early") + ; + + n_issued_dist + .init(issueWidth + 1) + .name(name() + ".ISSUE:issued_per_cycle") + .desc("Number of insts issued each cycle") + .flags(total | pdf | dist) + ; + issue_delay_dist + .init(Num_OpClasses,0,99,2) + .name(name() + ".ISSUE:") + .desc("cycles from operands ready to issue") + .flags(pdf | cdf) + ; + + queue_res_dist + .init(Num_OpClasses, 0, 99, 2) + .name(name() + ".IQ:residence:") + .desc("cycles from dispatch to issue") + .flags(total | pdf | cdf ) + ; + for (int i = 0; i < Num_OpClasses; ++i) { + queue_res_dist.subname(i, opClassStrings[i]); + } + + writeback_count + .init(cpu->number_of_threads) + .name(name() + ".WB:count") + .desc("cumulative count of insts written-back") + .flags(total) + ; + + producer_inst + .init(cpu->number_of_threads) + .name(name() + ".WB:producers") + .desc("num instructions producing a value") + .flags(total) + ; + + consumer_inst + .init(cpu->number_of_threads) + .name(name() + ".WB:consumers") + .desc("num instructions consuming a value") + .flags(total) + ; + + wb_penalized + .init(cpu->number_of_threads) + .name(name() + ".WB:penalized") + .desc("number of instrctions required to write to 'other' IQ") + .flags(total) + ; + + + wb_penalized_rate + .name(name() + ".WB:penalized_rate") + .desc ("fraction of instructions written-back that wrote to 'other' IQ") + .flags(total) + ; + + wb_penalized_rate = wb_penalized / writeback_count; + + wb_fanout + .name(name() + ".WB:fanout") + .desc("average fanout of values written-back") + .flags(total) + ; + + wb_fanout = producer_inst / consumer_inst; + + wb_rate + .name(name() + ".WB:rate") + .desc("insts written-back per cycle") + .flags(total) + ; + wb_rate = writeback_count / cpu->numCycles; + + stat_com_inst + .init(cpu->number_of_threads) + .name(name() + ".COM:count") + .desc("Number of instructions committed") + .flags(total) + ; + + stat_com_swp + .init(cpu->number_of_threads) + .name(name() + ".COM:swp_count") + .desc("Number of s/w prefetches committed") + .flags(total) + ; + + stat_com_refs + .init(cpu->number_of_threads) + .name(name() + ".COM:refs") + .desc("Number of memory references committed") + .flags(total) + ; + + stat_com_loads + .init(cpu->number_of_threads) + .name(name() + ".COM:loads") + .desc("Number of loads committed") + .flags(total) + ; + + stat_com_membars + .init(cpu->number_of_threads) + .name(name() + ".COM:membars") + .desc("Number of memory barriers committed") + .flags(total) + ; + + stat_com_branches + .init(cpu->number_of_threads) + .name(name() + ".COM:branches") + .desc("Number of branches committed") + .flags(total) + ; + n_committed_dist + .init(0,commitWidth,1) + .name(name() + ".COM:committed_per_cycle") + .desc("Number of insts commited each cycle") + .flags(pdf) + ; + + // + // Commit-Eligible instructions... + // + // -> The number of instructions eligible to commit in those + // cycles where we reached our commit BW limit (less the number + // actually committed) + // + // -> The average value is computed over ALL CYCLES... not just + // the BW limited cycles + // + // -> The standard deviation is computed only over cycles where + // we reached the BW limit + // + commit_eligible + .init(cpu->number_of_threads) + .name(name() + ".COM:bw_limited") + .desc("number of insts not committed due to BW limits") + .flags(total) + ; + + commit_eligible_samples + .name(name() + ".COM:bw_lim_events") + .desc("number cycles where commit BW limit reached") + ; + + ROB_fcount + .name(name() + ".ROB:full_count") + .desc("number of cycles where ROB was full") + ; + + ROB_count + .init(cpu->number_of_threads) + .name(name() + ".ROB:occupancy") + .desc(name() + ".ROB occupancy (cumulative)") + .flags(total) + ; + + ROB_full_rate + .name(name() + ".ROB:full_rate") + .desc("ROB full per cycle") + ; + ROB_full_rate = ROB_fcount / cpu->numCycles; + + ROB_occ_rate + .name(name() + ".ROB:occ_rate") + .desc("ROB occupancy rate") + .flags(total) + ; + ROB_occ_rate = ROB_count / cpu->numCycles; + + ROB_occ_dist + .init(cpu->number_of_threads,0,numROBEntries,2) + .name(name() + ".ROB:occ_dist") + .desc("ROB Occupancy per cycle") + .flags(total | cdf) + ; + +// IQ.regStats(); +} + +template +void +LWBackEnd::setCommBuffer(TimeBuffer *_comm) +{ + comm = _comm; + toIEW = comm->getWire(0); + fromCommit = comm->getWire(-1); +} + +#if FULL_SYSTEM +template +void +LWBackEnd::checkInterrupts() +{ + if (cpu->checkInterrupts && + cpu->check_interrupts() && + !cpu->inPalMode(thread->readPC()) && + !trapSquash && + !xcSquash) { + // Will need to squash all instructions currently in flight and have + // the interrupt handler restart at the last non-committed inst. + // Most of that can be handled through the trap() function. The + // processInterrupts() function really just checks for interrupts + // and then calls trap() if there is an interrupt present. + + // Not sure which thread should be the one to interrupt. For now + // always do thread 0. + assert(!thread->inSyscall); + thread->inSyscall = true; + + // CPU will handle implementation of the interrupt. + cpu->processInterrupts(); + + // Now squash or record that I need to squash this cycle. + commitStatus = TrapPending; + + // Exit state update mode to avoid accidental updating. + thread->inSyscall = false; + + // Generate trap squash event. + generateTrapEvent(); + + DPRINTF(BE, "Interrupt detected.\n"); + } +} + +template +void +LWBackEnd::handleFault(Fault &fault, Tick latency) +{ + DPRINTF(BE, "Handling fault!"); + + assert(!thread->inSyscall); + + thread->inSyscall = true; + + // Consider holding onto the trap and waiting until the trap event + // happens for this to be executed. + fault->invoke(thread->getXCProxy()); + + // Exit state update mode to avoid accidental updating. + thread->inSyscall = false; + + commitStatus = TrapPending; + + // Generate trap squash event. + generateTrapEvent(latency); +} +#endif + +template +void +LWBackEnd::tick() +{ + DPRINTF(BE, "Ticking back end\n"); + + ROB_count[0]+= numInsts; + + wbCycle = 0; + +#if FULL_SYSTEM + checkInterrupts(); + + if (trapSquash) { + assert(!xcSquash); + squashFromTrap(); + } else if (xcSquash) { + squashFromXC(); + } else if (fetchHasFault && robEmpty() && frontEnd->isEmpty()) { + DPRINTF(BE, "ROB and front end empty, handling fetch fault\n"); + Fault fetch_fault = frontEnd->getFault(); + if (fetch_fault == NoFault) { + DPRINTF(BE, "Fetch no longer has a fault, cancelling out.\n"); + fetchHasFault = false; + } else { + handleFault(fetch_fault); + fetchHasFault = false; + } + } +#endif + + // Read in any done instruction information and update the IQ or LSQ. + updateStructures(); + + if (dispatchStatus != Blocked) { + dispatchInsts(); + } else { + checkDispatchStatus(); + } + + if (commitStatus != TrapPending) { + executeInsts(); + + commitInsts(); + } + + LSQ.writebackStores(); + + DPRINTF(BE, "Waiting insts: %i, mem ops: %i, ROB entries in use: %i, " + "LSQ loads: %i, LSQ stores: %i\n", + waitingInsts, numWaitingMemOps, numInsts, + LSQ.numLoads(), LSQ.numStores()); + +#ifdef DEBUG + assert(numInsts == instList.size()); + assert(waitingInsts == waitingList.size()); + assert(numWaitingMemOps == waitingMemOps.size()); +#endif +} + +template +void +LWBackEnd::updateStructures() +{ + if (fromCommit->doneSeqNum) { + LSQ.commitLoads(fromCommit->doneSeqNum); + LSQ.commitStores(fromCommit->doneSeqNum); + } + + if (fromCommit->nonSpecSeqNum) { + if (fromCommit->uncached) { +// LSQ.executeLoad(fromCommit->lqIdx); + } else { +// IQ.scheduleNonSpec( +// fromCommit->nonSpecSeqNum); + } + } +} + +template +void +LWBackEnd::addToLSQ(DynInstPtr &inst) +{ + // Do anything LSQ specific here? + LSQ.insert(inst); +} + +template +void +LWBackEnd::dispatchInsts() +{ + DPRINTF(BE, "Trying to dispatch instructions.\n"); + + while (numInsts < numROBEntries && + numWaitingMemOps < maxOutstandingMemOps) { + // Get instruction from front of time buffer + DynInstPtr inst = frontEnd->getInst(); + if (!inst) { + break; + } else if (inst->isSquashed()) { + continue; + } + + ++numInsts; + instList.push_front(inst); + + inst->setInROB(); + + DPRINTF(BE, "Dispatching instruction [sn:%lli] PC:%#x\n", + inst->seqNum, inst->readPC()); + + for (int i = 0; i < inst->numDestRegs(); ++i) + renameTable[inst->destRegIdx(i)] = inst; + + if (inst->readyToIssue() && !inst->isNonSpeculative()) { + DPRINTF(BE, "Instruction [sn:%lli] ready, addding to exeList.\n", + inst->seqNum); + exeList.push(inst); + if (inst->isMemRef()) { + LSQ.insert(inst); + } + } else { + if (inst->isNonSpeculative()) { + inst->setCanCommit(); + DPRINTF(BE, "Adding non speculative instruction\n"); + } + + if (inst->isMemRef()) { + addWaitingMemOp(inst); + LSQ.insert(inst); + } + + DPRINTF(BE, "Instruction [sn:%lli] not ready, addding to " + "waitingList.\n", + inst->seqNum); + waitingList.push_front(inst); + inst->iqIt = waitingList.begin(); + inst->iqItValid = true; + waitingInsts++; + } + } + + // Check if IQ or LSQ is full. If so we'll need to break and stop + // removing instructions. Also update the number of insts to remove + // from the queue. Check here if we don't care about exact stall + // conditions. +/* + bool stall = false; + if (IQ.isFull()) { + DPRINTF(BE, "IQ is full!\n"); + stall = true; + } else if (LSQ.isFull()) { + DPRINTF(BE, "LSQ is full!\n"); + stall = true; + } else if (isFull()) { + DPRINTF(BE, "ROB is full!\n"); + stall = true; + ROB_fcount++; + } + if (stall) { + d2i.advance(); + dispatchStall(); + return; + } +*/ +} + +template +void +LWBackEnd::dispatchStall() +{ + dispatchStatus = Blocked; + if (!cpu->decoupledFrontEnd) { + // Tell front end to stall here through a timebuffer, or just tell + // it directly. + } +} + +template +void +LWBackEnd::checkDispatchStatus() +{ + DPRINTF(BE, "Checking dispatch status\n"); + assert(dispatchStatus == Blocked); + if (!LSQ.isFull() && !isFull()) { + DPRINTF(BE, "Dispatch no longer blocked\n"); + dispatchStatus = Running; + dispatchInsts(); + } +} + +template +void +LWBackEnd::executeInsts() +{ + DPRINTF(BE, "Trying to execute instructions\n"); + + int num_executed = 0; + while (!exeList.empty() && num_executed < issueWidth) { + DynInstPtr inst = exeList.top(); + + DPRINTF(BE, "Executing inst [sn:%lli] PC: %#x\n", + inst->seqNum, inst->readPC()); + + // Check if the instruction is squashed; if so then skip it + // and don't count it towards the FU usage. + if (inst->isSquashed()) { + DPRINTF(BE, "Execute: Instruction was squashed.\n"); + + // Not sure how to handle this plus the method of sending # of + // instructions to use. Probably will just have to count it + // towards the bandwidth usage, but not the FU usage. + ++num_executed; + + // Consider this instruction executed so that commit can go + // ahead and retire the instruction. + inst->setExecuted(); + + // Not sure if I should set this here or just let commit try to + // commit any squashed instructions. I like the latter a bit more. + inst->setCanCommit(); + +// ++iewExecSquashedInsts; + exeList.pop(); + + continue; + } + + Fault fault = NoFault; + + // Execute instruction. + // Note that if the instruction faults, it will be handled + // at the commit stage. + if (inst->isMemRef() && + (!inst->isDataPrefetch() && !inst->isInstPrefetch())) { + if (dcacheInterface->isBlocked()) { + // Should I move the instruction aside? + DPRINTF(BE, "Execute: dcache is blocked\n"); + break; + } + DPRINTF(BE, "Execute: Initiating access for memory " + "reference.\n"); + + if (inst->isLoad()) { + LSQ.executeLoad(inst); + } else if (inst->isStore()) { + LSQ.executeStore(inst); + if (inst->req && !(inst->req->flags & LOCKED)) { + inst->setExecuted(); + + instToCommit(inst); + } + } else { + panic("Unknown mem type!"); + } + } else { + inst->execute(); + + inst->setExecuted(); + + instToCommit(inst); + } + + updateExeInstStats(inst); + + ++funcExeInst; + ++num_executed; + // keep an instruction count + thread->numInst++; + thread->numInsts++; + + exeList.pop(); + + if (inst->mispredicted()) { + squashDueToBranch(inst); + break; + } else if (LSQ.violation()) { + // Get the DynInst that caused the violation. Note that this + // clears the violation signal. + DynInstPtr violator; + violator = LSQ.getMemDepViolator(); + + DPRINTF(BE, "LDSTQ detected a violation. Violator PC: " + "%#x, inst PC: %#x. Addr is: %#x.\n", + violator->readPC(), inst->readPC(), inst->physEffAddr); + + // Squash. + squashDueToMemViolation(inst); + } + } + + issued_ops[0]+= num_executed; + n_issued_dist[num_executed]++; +} + +template +void +LWBackEnd::instToCommit(DynInstPtr &inst) +{ + + DPRINTF(BE, "Sending instructions to commit [sn:%lli] PC %#x.\n", + inst->seqNum, inst->readPC()); + + if (!inst->isSquashed()) { + DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n", + inst->seqNum, inst->readPC()); + + inst->setCanCommit(); + + if (inst->isExecuted()) { + inst->setCompleted(); + int dependents = wakeDependents(inst); + if (dependents) { + producer_inst[0]++; + consumer_inst[0]+= dependents; + } + } + } + + writeback_count[0]++; +} + +template +void +LWBackEnd::writebackInsts() +{ + int wb_width = wbWidth; + // Using this method I'm not quite sure how to prevent an + // instruction from waking its own dependents multiple times, + // without the guarantee that commit always has enough bandwidth + // to accept all instructions being written back. This guarantee + // might not be too unrealistic. + InstListIt wb_inst_it = writeback.begin(); + InstListIt wb_end_it = writeback.end(); + int inst_num = 0; + int consumer_insts = 0; + + for (; inst_num < wb_width && + wb_inst_it != wb_end_it; inst_num++) { + DynInstPtr inst = (*wb_inst_it); + + // Some instructions will be sent to commit without having + // executed because they need commit to handle them. + // E.g. Uncached loads have not actually executed when they + // are first sent to commit. Instead commit must tell the LSQ + // when it's ready to execute the uncached load. + if (!inst->isSquashed()) { + DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n", + inst->seqNum, inst->readPC()); + + inst->setCanCommit(); + inst->setCompleted(); + + if (inst->isExecuted()) { + int dependents = wakeDependents(inst); + if (dependents) { + producer_inst[0]++; + consumer_insts+= dependents; + } + } + } + + writeback.erase(wb_inst_it++); + } + LSQ.writebackStores(); + consumer_inst[0]+= consumer_insts; + writeback_count[0]+= inst_num; +} + +template +bool +LWBackEnd::commitInst(int inst_num) +{ + // Read instruction from the head of the ROB + DynInstPtr inst = instList.back(); + + // Make sure instruction is valid + assert(inst); + + if (!inst->readyToCommit()) + return false; + + DPRINTF(BE, "Trying to commit instruction [sn:%lli] PC:%#x\n", + inst->seqNum, inst->readPC()); + + thread->setPC(inst->readPC()); + thread->setNextPC(inst->readNextPC()); + inst->reachedCommit = true; + + // If the instruction is not executed yet, then it is a non-speculative + // or store inst. Signal backwards that it should be executed. + if (!inst->isExecuted()) { + if (inst->isNonSpeculative()) { +#if !FULL_SYSTEM + // Hack to make sure syscalls aren't executed until all stores + // write back their data. This direct communication shouldn't + // be used for anything other than this. + if (inst_num > 0 || LSQ.hasStoresToWB()) +#else + if ((inst->isMemBarrier() || inst->isWriteBarrier() || + inst->isQuiesce()) && + LSQ.hasStoresToWB()) +#endif + { + DPRINTF(BE, "Waiting for all stores to writeback.\n"); + return false; + } + + DPRINTF(BE, "Encountered a store or non-speculative " + "instruction at the head of the ROB, PC %#x.\n", + inst->readPC()); + + // Send back the non-speculative instruction's sequence number. + if (inst->iqItValid) { + DPRINTF(BE, "Removing instruction from waiting list\n"); + waitingList.erase(inst->iqIt); + inst->iqItValid = false; + waitingInsts--; + assert(waitingInsts >= 0); + if (inst->isStore()) + removeWaitingMemOp(inst); + } + + exeList.push(inst); + + // Change the instruction so it won't try to commit again until + // it is executed. + inst->clearCanCommit(); + +// ++commitNonSpecStalls; + + return false; + } else if (inst->isLoad()) { + DPRINTF(BE, "[sn:%lli]: Uncached load, PC %#x.\n", + inst->seqNum, inst->readPC()); + + // Send back the non-speculative instruction's sequence + // number. Maybe just tell the lsq to re-execute the load. + + // Send back the non-speculative instruction's sequence number. + if (inst->iqItValid) { + DPRINTF(BE, "Removing instruction from waiting list\n"); + waitingList.erase(inst->iqIt); + inst->iqItValid = false; + waitingInsts--; + assert(waitingInsts >= 0); + removeWaitingMemOp(inst); + } + replayMemInst(inst); + + inst->clearCanCommit(); + + return false; + } else { + panic("Trying to commit un-executed instruction " + "of unknown type!\n"); + } + } + + // Now check if it's one of the special trap or barrier or + // serializing instructions. + if (inst->isThreadSync()) + { + // Not handled for now. + panic("Thread sync instructions are not handled yet.\n"); + } + + // Check if the instruction caused a fault. If so, trap. + Fault inst_fault = inst->getFault(); + + if (inst_fault != NoFault) { + if (!inst->isNop()) { + DPRINTF(BE, "Inst [sn:%lli] PC %#x has a fault\n", + inst->seqNum, inst->readPC()); + thread->setInst( + static_cast(inst->staticInst->machInst)); +#if FULL_SYSTEM + handleFault(inst_fault); + return false; +#else // !FULL_SYSTEM + panic("fault (%d) detected @ PC %08p", inst_fault, + inst->PC); +#endif // FULL_SYSTEM + } + } + + if (inst->isControl()) { +// ++commitCommittedBranches; + } + + int freed_regs = 0; + + for (int i = 0; i < inst->numDestRegs(); ++i) { + DPRINTF(BE, "Commit rename map setting reg %i to [sn:%lli]\n", + (int)inst->destRegIdx(i), inst->seqNum); + thread->renameTable[inst->destRegIdx(i)] = inst; + ++freed_regs; + } + + if (inst->traceData) { + inst->traceData->finalize(); + inst->traceData = NULL; + } + + inst->clearDependents(); + + frontEnd->addFreeRegs(freed_regs); + + instList.pop_back(); + + --numInsts; + cpu->numInst++; + thread->numInsts++; + ++thread->funcExeInst; + // Maybe move this to where teh fault is handled; if the fault is handled, + // don't try to set this myself as the fault will set it. If not, then + // I set thread->PC = thread->nextPC and thread->nextPC = thread->nextPC + 4. + thread->setPC(thread->readNextPC()); + updateComInstStats(inst); + + // Write the done sequence number here. +// LSQ.commitLoads(inst->seqNum); +// LSQ.commitStores(inst->seqNum); + toIEW->doneSeqNum = inst->seqNum; + +#if FULL_SYSTEM + int count = 0; + Addr oldpc; + do { + if (count == 0) + assert(!thread->inSyscall && !thread->trapPending); + oldpc = thread->readPC(); + cpu->system->pcEventQueue.service( + thread->getXCProxy()); + count++; + } while (oldpc != thread->readPC()); + if (count > 1) { + DPRINTF(BE, "PC skip function event, stopping commit\n"); + xcSquash = true; + return false; + } +#endif + return true; +} + +template +void +LWBackEnd::commitInsts() +{ + int commit_width = commitWidth ? commitWidth : width; + + // Not sure this should be a loop or not. + int inst_num = 0; + while (!instList.empty() && inst_num < commit_width) { + if (instList.back()->isSquashed()) { + instList.back()->clearDependents(); + instList.pop_back(); + --numInsts; + continue; + } + + if (!commitInst(inst_num++)) { + DPRINTF(BE, "Can't commit, Instruction [sn:%lli] PC " + "%#x is head of ROB and not ready\n", + instList.back()->seqNum, instList.back()->readPC()); + break; + } + } + n_committed_dist.sample(inst_num); +} + +template +void +LWBackEnd::squash(const InstSeqNum &sn) +{ + LSQ.squash(sn); + + int freed_regs = 0; + InstListIt waiting_list_end = waitingList.end(); + InstListIt insts_it = waitingList.begin(); + + while (insts_it != waiting_list_end && (*insts_it)->seqNum > sn) + { + if ((*insts_it)->isSquashed()) { + ++insts_it; + continue; + } + DPRINTF(BE, "Squashing instruction on waitingList PC %#x, [sn:%lli].\n", + (*insts_it)->readPC(), + (*insts_it)->seqNum); + + if ((*insts_it)->isMemRef()) { + DPRINTF(BE, "Squashing a waiting mem op [sn:%lli]\n", + (*insts_it)->seqNum); + removeWaitingMemOp((*insts_it)); + } + + waitingList.erase(insts_it++); + waitingInsts--; + } + assert(waitingInsts >= 0); + + insts_it = instList.begin(); + + while (!instList.empty() && (*insts_it)->seqNum > sn) + { + if ((*insts_it)->isSquashed()) { + ++insts_it; + continue; + } + DPRINTF(BE, "Squashing instruction on inst list PC %#x, [sn:%lli].\n", + (*insts_it)->readPC(), + (*insts_it)->seqNum); + + // Mark the instruction as squashed, and ready to commit so that + // it can drain out of the pipeline. + (*insts_it)->setSquashed(); + + (*insts_it)->setCanCommit(); + + (*insts_it)->removeInROB(); + + for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) { + DynInstPtr prev_dest = (*insts_it)->getPrevDestInst(i); + DPRINTF(BE, "Commit rename map setting reg %i to [sn:%lli]\n", + (int)(*insts_it)->destRegIdx(i), prev_dest->seqNum); + renameTable[(*insts_it)->destRegIdx(i)] = prev_dest; + ++freed_regs; + } + + (*insts_it)->clearDependents(); + + instList.erase(insts_it++); + --numInsts; + } + + insts_it = waitingList.begin(); + while (!waitingList.empty() && insts_it != waitingList.end()) { + if ((*insts_it)->seqNum < sn) { + ++insts_it; + continue; + } + assert((*insts_it)->isSquashed()); + + waitingList.erase(insts_it++); + waitingInsts--; + } + + frontEnd->addFreeRegs(freed_regs); +} + +template +void +LWBackEnd::squashFromXC() +{ + InstSeqNum squashed_inst = robEmpty() ? 0 : instList.back()->seqNum - 1; + squash(squashed_inst); + frontEnd->squash(squashed_inst, thread->readPC(), + false, false); + + thread->trapPending = false; + thread->inSyscall = false; + xcSquash = false; + commitStatus = Running; +} + +template +void +LWBackEnd::squashFromTrap() +{ + InstSeqNum squashed_inst = robEmpty() ? 0 : instList.back()->seqNum - 1; + squash(squashed_inst); + frontEnd->squash(squashed_inst, thread->readPC(), + false, false); + + thread->trapPending = false; + thread->inSyscall = false; + trapSquash = false; + commitStatus = Running; +} + +template +void +LWBackEnd::squashDueToBranch(DynInstPtr &inst) +{ + // Update the branch predictor state I guess + DPRINTF(BE, "Squashing due to branch [sn:%lli], will restart at PC %#x\n", + inst->seqNum, inst->readNextPC()); + squash(inst->seqNum); + frontEnd->squash(inst->seqNum, inst->readNextPC(), + true, inst->mispredicted()); +} + +template +void +LWBackEnd::squashDueToMemViolation(DynInstPtr &inst) +{ + // Update the branch predictor state I guess + DPRINTF(BE, "Squashing due to violation [sn:%lli], will restart at PC %#x\n", + inst->seqNum, inst->readNextPC()); + squash(inst->seqNum); + frontEnd->squash(inst->seqNum, inst->readNextPC(), + false, inst->mispredicted()); +} + +template +void +LWBackEnd::squashDueToMemBlocked(DynInstPtr &inst) +{ + DPRINTF(IEW, "Memory blocked, squashing load and younger insts, " + "PC: %#x [sn:%i].\n", inst->readPC(), inst->seqNum); + + squash(inst->seqNum - 1); + frontEnd->squash(inst->seqNum - 1, inst->readPC()); +} + +template +void +LWBackEnd::fetchFault(Fault &fault) +{ + faultFromFetch = fault; + fetchHasFault = true; +} + +template +void +LWBackEnd::updateExeInstStats(DynInstPtr &inst) +{ + int thread_number = inst->threadNumber; + + // + // Pick off the software prefetches + // +#ifdef TARGET_ALPHA + if (inst->isDataPrefetch()) + exe_swp[thread_number]++; + else + exe_inst[thread_number]++; +#else + exe_inst[thread_number]++; +#endif + + // + // Control operations + // + if (inst->isControl()) + exe_branches[thread_number]++; + + // + // Memory operations + // + if (inst->isMemRef()) { + exe_refs[thread_number]++; + + if (inst->isLoad()) + exe_loads[thread_number]++; + } +} + +template +void +LWBackEnd::updateComInstStats(DynInstPtr &inst) +{ + unsigned thread = inst->threadNumber; + + // + // Pick off the software prefetches + // +#ifdef TARGET_ALPHA + if (inst->isDataPrefetch()) { + stat_com_swp[thread]++; + } else { + stat_com_inst[thread]++; + } +#else + stat_com_inst[thread]++; +#endif + + // + // Control Instructions + // + if (inst->isControl()) + stat_com_branches[thread]++; + + // + // Memory references + // + if (inst->isMemRef()) { + stat_com_refs[thread]++; + + if (inst->isLoad()) { + stat_com_loads[thread]++; + } + } + + if (inst->isMemBarrier()) { + stat_com_membars[thread]++; + } +} + +template +void +LWBackEnd::dumpInsts() +{ + int num = 0; + int valid_num = 0; + + InstListIt inst_list_it = --(instList.end()); + + cprintf("ExeList size: %i\n", exeList.size()); + + cprintf("Inst list size: %i\n", instList.size()); + + while (inst_list_it != instList.end()) + { + cprintf("Instruction:%i\n", + num); + if (!(*inst_list_it)->isSquashed()) { + if (!(*inst_list_it)->isIssued()) { + ++valid_num; + cprintf("Count:%i\n", valid_num); + } else if ((*inst_list_it)->isMemRef() && + !(*inst_list_it)->memOpDone) { + // Loads that have not been marked as executed still count + // towards the total instructions. + ++valid_num; + cprintf("Count:%i\n", valid_num); + } + } + + cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" + "Issued:%i\nSquashed:%i\n", + (*inst_list_it)->readPC(), + (*inst_list_it)->seqNum, + (*inst_list_it)->threadNumber, + (*inst_list_it)->isIssued(), + (*inst_list_it)->isSquashed()); + + if ((*inst_list_it)->isMemRef()) { + cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); + } + + cprintf("\n"); + + inst_list_it--; + ++num; + } + + cprintf("Waiting list size: %i\n", waitingList.size()); + + inst_list_it = --(waitingList.end()); + + while (inst_list_it != waitingList.end()) + { + cprintf("Instruction:%i\n", + num); + if (!(*inst_list_it)->isSquashed()) { + if (!(*inst_list_it)->isIssued()) { + ++valid_num; + cprintf("Count:%i\n", valid_num); + } else if ((*inst_list_it)->isMemRef() && + !(*inst_list_it)->memOpDone) { + // Loads that have not been marked as executed still count + // towards the total instructions. + ++valid_num; + cprintf("Count:%i\n", valid_num); + } + } + + cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" + "Issued:%i\nSquashed:%i\n", + (*inst_list_it)->readPC(), + (*inst_list_it)->seqNum, + (*inst_list_it)->threadNumber, + (*inst_list_it)->isIssued(), + (*inst_list_it)->isSquashed()); + + if ((*inst_list_it)->isMemRef()) { + cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); + } + + cprintf("\n"); + + inst_list_it--; + ++num; + } + + cprintf("waitingMemOps list size: %i\n", waitingMemOps.size()); + + MemIt waiting_it = waitingMemOps.begin(); + + while (waiting_it != waitingMemOps.end()) + { + cprintf("[sn:%lli] ", (*waiting_it)); + waiting_it++; + ++num; + } + cprintf("\n"); +} diff --git a/cpu/ozone/lw_lsq.cc b/cpu/ozone/lw_lsq.cc new file mode 100644 index 000000000..922228b09 --- /dev/null +++ b/cpu/ozone/lw_lsq.cc @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/ozone/ozone_impl.hh" +#include "cpu/ozone/lw_lsq_impl.hh" + +// Force the instantiation of LDSTQ for all the implementations we care about. +template class OzoneLWLSQ; + diff --git a/cpu/ozone/lw_lsq.hh b/cpu/ozone/lw_lsq.hh new file mode 100644 index 000000000..2b2c25b58 --- /dev/null +++ b/cpu/ozone/lw_lsq.hh @@ -0,0 +1,649 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_OZONE_LW_LSQ_HH__ +#define __CPU_OZONE_LW_LSQ_HH__ + +#include +#include +#include +#include + +#include "arch/faults.hh" +#include "arch/isa_traits.hh" +#include "config/full_system.hh" +#include "base/hashmap.hh" +#include "cpu/inst_seq.hh" +#include "mem/mem_interface.hh" +//#include "mem/page_table.hh" +#include "sim/sim_object.hh" + +class PageTable; + +/** + * Class that implements the actual LQ and SQ for each specific thread. + * Both are circular queues; load entries are freed upon committing, while + * store entries are freed once they writeback. The LSQUnit tracks if there + * are memory ordering violations, and also detects partial load to store + * forwarding cases (a store only has part of a load's data) that requires + * the load to wait until the store writes back. In the former case it + * holds onto the instruction until the dependence unit looks at it, and + * in the latter it stalls the LSQ until the store writes back. At that + * point the load is replayed. + */ +template +class OzoneLWLSQ { + public: + typedef typename Impl::Params Params; + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::BackEnd BackEnd; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::IssueStruct IssueStruct; + + typedef TheISA::IntReg IntReg; + + typedef typename std::map::iterator LdMapIt; + + private: + class StoreCompletionEvent : public Event { + public: + /** Constructs a store completion event. */ + StoreCompletionEvent(DynInstPtr &inst, BackEnd *be, + Event *wb_event, OzoneLWLSQ *lsq_ptr); + + /** Processes the store completion event. */ + void process(); + + /** Returns the description of this event. */ + const char *description(); + + private: + /** The store index of the store being written back. */ + DynInstPtr inst; + + BackEnd *be; + /** The writeback event for the store. Needed for store + * conditionals. + */ + Event *wbEvent; + /** The pointer to the LSQ unit that issued the store. */ + OzoneLWLSQ *lsqPtr; + }; + + public: + /** Constructs an LSQ unit. init() must be called prior to use. */ + OzoneLWLSQ(); + + /** Initializes the LSQ unit with the specified number of entries. */ + void init(Params *params, unsigned maxLQEntries, + unsigned maxSQEntries, unsigned id); + + /** Returns the name of the LSQ unit. */ + std::string name() const; + + /** Sets the CPU pointer. */ + void setCPU(FullCPU *cpu_ptr) + { cpu = cpu_ptr; } + + /** Sets the back-end stage pointer. */ + void setBE(BackEnd *be_ptr) + { be = be_ptr; } + + /** Sets the page table pointer. */ + void setPageTable(PageTable *pt_ptr); + + /** Ticks the LSQ unit, which in this case only resets the number of + * used cache ports. + * @todo: Move the number of used ports up to the LSQ level so it can + * be shared by all LSQ units. + */ + void tick() { usedPorts = 0; } + + /** Inserts an instruction. */ + void insert(DynInstPtr &inst); + /** Inserts a load instruction. */ + void insertLoad(DynInstPtr &load_inst); + /** Inserts a store instruction. */ + void insertStore(DynInstPtr &store_inst); + + /** Executes a load instruction. */ + Fault executeLoad(DynInstPtr &inst); + +// Fault executeLoad(int lq_idx); + /** Executes a store instruction. */ + Fault executeStore(DynInstPtr &inst); + + /** Commits the head load. */ + void commitLoad(); + /** Commits loads older than a specific sequence number. */ + void commitLoads(InstSeqNum &youngest_inst); + + /** Commits stores older than a specific sequence number. */ + void commitStores(InstSeqNum &youngest_inst); + + /** Writes back stores. */ + void writebackStores(); + + // @todo: Include stats in the LSQ unit. + //void regStats(); + + /** Clears all the entries in the LQ. */ + void clearLQ(); + + /** Clears all the entries in the SQ. */ + void clearSQ(); + + /** Resizes the LQ to a given size. */ + void resizeLQ(unsigned size); + + /** Resizes the SQ to a given size. */ + void resizeSQ(unsigned size); + + /** Squashes all instructions younger than a specific sequence number. */ + void squash(const InstSeqNum &squashed_num); + + /** Returns if there is a memory ordering violation. Value is reset upon + * call to getMemDepViolator(). + */ + bool violation() { return memDepViolator; } + + /** Returns the memory ordering violator. */ + DynInstPtr getMemDepViolator(); + + /** Returns if a load became blocked due to the memory system. It clears + * the bool's value upon this being called. + */ + bool loadBlocked() + { return isLoadBlocked; } + + void clearLoadBlocked() + { isLoadBlocked = false; } + + bool isLoadBlockedHandled() + { return loadBlockedHandled; } + + void setLoadBlockedHandled() + { loadBlockedHandled = true; } + + /** Returns the number of free entries (min of free LQ and SQ entries). */ + unsigned numFreeEntries(); + + /** Returns the number of loads ready to execute. */ + int numLoadsReady(); + + /** Returns the number of loads in the LQ. */ + int numLoads() { return loads; } + + /** Returns the number of stores in the SQ. */ + int numStores() { return stores; } + + /** Returns if either the LQ or SQ is full. */ + bool isFull() { return lqFull() || sqFull(); } + + /** Returns if the LQ is full. */ + bool lqFull() { return loads >= (LQEntries - 1); } + + /** Returns if the SQ is full. */ + bool sqFull() { return stores >= (SQEntries - 1); } + + /** Debugging function to dump instructions in the LSQ. */ + void dumpInsts(); + + /** Returns the number of instructions in the LSQ. */ + unsigned getCount() { return loads + stores; } + + /** Returns if there are any stores to writeback. */ + bool hasStoresToWB() { return storesToWB; } + + /** Returns the number of stores to writeback. */ + int numStoresToWB() { return storesToWB; } + + /** Returns if the LSQ unit will writeback on this cycle. */ + bool willWB() { return storeQueue.back().canWB && + !storeQueue.back().completed && + !dcacheInterface->isBlocked(); } + + private: + /** Completes the store at the specified index. */ + void completeStore(int store_idx); + + private: + /** Pointer to the CPU. */ + FullCPU *cpu; + + /** Pointer to the back-end stage. */ + BackEnd *be; + + /** Pointer to the D-cache. */ + MemInterface *dcacheInterface; + + /** Pointer to the page table. */ + PageTable *pTable; + + public: + struct SQEntry { + /** Constructs an empty store queue entry. */ + SQEntry() + : inst(NULL), req(NULL), size(0), data(0), + canWB(0), committed(0), completed(0), lqIt(NULL) + { } + + /** Constructs a store queue entry for a given instruction. */ + SQEntry(DynInstPtr &_inst) + : inst(_inst), req(NULL), size(0), data(0), + canWB(0), committed(0), completed(0), lqIt(NULL) + { } + + /** The store instruction. */ + DynInstPtr inst; + /** The memory request for the store. */ + MemReqPtr req; + /** The size of the store. */ + int size; + /** The store data. */ + IntReg data; + /** Whether or not the store can writeback. */ + bool canWB; + /** Whether or not the store is committed. */ + bool committed; + /** Whether or not the store is completed. */ + bool completed; + + typename std::list::iterator lqIt; + }; + + enum Status { + Running, + Idle, + DcacheMissStall, + DcacheMissSwitch + }; + + private: + /** The OzoneLWLSQ thread id. */ + unsigned lsqID; + + /** The status of the LSQ unit. */ + Status _status; + + /** The store queue. */ +// std::vector storeQueue; + std::list storeQueue; + /** The load queue. */ +// std::vector loadQueue; + std::list loadQueue; + + typedef typename std::list::iterator SQIt; + typedef typename std::list::iterator LQIt; + + + struct HashFn { + size_t operator() (const int a) const + { + unsigned hash = (((a >> 14) ^ ((a >> 2) & 0xffff))) & 0x7FFFFFFF; + + return hash; + } + }; + + m5::hash_map SQItHash; + std::queue SQIndices; + m5::hash_map LQItHash; + std::queue LQIndices; + + typedef typename m5::hash_map::iterator LQHashIt; + typedef typename m5::hash_map::iterator SQHashIt; + // Consider making these 16 bits + /** The number of LQ entries. */ + unsigned LQEntries; + /** The number of SQ entries. */ + unsigned SQEntries; + + /** The number of load instructions in the LQ. */ + int loads; + /** The number of store instructions in the SQ (excludes those waiting to + * writeback). + */ + int stores; + + int storesToWB; + + /// @todo Consider moving to a more advanced model with write vs read ports + /** The number of cache ports available each cycle. */ + int cachePorts; + + /** The number of used cache ports in this cycle. */ + int usedPorts; + + //list mshrSeqNums; + + //Stats::Scalar<> dcacheStallCycles; + Counter lastDcacheStall; + + // Make these per thread? + /** Whether or not the LSQ is stalled. */ + bool stalled; + /** The store that causes the stall due to partial store to load + * forwarding. + */ + InstSeqNum stallingStoreIsn; + /** The index of the above store. */ +// int stallingLoadIdx; + LQIt stallingLoad; + + /** Whether or not a load is blocked due to the memory system. It is + * cleared when this value is checked via loadBlocked(). + */ + bool isLoadBlocked; + + bool loadBlockedHandled; + + InstSeqNum blockedLoadSeqNum; + + /** The oldest faulting load instruction. */ + DynInstPtr loadFaultInst; + /** The oldest faulting store instruction. */ + DynInstPtr storeFaultInst; + + /** The oldest load that caused a memory ordering violation. */ + DynInstPtr memDepViolator; + + // Will also need how many read/write ports the Dcache has. Or keep track + // of that in stage that is one level up, and only call executeLoad/Store + // the appropriate number of times. + + public: + /** Executes the load at the given index. */ + template + Fault read(MemReqPtr &req, T &data, int load_idx); + + /** Executes the store at the given index. */ + template + Fault write(MemReqPtr &req, T &data, int store_idx); + + /** Returns the index of the head load instruction. */ +// int getLoadHead() { return loadHead; } + /** Returns the sequence number of the head load instruction. */ + InstSeqNum getLoadHeadSeqNum() + { + if (!loadQueue.empty()) { + return loadQueue.back()->seqNum; + } else { + return 0; + } + + } + + /** Returns the index of the head store instruction. */ +// int getStoreHead() { return storeHead; } + /** Returns the sequence number of the head store instruction. */ + InstSeqNum getStoreHeadSeqNum() + { + if (!storeQueue.empty()) { + return storeQueue.back().inst->seqNum; + } else { + return 0; + } + + } + + /** Returns whether or not the LSQ unit is stalled. */ + bool isStalled() { return stalled; } +}; + +template +template +Fault +OzoneLWLSQ::read(MemReqPtr &req, T &data, int load_idx) +{ + //Depending on issue2execute delay a squashed load could + //execute if it is found to be squashed in the same + //cycle it is scheduled to execute + typename m5::hash_map::iterator + lq_hash_it = LQItHash.find(load_idx); + assert(lq_hash_it != LQItHash.end()); + DynInstPtr inst = (*(*lq_hash_it).second); + + if (inst->isExecuted()) { + panic("Should not reach this point with split ops!"); + + memcpy(&data,req->data,req->size); + + return NoFault; + } + + // Make sure this isn't an uncacheable access + // A bit of a hackish way to get uncached accesses to work only if they're + // at the head of the LSQ and are ready to commit (at the head of the ROB + // too). + // @todo: Fix uncached accesses. + if (req->flags & UNCACHEABLE && + (inst != loadQueue.back() || !inst->reachedCommit)) { + DPRINTF(OzoneLSQ, "[sn:%lli] Uncached load and not head of " + "commit/LSQ!\n", + inst->seqNum); + be->rescheduleMemInst(inst); + return TheISA::genMachineCheckFault(); + } + + // Check the SQ for any previous stores that might lead to forwarding + SQIt sq_it = storeQueue.begin(); + int store_size = 0; + + DPRINTF(OzoneLSQ, "Read called, load idx: %i addr: %#x\n", + load_idx, req->paddr); + + while (sq_it != storeQueue.end() && (*sq_it).inst->seqNum > inst->seqNum) + ++sq_it; + + while (1) { + // End once we've reached the top of the LSQ + if (sq_it == storeQueue.end()) { + break; + } + + assert((*sq_it).inst); + + store_size = (*sq_it).size; + + if (store_size == 0) { + sq_it++; + continue; + } + + // Check if the store data is within the lower and upper bounds of + // addresses that the request needs. + bool store_has_lower_limit = + req->vaddr >= (*sq_it).inst->effAddr; + bool store_has_upper_limit = + (req->vaddr + req->size) <= ((*sq_it).inst->effAddr + + store_size); + bool lower_load_has_store_part = + req->vaddr < ((*sq_it).inst->effAddr + + store_size); + bool upper_load_has_store_part = + (req->vaddr + req->size) > (*sq_it).inst->effAddr; + + // If the store's data has all of the data needed, we can forward. + if (store_has_lower_limit && store_has_upper_limit) { + + int shift_amt = req->vaddr & (store_size - 1); + // Assumes byte addressing + shift_amt = shift_amt << 3; + + // Cast this to type T? + data = (*sq_it).data >> shift_amt; + + req->cmd = Read; + assert(!req->completionEvent); + req->completionEvent = NULL; + req->time = curTick; + assert(!req->data); + req->data = new uint8_t[64]; + + memcpy(req->data, &data, req->size); + + DPRINTF(OzoneLSQ, "Forwarding from store [sn:%lli] to load to " + "[sn:%lli] addr %#x, data %#x\n", + (*sq_it).inst->seqNum, inst->seqNum, req->vaddr, *(req->data)); + + typename BackEnd::LdWritebackEvent *wb = + new typename BackEnd::LdWritebackEvent(inst, + be); + + // We'll say this has a 1 cycle load-store forwarding latency + // for now. + // FIXME - Need to make this a parameter. + wb->schedule(curTick); + + // Should keep track of stat for forwarded data + return NoFault; + } else if ((store_has_lower_limit && lower_load_has_store_part) || + (store_has_upper_limit && upper_load_has_store_part) || + (lower_load_has_store_part && upper_load_has_store_part)) { + // This is the partial store-load forwarding case where a store + // has only part of the load's data. + + // If it's already been written back, then don't worry about + // stalling on it. + if ((*sq_it).completed) { + sq_it++; + break; + } + + // Must stall load and force it to retry, so long as it's the oldest + // load that needs to do so. + if (!stalled || + (stalled && + inst->seqNum < + (*stallingLoad)->seqNum)) { + stalled = true; + stallingStoreIsn = (*sq_it).inst->seqNum; + stallingLoad = (*lq_hash_it).second; + } + + // Tell IQ/mem dep unit that this instruction will need to be + // rescheduled eventually + be->rescheduleMemInst(inst); + + DPRINTF(OzoneLSQ, "Load-store forwarding mis-match. " + "Store [sn:%lli] to load addr %#x\n", + (*sq_it).inst->seqNum, req->vaddr); + + return NoFault; + } + sq_it++; + } + + + // If there's no forwarding case, then go access memory + ++usedPorts; + + // if we have a cache, do cache access too + if (dcacheInterface) { + if (dcacheInterface->isBlocked()) { + // There's an older load that's already going to squash. + if (isLoadBlocked && blockedLoadSeqNum < inst->seqNum) + return NoFault; + + isLoadBlocked = true; + loadBlockedHandled = false; + blockedLoadSeqNum = inst->seqNum; + // No fault occurred, even though the interface is blocked. + return NoFault; + } + + DPRINTF(OzoneLSQ, "D-cache: PC:%#x reading from paddr:%#x " + "vaddr:%#x flags:%i\n", + inst->readPC(), req->paddr, req->vaddr, req->flags); + + // Setup MemReq pointer + req->cmd = Read; + req->completionEvent = NULL; + req->time = curTick; + assert(!req->data); + req->data = new uint8_t[64]; + + assert(!req->completionEvent); + req->completionEvent = + new typename BackEnd::LdWritebackEvent(inst, be); + + // Do Cache Access + MemAccessResult result = dcacheInterface->access(req); + + // Ugly hack to get an event scheduled *only* if the access is + // a miss. We really should add first-class support for this + // at some point. + // @todo: Probably should support having no events + if (result != MA_HIT) { + DPRINTF(OzoneLSQ, "D-cache miss!\n"); + DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n", + inst->seqNum); + + lastDcacheStall = curTick; + + _status = DcacheMissStall; + + } else { +// DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n", +// inst->seqNum); + + DPRINTF(OzoneLSQ, "D-cache hit!\n"); + } + } else { + fatal("Must use D-cache with new memory system"); + } + + return NoFault; +} + +template +template +Fault +OzoneLWLSQ::write(MemReqPtr &req, T &data, int store_idx) +{ + SQHashIt sq_hash_it = SQItHash.find(store_idx); + assert(sq_hash_it != SQItHash.end()); + + SQIt sq_it = (*sq_hash_it).second; + assert((*sq_it).inst); + + DPRINTF(OzoneLSQ, "Doing write to store idx %i, addr %#x data %#x" + " | [sn:%lli]\n", + store_idx, req->paddr, data, (*sq_it).inst->seqNum); + + (*sq_it).req = req; + (*sq_it).size = sizeof(T); + (*sq_it).data = data; + + // This function only writes the data to the store queue, so no fault + // can happen here. + return NoFault; +} + +#endif // __CPU_OZONE_LW_LSQ_HH__ diff --git a/cpu/ozone/lw_lsq_impl.hh b/cpu/ozone/lw_lsq_impl.hh new file mode 100644 index 000000000..54d7ead6c --- /dev/null +++ b/cpu/ozone/lw_lsq_impl.hh @@ -0,0 +1,766 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arch/isa_traits.hh" +#include "base/str.hh" +#include "cpu/ozone/lw_lsq.hh" + +template +OzoneLWLSQ::StoreCompletionEvent::StoreCompletionEvent(DynInstPtr &_inst, + BackEnd *_be, + Event *wb_event, + OzoneLWLSQ *lsq_ptr) + : Event(&mainEventQueue), + inst(_inst), + be(_be), + wbEvent(wb_event), + lsqPtr(lsq_ptr) +{ + this->setFlags(Event::AutoDelete); +} + +template +void +OzoneLWLSQ::StoreCompletionEvent::process() +{ + DPRINTF(OzoneLSQ, "Cache miss complete for store [sn:%lli]\n", + inst->seqNum); + + //lsqPtr->removeMSHR(lsqPtr->storeQueue[storeIdx].inst->seqNum); + +// lsqPtr->cpu->wakeCPU(); + if (wbEvent) { + wbEvent->process(); + delete wbEvent; + } + + lsqPtr->completeStore(inst->sqIdx); + be->removeDcacheMiss(inst); +} + +template +const char * +OzoneLWLSQ::StoreCompletionEvent::description() +{ + return "LSQ store completion event"; +} + +template +OzoneLWLSQ::OzoneLWLSQ() + : loads(0), stores(0), storesToWB(0), stalled(false), isLoadBlocked(false), + loadBlockedHandled(false) +{ +} + +template +void +OzoneLWLSQ::init(Params *params, unsigned maxLQEntries, + unsigned maxSQEntries, unsigned id) + +{ + DPRINTF(OzoneLSQ, "Creating OzoneLWLSQ%i object.\n",id); + + lsqID = id; + + LQEntries = maxLQEntries; + SQEntries = maxSQEntries; + + for (int i = 0; i < LQEntries * 10; i++) { + LQIndices.push(i); + SQIndices.push(i); + } + + // May want to initialize these entries to NULL + +// loadHead = loadTail = 0; + +// storeHead = storeWBIdx = storeTail = 0; + + usedPorts = 0; + cachePorts = params->cachePorts; + + dcacheInterface = params->dcacheInterface; + + loadFaultInst = storeFaultInst = memDepViolator = NULL; + + blockedLoadSeqNum = 0; +} + +template +std::string +OzoneLWLSQ::name() const +{ + return "lsqunit"; +} + +template +void +OzoneLWLSQ::clearLQ() +{ + loadQueue.clear(); +} + +template +void +OzoneLWLSQ::clearSQ() +{ + storeQueue.clear(); +} + +template +void +OzoneLWLSQ::setPageTable(PageTable *pt_ptr) +{ + DPRINTF(OzoneLSQ, "Setting the page table pointer.\n"); + pTable = pt_ptr; +} + +template +void +OzoneLWLSQ::resizeLQ(unsigned size) +{ + assert( size >= LQEntries); + + if (size > LQEntries) { + while (size > loadQueue.size()) { + DynInstPtr dummy; + loadQueue.push_back(dummy); + LQEntries++; + } + } else { + LQEntries = size; + } + +} + +template +void +OzoneLWLSQ::resizeSQ(unsigned size) +{ + if (size > SQEntries) { + while (size > storeQueue.size()) { + SQEntry dummy; + storeQueue.push_back(dummy); + SQEntries++; + } + } else { + SQEntries = size; + } +} + +template +void +OzoneLWLSQ::insert(DynInstPtr &inst) +{ + // Make sure we really have a memory reference. + assert(inst->isMemRef()); + + // Make sure it's one of the two classes of memory references. + assert(inst->isLoad() || inst->isStore()); + + if (inst->isLoad()) { + insertLoad(inst); + } else { + insertStore(inst); + } + +// inst->setInLSQ(); +} + +template +void +OzoneLWLSQ::insertLoad(DynInstPtr &load_inst) +{ + assert(!LQIndices.empty()); + int load_index = LQIndices.front(); + LQIndices.pop(); + + DPRINTF(OzoneLSQ, "Inserting load PC %#x, idx:%i [sn:%lli]\n", + load_inst->readPC(), load_index, load_inst->seqNum); + + load_inst->lqIdx = load_index; + + loadQueue.push_front(load_inst); + LQItHash[load_index] = loadQueue.begin(); + + ++loads; +} + +template +void +OzoneLWLSQ::insertStore(DynInstPtr &store_inst) +{ + // Make sure it is not full before inserting an instruction. + assert(stores - storesToWB < SQEntries); + + assert(!SQIndices.empty()); + int store_index = SQIndices.front(); + SQIndices.pop(); + + DPRINTF(OzoneLSQ, "Inserting store PC %#x, idx:%i [sn:%lli]\n", + store_inst->readPC(), store_index, store_inst->seqNum); + + store_inst->sqIdx = store_index; + SQEntry entry(store_inst); + if (loadQueue.empty()) { + entry.lqIt = loadQueue.end(); + } else { + entry.lqIt = loadQueue.begin(); + } + storeQueue.push_front(entry); + + SQItHash[store_index] = storeQueue.begin(); + + ++stores; +} + +template +typename Impl::DynInstPtr +OzoneLWLSQ::getMemDepViolator() +{ + DynInstPtr temp = memDepViolator; + + memDepViolator = NULL; + + return temp; +} + +template +unsigned +OzoneLWLSQ::numFreeEntries() +{ + unsigned free_lq_entries = LQEntries - loads; + unsigned free_sq_entries = SQEntries - stores; + + // Both the LQ and SQ entries have an extra dummy entry to differentiate + // empty/full conditions. Subtract 1 from the free entries. + if (free_lq_entries < free_sq_entries) { + return free_lq_entries - 1; + } else { + return free_sq_entries - 1; + } +} + +template +int +OzoneLWLSQ::numLoadsReady() +{ + int retval = 0; + LQIt lq_it = loadQueue.begin(); + LQIt end_it = loadQueue.end(); + + while (lq_it != end_it) { + if ((*lq_it)->readyToIssue()) { + ++retval; + } + } + + return retval; +} + +template +Fault +OzoneLWLSQ::executeLoad(DynInstPtr &inst) +{ + // Execute a specific load. + Fault load_fault = NoFault; + + DPRINTF(OzoneLSQ, "Executing load PC %#x, [sn:%lli]\n", + inst->readPC(),inst->seqNum); + + // Make sure it's really in the list. + // Normally it should always be in the list. However, + /* due to a syscall it may not be the list. +#ifdef DEBUG + int i = loadHead; + while (1) { + if (i == loadTail && !find(inst)) { + assert(0 && "Load not in the queue!"); + } else if (loadQueue[i] == inst) { + break; + } + + i = i + 1; + if (i >= LQEntries) { + i = 0; + } + } +#endif // DEBUG*/ + + load_fault = inst->initiateAcc(); + + // Might want to make sure that I'm not overwriting a previously faulting + // instruction that hasn't been checked yet. + // Actually probably want the oldest faulting load + if (load_fault != NoFault) { + DPRINTF(OzoneLSQ, "Load [sn:%lli] has a fault\n", inst->seqNum); + // Maybe just set it as can commit here, although that might cause + // some other problems with sending traps to the ROB too quickly. + be->instToCommit(inst); +// iewStage->activityThisCycle(); + } + + return load_fault; +} + +template +Fault +OzoneLWLSQ::executeStore(DynInstPtr &store_inst) +{ + // Make sure that a store exists. + assert(stores != 0); + + int store_idx = store_inst->sqIdx; + SQHashIt sq_hash_it = SQItHash.find(store_idx); + assert(sq_hash_it != SQItHash.end()); + DPRINTF(OzoneLSQ, "Executing store PC %#x [sn:%lli]\n", + store_inst->readPC(), store_inst->seqNum); + + SQIt sq_it = (*sq_hash_it).second; + + Fault store_fault = store_inst->initiateAcc(); + + // Store size should now be available. Use it to get proper offset for + // addr comparisons. + int size = (*sq_it).size; + + if (size == 0) { + DPRINTF(OzoneLSQ,"Fault on Store PC %#x, [sn:%lli],Size = 0\n", + store_inst->readPC(),store_inst->seqNum); + + return store_fault; + } + + assert(store_fault == NoFault); + + if (!storeFaultInst) { + if (store_fault != NoFault) { + panic("Fault in a store instruction!"); + storeFaultInst = store_inst; + } else if (store_inst->isNonSpeculative()) { + // Nonspeculative accesses (namely store conditionals) + // need to set themselves as able to writeback if we + // haven't had a fault by here. + (*sq_it).canWB = true; + + ++storesToWB; + DPRINTF(OzoneLSQ, "Nonspeculative store! storesToWB:%i\n", + storesToWB); + } + } + + LQIt lq_it = --(loadQueue.end()); + + if (!memDepViolator) { + while (lq_it != loadQueue.end()) { + if ((*lq_it)->seqNum < store_inst->seqNum) { + lq_it--; + continue; + } + // Actually should only check loads that have actually executed + // Might be safe because effAddr is set to InvalAddr when the + // dyn inst is created. + + // Must actually check all addrs in the proper size range + // Which is more correct than needs to be. What if for now we just + // assume all loads are quad-word loads, and do the addr based + // on that. + // @todo: Fix this, magic number being used here + if (((*lq_it)->effAddr >> 8) == + (store_inst->effAddr >> 8)) { + // A load incorrectly passed this store. Squash and refetch. + // For now return a fault to show that it was unsuccessful. + memDepViolator = (*lq_it); + + return TheISA::genMachineCheckFault(); + } + + lq_it--; + } + + // If we've reached this point, there was no violation. + memDepViolator = NULL; + } + + return store_fault; +} + +template +void +OzoneLWLSQ::commitLoad() +{ + assert(!loadQueue.empty()); + + DPRINTF(OzoneLSQ, "[sn:%lli] Committing head load instruction, PC %#x\n", + loadQueue.back()->seqNum, loadQueue.back()->readPC()); + + LQIndices.push(loadQueue.back()->lqIdx); + LQItHash.erase(loadQueue.back()->lqIdx); + + loadQueue.pop_back(); + + --loads; +} + +template +void +OzoneLWLSQ::commitLoads(InstSeqNum &youngest_inst) +{ + assert(loads == 0 || !loadQueue.empty()); + + while (loads != 0 && + loadQueue.back()->seqNum <= youngest_inst) { + commitLoad(); + } +} + +template +void +OzoneLWLSQ::commitStores(InstSeqNum &youngest_inst) +{ + assert(stores == 0 || !storeQueue.empty()); + + SQIt sq_it = --(storeQueue.end()); + while (!storeQueue.empty() && sq_it != storeQueue.end()) { + assert((*sq_it).inst); + if (!(*sq_it).canWB) { + if ((*sq_it).inst->seqNum > youngest_inst) { + break; + } + ++storesToWB; + + DPRINTF(OzoneLSQ, "Marking store as able to write back, PC " + "%#x [sn:%lli], storesToWB:%i\n", + (*sq_it).inst->readPC(), + (*sq_it).inst->seqNum, + storesToWB); + + (*sq_it).canWB = true; + } + + sq_it--; + } +} + +template +void +OzoneLWLSQ::writebackStores() +{ + SQIt sq_it = --(storeQueue.end()); + while (storesToWB > 0 && + sq_it != storeQueue.end() && + (*sq_it).inst && + (*sq_it).canWB && + usedPorts < cachePorts) { + + DynInstPtr inst = (*sq_it).inst; + + if ((*sq_it).size == 0 && !(*sq_it).completed) { + sq_it--; + completeStore(inst->sqIdx); + + continue; + } + + if (inst->isDataPrefetch() || (*sq_it).committed) { + sq_it--; + continue; + } + + if (dcacheInterface && dcacheInterface->isBlocked()) { + DPRINTF(OzoneLSQ, "Unable to write back any more stores, cache" + " is blocked!\n"); + break; + } + + ++usedPorts; + + assert((*sq_it).req); + assert(!(*sq_it).committed); + + MemReqPtr req = (*sq_it).req; + (*sq_it).committed = true; + + req->cmd = Write; + req->completionEvent = NULL; + req->time = curTick; + assert(!req->data); + req->data = new uint8_t[64]; + memcpy(req->data, (uint8_t *)&(*sq_it).data, req->size); + + DPRINTF(OzoneLSQ, "D-Cache: Writing back store idx:%i PC:%#x " + "to Addr:%#x, data:%#x [sn:%lli]\n", + inst->sqIdx,inst->readPC(), + req->paddr, *(req->data), + inst->seqNum); + + if (dcacheInterface) { + MemAccessResult result = dcacheInterface->access(req); + + if (isStalled() && + inst->seqNum == stallingStoreIsn) { + DPRINTF(OzoneLSQ, "Unstalling, stalling store [sn:%lli] " + "load [sn:%lli]\n", + stallingStoreIsn, (*stallingLoad)->seqNum); + stalled = false; + stallingStoreIsn = 0; + be->replayMemInst((*stallingLoad)); + } + + if (result != MA_HIT && dcacheInterface->doEvents()) { +// Event *wb = NULL; + + typename BackEnd::LdWritebackEvent *wb = NULL; + if (req->flags & LOCKED) { + // Stx_C does not generate a system port transaction. + req->result=1; + wb = new typename BackEnd::LdWritebackEvent(inst, + be); + } + + DPRINTF(OzoneLSQ,"D-Cache Write Miss!\n"); + +// DPRINTF(Activity, "Active st accessing mem miss [sn:%lli]\n", +// inst->seqNum); + + // Will stores need their own kind of writeback events? + // Do stores even need writeback events? + assert(!req->completionEvent); + req->completionEvent = new + StoreCompletionEvent(inst, be, wb, this); + be->addDcacheMiss(inst); + + lastDcacheStall = curTick; + + _status = DcacheMissStall; + + // Increment stat here or something + + sq_it--; + } else { + DPRINTF(OzoneLSQ,"D-Cache: Write Hit on idx:%i !\n", + inst->sqIdx); + +// DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n", +// inst->seqNum); + + if (req->flags & LOCKED) { + // Stx_C does not generate a system port transaction. + if (req->flags & UNCACHEABLE) { + req->result = 2; + } else { + req->result = 1; + } + + typename BackEnd::LdWritebackEvent *wb = + new typename BackEnd::LdWritebackEvent(inst, + be); + wb->schedule(curTick); + } + sq_it--; + completeStore(inst->sqIdx); + } + } else { + panic("Must HAVE DCACHE!!!!!\n"); + } + } + + // Not sure this should set it to 0. + usedPorts = 0; + + assert(stores >= 0 && storesToWB >= 0); +} + +template +void +OzoneLWLSQ::squash(const InstSeqNum &squashed_num) +{ + DPRINTF(OzoneLSQ, "Squashing until [sn:%lli]!" + "(Loads:%i Stores:%i)\n",squashed_num,loads,stores); + + + LQIt lq_it = loadQueue.begin(); + + while (loads != 0 && (*lq_it)->seqNum > squashed_num) { + assert(!loadQueue.empty()); + // Clear the smart pointer to make sure it is decremented. + DPRINTF(OzoneLSQ,"Load Instruction PC %#x squashed, " + "[sn:%lli]\n", + (*lq_it)->readPC(), + (*lq_it)->seqNum); + + if (isStalled() && lq_it == stallingLoad) { + stalled = false; + stallingStoreIsn = 0; + stallingLoad = NULL; + } + + --loads; + + // Inefficient! + LQHashIt lq_hash_it = LQItHash.find((*lq_it)->lqIdx); + assert(lq_hash_it != LQItHash.end()); + LQItHash.erase(lq_hash_it); + LQIndices.push((*lq_it)->lqIdx); + loadQueue.erase(lq_it++); + } + + if (isLoadBlocked) { + if (squashed_num < blockedLoadSeqNum) { + isLoadBlocked = false; + loadBlockedHandled = false; + blockedLoadSeqNum = 0; + } + } + + SQIt sq_it = storeQueue.begin(); + + while (stores != 0 && (*sq_it).inst->seqNum > squashed_num) { + assert(!storeQueue.empty()); + // Clear the smart pointer to make sure it is decremented. + DPRINTF(OzoneLSQ,"Store Instruction PC %#x idx:%i squashed [sn:%lli]\n", + (*sq_it).inst->readPC(), (*sq_it).inst->sqIdx, + (*sq_it).inst->seqNum); + + // I don't think this can happen. It should have been cleared by the + // stalling load. + if (isStalled() && + (*sq_it).inst->seqNum == stallingStoreIsn) { + panic("Is stalled should have been cleared by stalling load!\n"); + stalled = false; + stallingStoreIsn = 0; + } + + SQHashIt sq_hash_it = SQItHash.find((*sq_it).inst->sqIdx); + assert(sq_hash_it != SQItHash.end()); + SQItHash.erase(sq_hash_it); + SQIndices.push((*sq_it).inst->sqIdx); + (*sq_it).inst = NULL; + (*sq_it).canWB = 0; + + if ((*sq_it).req) { + assert(!(*sq_it).req->completionEvent); + } + (*sq_it).req = NULL; + --stores; + storeQueue.erase(sq_it++); + } +} + +template +void +OzoneLWLSQ::dumpInsts() +{ + cprintf("Load store queue: Dumping instructions.\n"); + cprintf("Load queue size: %i\n", loads); + cprintf("Load queue: "); + + LQIt lq_it = --(loadQueue.end()); + + while (lq_it != loadQueue.end() && (*lq_it)) { + cprintf("[sn:%lli] %#x ", (*lq_it)->seqNum, + (*lq_it)->readPC()); + + lq_it--; + } + + cprintf("\nStore queue size: %i\n", stores); + cprintf("Store queue: "); + + SQIt sq_it = --(storeQueue.end()); + + while (sq_it != storeQueue.end() && (*sq_it).inst) { + cprintf("[sn:%lli]\nPC:%#x\nSize:%i\nCommitted:%i\nCompleted:%i\ncanWB:%i\n", + (*sq_it).inst->seqNum, + (*sq_it).inst->readPC(), + (*sq_it).size, + (*sq_it).committed, + (*sq_it).completed, + (*sq_it).canWB); + + sq_it--; + } + + cprintf("\n"); +} + +template +void +OzoneLWLSQ::completeStore(int store_idx) +{ + SQHashIt sq_hash_it = SQItHash.find(store_idx); + assert(sq_hash_it != SQItHash.end()); + SQIt sq_it = (*sq_hash_it).second; + + assert((*sq_it).inst); + (*sq_it).completed = true; + DynInstPtr inst = (*sq_it).inst; + + --storesToWB; + + if (isStalled() && + inst->seqNum == stallingStoreIsn) { + DPRINTF(OzoneLSQ, "Unstalling, stalling store [sn:%lli] " + "load [sn:%lli]\n", + stallingStoreIsn, (*stallingLoad)->seqNum); + stalled = false; + stallingStoreIsn = 0; + be->replayMemInst((*stallingLoad)); + } + + DPRINTF(OzoneLSQ, "Completing store idx:%i [sn:%lli], storesToWB:%i\n", + inst->sqIdx, inst->seqNum, storesToWB); + + // A bit conservative because a store completion may not free up entries, + // but hopefully avoids two store completions in one cycle from making + // the CPU tick twice. +// cpu->activityThisCycle(); + assert(!storeQueue.empty()); + SQItHash.erase(sq_hash_it); + SQIndices.push(inst->sqIdx); + storeQueue.erase(sq_it); + --stores; +/* + SQIt oldest_store_it = --(storeQueue.end()); + if (sq_it == oldest_store_it) { + do { + inst = (*oldest_store_it).inst; + sq_hash_it = SQItHash.find(inst->sqIdx); + assert(sq_hash_it != SQItHash.end()); + SQItHash.erase(sq_hash_it); + SQIndices.push(inst->sqIdx); + storeQueue.erase(oldest_store_it--); + + --stores; + } while ((*oldest_store_it).completed && + oldest_store_it != storeQueue.end()); + +// be->updateLSQNextCycle = true; + } +*/ +} diff --git a/cpu/ozone/ozone_impl.hh b/cpu/ozone/ozone_impl.hh index a2c706c60..1f543ec6e 100644 --- a/cpu/ozone/ozone_impl.hh +++ b/cpu/ozone/ozone_impl.hh @@ -35,6 +35,8 @@ #include "cpu/ozone/front_end.hh" #include "cpu/ozone/inst_queue.hh" #include "cpu/ozone/lsq_unit.hh" +#include "cpu/ozone/lw_lsq.hh" +#include "cpu/ozone/lw_back_end.hh" #include "cpu/ozone/null_predictor.hh" #include "cpu/ozone/dyn_inst.hh" #include "cpu/ozone/simple_params.hh" @@ -55,10 +57,10 @@ struct OzoneImpl { typedef TwobitBPredUnit BranchPred; typedef FrontEnd FrontEnd; // Will need IQ, LSQ eventually - typedef BackEnd BackEnd; + typedef LWBackEnd BackEnd; typedef InstQueue InstQueue; - typedef OzoneLSQ LdstQueue; + typedef OzoneLWLSQ LdstQueue; typedef OzoneDynInst DynInst; typedef RefCountingPtr DynInstPtr; -- cgit v1.2.3 From e704960c80033dd008907caa7c24742a1020d302 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Mon, 24 Apr 2006 17:10:06 -0400 Subject: Updates to Ozone model for quiesce, store conditionals. --HG-- extra : convert_revision : 72ddd75ad0b5783aca9484e7d178c2915ee8e355 --- cpu/ozone/cpu.hh | 122 +++++++++++++++++++++++++++++++++++++++--- cpu/ozone/cpu_impl.hh | 62 +++++---------------- cpu/ozone/dyn_inst_impl.hh | 1 + cpu/ozone/front_end.hh | 10 ++-- cpu/ozone/front_end_impl.hh | 69 +++++++++++++++++++----- cpu/ozone/lw_back_end.hh | 6 ++- cpu/ozone/lw_back_end_impl.hh | 61 ++++++++++++--------- cpu/ozone/lw_lsq.hh | 25 +++++---- cpu/ozone/lw_lsq_impl.hh | 32 +++++++++-- 9 files changed, 272 insertions(+), 116 deletions(-) (limited to 'cpu/ozone') diff --git a/cpu/ozone/cpu.hh b/cpu/ozone/cpu.hh index 17e0f5c42..d37d3360c 100644 --- a/cpu/ozone/cpu.hh +++ b/cpu/ozone/cpu.hh @@ -42,7 +42,6 @@ #include "cpu/pc_event.hh" #include "cpu/static_inst.hh" #include "mem/mem_interface.hh" -#include "mem/page_table.hh" #include "sim/eventq.hh" // forward declarations @@ -59,7 +58,6 @@ class GDBListener; #else -class PageTable; class Process; #endif // FULL_SYSTEM @@ -349,9 +347,8 @@ class OzoneCPU : public BaseCPU // L1 data cache MemInterface *dcacheInterface; -#if !FULL_SYSTEM - PageTable *pTable; -#endif + /** Pointer to memory. */ + FunctionalMemory *mem; FrontEnd *frontEnd; @@ -428,24 +425,62 @@ class OzoneCPU : public BaseCPU int getInstAsid() { return thread.asid; } int getDataAsid() { return thread.asid; } + Fault dummyTranslation(MemReqPtr &req) + { +#if 0 + assert((req->vaddr >> 48 & 0xffff) == 0); +#endif + + // put the asid in the upper 16 bits of the paddr + req->paddr = req->vaddr & ~((Addr)0xffff << sizeof(Addr) * 8 - 16); + req->paddr = req->paddr | (Addr)req->asid << sizeof(Addr) * 8 - 16; + return NoFault; + } + /** Translates instruction requestion in syscall emulation mode. */ Fault translateInstReq(MemReqPtr &req) { - return this->pTable->translate(req); + return dummyTranslation(req); } /** Translates data read request in syscall emulation mode. */ Fault translateDataReadReq(MemReqPtr &req) { - return this->pTable->translate(req); + return dummyTranslation(req); } /** Translates data write request in syscall emulation mode. */ Fault translateDataWriteReq(MemReqPtr &req) { - return this->pTable->translate(req); + return dummyTranslation(req); } #endif + + /** Old CPU read from memory function. No longer used. */ + template + Fault read(MemReqPtr &req, T &data) + { +// panic("CPU READ NOT IMPLEMENTED W/NEW MEMORY\n"); +#if 0 +#if FULL_SYSTEM && defined(TARGET_ALPHA) + if (req->flags & LOCKED) { + req->xc->setMiscReg(TheISA::Lock_Addr_DepTag, req->paddr); + req->xc->setMiscReg(TheISA::Lock_Flag_DepTag, true); + } +#endif +#endif + Fault error; + if (req->flags & LOCKED) { +// lockAddr = req->paddr; + lockFlag = true; + } + + error = this->mem->read(req, data); + data = gtoh(data); + return error; + } + + /** CPU read function, forwards read to LSQ. */ template Fault read(MemReqPtr &req, T &data, int load_idx) @@ -453,6 +488,75 @@ class OzoneCPU : public BaseCPU return backEnd->read(req, data, load_idx); } + /** Old CPU write to memory function. No longer used. */ + template + Fault write(MemReqPtr &req, T &data) + { +#if 0 +#if FULL_SYSTEM && defined(TARGET_ALPHA) + ExecContext *xc; + + // If this is a store conditional, act appropriately + if (req->flags & LOCKED) { + xc = req->xc; + + if (req->flags & UNCACHEABLE) { + // Don't update result register (see stq_c in isa_desc) + req->result = 2; + xc->setStCondFailures(0);//Needed? [RGD] + } else { + bool lock_flag = xc->readMiscReg(TheISA::Lock_Flag_DepTag); + Addr lock_addr = xc->readMiscReg(TheISA::Lock_Addr_DepTag); + req->result = lock_flag; + if (!lock_flag || + ((lock_addr & ~0xf) != (req->paddr & ~0xf))) { + xc->setMiscReg(TheISA::Lock_Flag_DepTag, false); + xc->setStCondFailures(xc->readStCondFailures() + 1); + if (((xc->readStCondFailures()) % 100000) == 0) { + std::cerr << "Warning: " + << xc->readStCondFailures() + << " consecutive store conditional failures " + << "on cpu " << req->xc->readCpuId() + << std::endl; + } + return NoFault; + } + else xc->setStCondFailures(0); + } + } + + // Need to clear any locked flags on other proccessors for + // this address. Only do this for succsful Store Conditionals + // and all other stores (WH64?). Unsuccessful Store + // Conditionals would have returned above, and wouldn't fall + // through. + for (int i = 0; i < this->system->execContexts.size(); i++){ + xc = this->system->execContexts[i]; + if ((xc->readMiscReg(TheISA::Lock_Addr_DepTag) & ~0xf) == + (req->paddr & ~0xf)) { + xc->setMiscReg(TheISA::Lock_Flag_DepTag, false); + } + } + +#endif +#endif + + if (req->flags & LOCKED) { + if (req->flags & UNCACHEABLE) { + req->result = 2; + } else { + if (this->lockFlag/* && this->lockAddr == req->paddr*/) { + req->result = 1; + } else { + req->result = 0; + return NoFault; + } + } + } + + return this->mem->write(req, (T)htog(data)); + } + /** CPU write function, forwards write to LSQ. */ template Fault write(MemReqPtr &req, T &data, int store_idx) @@ -507,6 +611,8 @@ class OzoneCPU : public BaseCPU bool stall; }; TimeBuffer comm; + + bool lockFlag; }; #endif // __CPU_OZONE_CPU_HH__ diff --git a/cpu/ozone/cpu_impl.hh b/cpu/ozone/cpu_impl.hh index c205ad319..a7bc61603 100644 --- a/cpu/ozone/cpu_impl.hh +++ b/cpu/ozone/cpu_impl.hh @@ -149,12 +149,14 @@ OzoneCPU::DCacheCompletionEvent::description() template OzoneCPU::OzoneCPU(Params *p) #if FULL_SYSTEM - : BaseCPU(p), thread(this, 0, p->mem), tickEvent(this, p->width), + : BaseCPU(p), thread(this, 0, p->mem), tickEvent(this, p->width), mem(p->mem), #else : BaseCPU(p), thread(this, 0, p->workload[0], 0), tickEvent(this, p->width), + mem(p->workload[0]->getMemory()), #endif comm(5, 5) { + frontEnd = new FrontEnd(p); backEnd = new BackEnd(p); @@ -245,51 +247,7 @@ OzoneCPU::OzoneCPU(Params *p) globalSeqNum = 1; checkInterrupts = false; -/* - fetchRedirBranch = true; - fetchRedirExcp = true; - - // Need to initialize the rename maps, and the head and tail pointers. - robHeadPtr = new DynInst(this); - robTailPtr = new DynInst(this); - - robHeadPtr->setNextInst(robTailPtr); -// robHeadPtr->setPrevInst(NULL); -// robTailPtr->setNextInst(NULL); - robTailPtr->setPrevInst(robHeadPtr); - - robHeadPtr->setCompleted(); - robTailPtr->setCompleted(); - - for (int i = 0; i < ISA::TotalNumRegs; ++i) { - renameTable[i] = new DynInst(this); - commitTable[i] = new DynInst(this); - renameTable[i]->setCompleted(); - commitTable[i]->setCompleted(); - } - -#if FULL_SYSTEM - for (int i = 0; i < ISA::NumIntRegs; ++i) { - palShadowTable[i] = new DynInst(this); - palShadowTable[i]->setCompleted(); - } -#endif - - // Size of cache block. - cacheBlkSize = icacheInterface ? icacheInterface->getBlockSize() : 64; - - // Create mask to get rid of offset bits. - cacheBlkMask = (cacheBlkSize - 1); - - // Get the size of an instruction. - instSize = sizeof(MachInst); - - // Create space to store a cache line. - cacheData = new uint8_t[cacheBlkSize]; - - cacheBlkValid = false; -*/ for (int i = 0; i < TheISA::TotalNumRegs; ++i) { thread.renameTable[i] = new DynInst(this); thread.renameTable[i]->setCompleted(); @@ -299,9 +257,11 @@ OzoneCPU::OzoneCPU(Params *p) backEnd->renameTable.copyFrom(thread.renameTable); #if !FULL_SYSTEM - pTable = p->pTable; +// pTable = p->pTable; #endif + lockFlag = 0; + DPRINTF(OzoneCPU, "OzoneCPU: Created Ozone cpu object.\n"); } @@ -392,6 +352,7 @@ OzoneCPU::activateContext(int thread_num, int delay) scheduleTickEvent(delay); _status = Running; thread._status = ExecContext::Active; + frontEnd->wakeFromQuiesce(); } template @@ -401,8 +362,8 @@ OzoneCPU::suspendContext(int thread_num) // Eventually change this in SMT. assert(thread_num == 0); // assert(xcProxy); - - assert(_status == Running); + // @todo: Figure out how to initially set the status properly so this is running. +// assert(_status == Running); notIdleFraction--; unscheduleTickEvent(); _status = Idle; @@ -665,6 +626,7 @@ OzoneCPU::tick() { DPRINTF(OzoneCPU, "\n\nOzoneCPU: Ticking cpu.\n"); + _status = Running; thread.renameTable[ZeroReg]->setIntResult(0); thread.renameTable[ZeroReg+TheISA::FP_Base_DepTag]-> setDoubleResult(0.0); @@ -756,7 +718,7 @@ OzoneCPU::tick() // check for instruction-count-based events comInstEventQueue[0]->serviceEvents(numInst); - if (!tickEvent.scheduled()) + if (!tickEvent.scheduled() && _status == Running) tickEvent.schedule(curTick + 1); } @@ -821,6 +783,8 @@ OzoneCPU::hwrei() thread.setNextPC(thread.readMiscReg(AlphaISA::IPR_EXC_ADDR)); + lockFlag = false; + // Not sure how to make a similar check in the Ozone model // if (!misspeculating()) { kernelStats->hwrei(); diff --git a/cpu/ozone/dyn_inst_impl.hh b/cpu/ozone/dyn_inst_impl.hh index 2d86ced62..c83481c9a 100644 --- a/cpu/ozone/dyn_inst_impl.hh +++ b/cpu/ozone/dyn_inst_impl.hh @@ -237,6 +237,7 @@ OzoneDynInst::hwrei() this->cpu->kernelStats->hwrei(); this->cpu->checkInterrupts = true; + this->cpu->lockFlag = false; // FIXME: XXX check for interrupts? XXX return NoFault; diff --git a/cpu/ozone/front_end.hh b/cpu/ozone/front_end.hh index 251f4200c..2bff2544d 100644 --- a/cpu/ozone/front_end.hh +++ b/cpu/ozone/front_end.hh @@ -60,7 +60,7 @@ class FrontEnd const bool is_branch = false, const bool branch_taken = false); DynInstPtr getInst(); - void processCacheCompletion(); + void processCacheCompletion(MemReqPtr &req); void addFreeRegs(int num_freed); @@ -109,6 +109,7 @@ class FrontEnd SerializeBlocked, SerializeComplete, RenameBlocked, + QuiescePending, BEBlocked }; @@ -130,17 +131,16 @@ class FrontEnd class ICacheCompletionEvent : public Event { private: + MemReqPtr req; FrontEnd *frontEnd; public: - ICacheCompletionEvent(FrontEnd *_fe); + ICacheCompletionEvent(MemReqPtr &_req, FrontEnd *_fe); virtual void process(); virtual const char *description(); }; - ICacheCompletionEvent cacheCompletionEvent; - MemInterface *icacheInterface; #if !FULL_SYSTEM @@ -174,6 +174,8 @@ class FrontEnd void setPC(Addr val) { PC = val; } void setNextPC(Addr val) { nextPC = val; } + void wakeFromQuiesce(); + void dumpInsts(); private: diff --git a/cpu/ozone/front_end_impl.hh b/cpu/ozone/front_end_impl.hh index af452fe95..7c18386cf 100644 --- a/cpu/ozone/front_end_impl.hh +++ b/cpu/ozone/front_end_impl.hh @@ -1,4 +1,5 @@ +#include "arch/faults.hh" #include "arch/isa_traits.hh" #include "base/statistics.hh" #include "cpu/exec_context.hh" @@ -12,7 +13,6 @@ using namespace TheISA; template FrontEnd::FrontEnd(Params *params) : branchPred(params), - cacheCompletionEvent(this), icacheInterface(params->icacheInterface), instBufferSize(0), maxInstBufferSize(params->maxInstBufferSize), @@ -26,10 +26,12 @@ FrontEnd::FrontEnd(Params *params) // Setup branch predictor. // Setup Memory Request +/* memReq = new MemReq(); memReq->asid = 0; memReq->data = new uint8_t[64]; - +*/ + memReq = NULL; // Size of cache block. cacheBlkSize = icacheInterface ? icacheInterface->getBlockSize() : 64; @@ -46,7 +48,7 @@ FrontEnd::FrontEnd(Params *params) cacheBlkValid = false; #if !FULL_SYSTEM - pTable = params->pTable; +// pTable = params->pTable; #endif fetchFault = NoFault; } @@ -72,7 +74,7 @@ void FrontEnd::setXC(ExecContext *xc_ptr) { xc = xc_ptr; - memReq->xc = xc; +// memReq->xc = xc; } template @@ -269,6 +271,9 @@ FrontEnd::tick() } updateStatus(); return; + } else if (status == QuiescePending) { + DPRINTF(FE, "Waiting for quiesce to execute or get squashed.\n"); + return; } else if (status != IcacheMissComplete) { if (fetchCacheLineNextCycle) { Fault fault = fetchCacheLine(); @@ -325,6 +330,14 @@ FrontEnd::tick() // rename(num_inst); // } +#if FULL_SYSTEM + if (inst->isQuiesce()) { + warn("%lli: Quiesce instruction encountered, halting fetch!", curTick); + status = QuiescePending; + break; + } +#endif + if (inst->predTaken()) { // Start over with tick? break; @@ -364,6 +377,12 @@ FrontEnd::fetchCacheLine() // Setup the memReq to do a read of the first isntruction's address. // Set the appropriate read size and flags as well. + memReq = new MemReq(); + + memReq->asid = 0; + memReq->thread_num = 0; + memReq->data = new uint8_t[64]; + memReq->xc = xc; memReq->cmd = Read; memReq->reset(fetch_PC, cacheBlkSize, flags); @@ -377,16 +396,26 @@ FrontEnd::fetchCacheLine() // Now do the timing access to see whether or not the instruction // exists within the cache. if (icacheInterface && fault == NoFault) { +#if FULL_SYSTEM + if (cpu->system->memctrl->badaddr(memReq->paddr)) { + DPRINTF(FE, "Fetch: Bad address %#x (hopefully on a " + "misspeculating path!", + memReq->paddr); + return TheISA::genMachineCheckFault(); + } +#endif + memReq->completionEvent = NULL; memReq->time = curTick; + fault = cpu->mem->read(memReq, cacheData); MemAccessResult res = icacheInterface->access(memReq); // If the cache missed then schedule an event to wake // up this stage once the cache miss completes. if (icacheInterface->doEvents() && res != MA_HIT) { - memReq->completionEvent = new ICacheCompletionEvent(this); + memReq->completionEvent = new ICacheCompletionEvent(memReq, this); status = IcacheMissStall; @@ -398,7 +427,7 @@ FrontEnd::fetchCacheLine() cacheBlkValid = true; - memcpy(cacheData, memReq->data, memReq->size); +// memcpy(cacheData, memReq->data, memReq->size); } } @@ -541,7 +570,8 @@ FrontEnd::squash(const InstSeqNum &squash_num, const Addr &next_PC, // Clear the icache miss if it's outstanding. if (status == IcacheMissStall && icacheInterface) { DPRINTF(FE, "Squashing outstanding Icache miss.\n"); - icacheInterface->squash(0); +// icacheInterface->squash(0); + memReq = NULL; } if (status == SerializeBlocked) { @@ -577,12 +607,13 @@ FrontEnd::getInst() template void -FrontEnd::processCacheCompletion() +FrontEnd::processCacheCompletion(MemReqPtr &req) { DPRINTF(FE, "Processing cache completion\n"); // Do something here. - if (status != IcacheMissStall) { + if (status != IcacheMissStall || + req != memReq) { DPRINTF(FE, "Previous fetch was squashed.\n"); return; } @@ -595,10 +626,11 @@ FrontEnd::processCacheCompletion() fetchStatus[tid] = IcacheMissComplete; } */ - memcpy(cacheData, memReq->data, memReq->size); +// memcpy(cacheData, memReq->data, memReq->size); // Reset the completion event to NULL. - memReq->completionEvent = NULL; +// memReq->completionEvent = NULL; + memReq = NULL; } template @@ -766,6 +798,15 @@ FrontEnd::renameInst(DynInstPtr &inst) } } +template +void +FrontEnd::wakeFromQuiesce() +{ + DPRINTF(FE, "Waking up from quiesce\n"); + // Hopefully this is safe + status = Running; +} + template void FrontEnd::dumpInsts() @@ -786,8 +827,8 @@ FrontEnd::dumpInsts() } template -FrontEnd::ICacheCompletionEvent::ICacheCompletionEvent(FrontEnd *fe) - : Event(&mainEventQueue, Delayed_Writeback_Pri), frontEnd(fe) +FrontEnd::ICacheCompletionEvent::ICacheCompletionEvent(MemReqPtr &_req, FrontEnd *fe) + : Event(&mainEventQueue, Delayed_Writeback_Pri), req(_req), frontEnd(fe) { this->setFlags(Event::AutoDelete); } @@ -796,7 +837,7 @@ template void FrontEnd::ICacheCompletionEvent::process() { - frontEnd->processCacheCompletion(); + frontEnd->processCacheCompletion(req); } template diff --git a/cpu/ozone/lw_back_end.hh b/cpu/ozone/lw_back_end.hh index b89957aad..f17c93ff4 100644 --- a/cpu/ozone/lw_back_end.hh +++ b/cpu/ozone/lw_back_end.hh @@ -94,8 +94,7 @@ class LWBackEnd void regStats(); - void setCPU(FullCPU *cpu_ptr) - { cpu = cpu_ptr; } + void setCPU(FullCPU *cpu_ptr); void setFrontEnd(FrontEnd *front_end_ptr) { frontEnd = front_end_ptr; } @@ -404,6 +403,9 @@ class LWBackEnd Stats::Scalar<> commit_eligible_samples; Stats::Vector<> commit_eligible; + Stats::Vector<> squashedInsts; + Stats::Vector<> ROBSquashedInsts; + Stats::Scalar<> ROB_fcount; Stats::Formula ROB_full_rate; diff --git a/cpu/ozone/lw_back_end_impl.hh b/cpu/ozone/lw_back_end_impl.hh index 115821787..d1290239c 100644 --- a/cpu/ozone/lw_back_end_impl.hh +++ b/cpu/ozone/lw_back_end_impl.hh @@ -480,6 +480,18 @@ LWBackEnd::regStats() .desc("number cycles where commit BW limit reached") ; + squashedInsts + .init(cpu->number_of_threads) + .name(name() + ".COM:squashed_insts") + .desc("Number of instructions removed from inst list") + ; + + ROBSquashedInsts + .init(cpu->number_of_threads) + .name(name() + ".COM:rob_squashed_insts") + .desc("Number of instructions removed from inst list when they reached the head of the ROB") + ; + ROB_fcount .name(name() + ".ROB:full_count") .desc("number of cycles where ROB was full") @@ -515,6 +527,14 @@ LWBackEnd::regStats() // IQ.regStats(); } +template +void +LWBackEnd::setCPU(FullCPU *cpu_ptr) +{ + cpu = cpu_ptr; + LSQ.setCPU(cpu_ptr); +} + template void LWBackEnd::setCommBuffer(TimeBuffer *_comm) @@ -1044,35 +1064,24 @@ LWBackEnd::commitInst(int inst_num) } } - // Now check if it's one of the special trap or barrier or - // serializing instructions. - if (inst->isThreadSync()) - { - // Not handled for now. - panic("Thread sync instructions are not handled yet.\n"); - } + // Not handled for now. + assert(!inst->isThreadSync()); // Check if the instruction caused a fault. If so, trap. Fault inst_fault = inst->getFault(); if (inst_fault != NoFault) { - if (!inst->isNop()) { - DPRINTF(BE, "Inst [sn:%lli] PC %#x has a fault\n", - inst->seqNum, inst->readPC()); - thread->setInst( - static_cast(inst->staticInst->machInst)); + DPRINTF(BE, "Inst [sn:%lli] PC %#x has a fault\n", + inst->seqNum, inst->readPC()); + thread->setInst( + static_cast(inst->staticInst->machInst)); #if FULL_SYSTEM - handleFault(inst_fault); - return false; + handleFault(inst_fault); + return false; #else // !FULL_SYSTEM - panic("fault (%d) detected @ PC %08p", inst_fault, - inst->PC); + panic("fault (%d) detected @ PC %08p", inst_fault, + inst->PC); #endif // FULL_SYSTEM - } - } - - if (inst->isControl()) { -// ++commitCommittedBranches; } int freed_regs = 0; @@ -1096,7 +1105,6 @@ LWBackEnd::commitInst(int inst_num) instList.pop_back(); --numInsts; - cpu->numInst++; thread->numInsts++; ++thread->funcExeInst; // Maybe move this to where teh fault is handled; if the fault is handled, @@ -1134,15 +1142,14 @@ template void LWBackEnd::commitInsts() { - int commit_width = commitWidth ? commitWidth : width; - // Not sure this should be a loop or not. int inst_num = 0; - while (!instList.empty() && inst_num < commit_width) { + while (!instList.empty() && inst_num < commitWidth) { if (instList.back()->isSquashed()) { instList.back()->clearDependents(); instList.pop_back(); --numInsts; + ROBSquashedInsts[instList.back()->threadNumber]++; continue; } @@ -1150,6 +1157,7 @@ LWBackEnd::commitInsts() DPRINTF(BE, "Can't commit, Instruction [sn:%lli] PC " "%#x is head of ROB and not ready\n", instList.back()->seqNum, instList.back()->readPC()); + --inst_num; break; } } @@ -1217,6 +1225,8 @@ LWBackEnd::squash(const InstSeqNum &sn) (*insts_it)->clearDependents(); + squashedInsts[(*insts_it)->threadNumber]++; + instList.erase(insts_it++); --numInsts; } @@ -1350,6 +1360,7 @@ LWBackEnd::updateComInstStats(DynInstPtr &inst) { unsigned thread = inst->threadNumber; + cpu->numInst++; // // Pick off the software prefetches // diff --git a/cpu/ozone/lw_lsq.hh b/cpu/ozone/lw_lsq.hh index 2b2c25b58..eb9886244 100644 --- a/cpu/ozone/lw_lsq.hh +++ b/cpu/ozone/lw_lsq.hh @@ -43,7 +43,7 @@ //#include "mem/page_table.hh" #include "sim/sim_object.hh" -class PageTable; +//class PageTable; /** * Class that implements the actual LQ and SQ for each specific thread. @@ -115,7 +115,7 @@ class OzoneLWLSQ { { be = be_ptr; } /** Sets the page table pointer. */ - void setPageTable(PageTable *pt_ptr); +// void setPageTable(PageTable *pt_ptr); /** Ticks the LSQ unit, which in this case only resets the number of * used cache ports. @@ -243,7 +243,7 @@ class OzoneLWLSQ { MemInterface *dcacheInterface; /** Pointer to the page table. */ - PageTable *pTable; +// PageTable *pTable; public: struct SQEntry { @@ -562,6 +562,19 @@ OzoneLWLSQ::read(MemReqPtr &req, T &data, int load_idx) // If there's no forwarding case, then go access memory + DPRINTF(OzoneLSQ, "Doing functional access for inst PC %#x\n", + inst->readPC()); + + + // Setup MemReq pointer + req->cmd = Read; + req->completionEvent = NULL; + req->time = curTick; + assert(!req->data); + req->data = new uint8_t[64]; + Fault fault = cpu->read(req, data); + memcpy(req->data, &data, sizeof(T)); + ++usedPorts; // if we have a cache, do cache access too @@ -582,12 +595,6 @@ OzoneLWLSQ::read(MemReqPtr &req, T &data, int load_idx) "vaddr:%#x flags:%i\n", inst->readPC(), req->paddr, req->vaddr, req->flags); - // Setup MemReq pointer - req->cmd = Read; - req->completionEvent = NULL; - req->time = curTick; - assert(!req->data); - req->data = new uint8_t[64]; assert(!req->completionEvent); req->completionEvent = diff --git a/cpu/ozone/lw_lsq_impl.hh b/cpu/ozone/lw_lsq_impl.hh index 54d7ead6c..7b22d2564 100644 --- a/cpu/ozone/lw_lsq_impl.hh +++ b/cpu/ozone/lw_lsq_impl.hh @@ -131,7 +131,7 @@ OzoneLWLSQ::clearSQ() { storeQueue.clear(); } - +/* template void OzoneLWLSQ::setPageTable(PageTable *pt_ptr) @@ -139,7 +139,7 @@ OzoneLWLSQ::setPageTable(PageTable *pt_ptr) DPRINTF(OzoneLSQ, "Setting the page table pointer.\n"); pTable = pt_ptr; } - +*/ template void OzoneLWLSQ::resizeLQ(unsigned size) @@ -519,6 +519,23 @@ OzoneLWLSQ::writebackStores() req->paddr, *(req->data), inst->seqNum); + switch((*sq_it).size) { + case 1: + cpu->write(req, (uint8_t &)(*sq_it).data); + break; + case 2: + cpu->write(req, (uint16_t &)(*sq_it).data); + break; + case 4: + cpu->write(req, (uint32_t &)(*sq_it).data); + break; + case 8: + cpu->write(req, (uint64_t &)(*sq_it).data); + break; + default: + panic("Unexpected store size!\n"); + } + if (dcacheInterface) { MemAccessResult result = dcacheInterface->access(req); @@ -538,7 +555,7 @@ OzoneLWLSQ::writebackStores() typename BackEnd::LdWritebackEvent *wb = NULL; if (req->flags & LOCKED) { // Stx_C does not generate a system port transaction. - req->result=1; +// req->result=1; wb = new typename BackEnd::LdWritebackEvent(inst, be); } @@ -571,12 +588,12 @@ OzoneLWLSQ::writebackStores() if (req->flags & LOCKED) { // Stx_C does not generate a system port transaction. - if (req->flags & UNCACHEABLE) { +/* if (req->flags & UNCACHEABLE) { req->result = 2; } else { req->result = 1; } - +*/ typename BackEnd::LdWritebackEvent *wb = new typename BackEnd::LdWritebackEvent(inst, be); @@ -642,6 +659,11 @@ OzoneLWLSQ::squash(const InstSeqNum &squashed_num) while (stores != 0 && (*sq_it).inst->seqNum > squashed_num) { assert(!storeQueue.empty()); + + if ((*sq_it).canWB) { + break; + } + // Clear the smart pointer to make sure it is decremented. DPRINTF(OzoneLSQ,"Store Instruction PC %#x idx:%i squashed [sn:%lli]\n", (*sq_it).inst->readPC(), (*sq_it).inst->sqIdx, -- cgit v1.2.3 From 31e09892d750d0e6dc7de3d455e34808c159a420 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Mon, 24 Apr 2006 17:11:31 -0400 Subject: Include option for disabling PC symbols. cpu/inst_seq.hh: cpu/o3/cpu.cc: cpu/ozone/cpu_builder.cc: cpu/ozone/thread_state.hh: SE build fixes. --HG-- extra : convert_revision : a4df6128533105f849b5469f62d83dffe299b7df --- cpu/ozone/cpu_builder.cc | 12 ++++++------ cpu/ozone/thread_state.hh | 5 +++-- 2 files changed, 9 insertions(+), 8 deletions(-) (limited to 'cpu/ozone') diff --git a/cpu/ozone/cpu_builder.cc b/cpu/ozone/cpu_builder.cc index 8ac6858b0..0146dd1bd 100644 --- a/cpu/ozone/cpu_builder.cc +++ b/cpu/ozone/cpu_builder.cc @@ -45,7 +45,7 @@ SimObjectParam itb; SimObjectParam dtb; #else SimObjectVectorParam workload; -SimObjectParam page_table; +//SimObjectParam page_table; #endif // FULL_SYSTEM SimObjectParam mem; @@ -159,7 +159,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU) INIT_PARAM(dtb, "Data translation buffer"), #else INIT_PARAM(workload, "Processes to run"), - INIT_PARAM(page_table, "Page table"), +// INIT_PARAM(page_table, "Page table"), #endif // FULL_SYSTEM INIT_PARAM_DFLT(mem, "Memory", NULL), @@ -310,7 +310,7 @@ CREATE_SIM_OBJECT(DerivOzoneCPU) params->dtb = dtb; #else params->workload = workload; - params->pTable = page_table; +// params->pTable = page_table; #endif // FULL_SYSTEM params->mem = mem; @@ -440,7 +440,7 @@ SimObjectParam itb; SimObjectParam dtb; #else SimObjectVectorParam workload; -SimObjectParam page_table; +//SimObjectParam page_table; #endif // FULL_SYSTEM SimObjectParam mem; @@ -554,7 +554,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(SimpleOzoneCPU) INIT_PARAM(dtb, "Data translation buffer"), #else INIT_PARAM(workload, "Processes to run"), - INIT_PARAM(page_table, "Page table"), +// INIT_PARAM(page_table, "Page table"), #endif // FULL_SYSTEM INIT_PARAM_DFLT(mem, "Memory", NULL), @@ -705,7 +705,7 @@ CREATE_SIM_OBJECT(SimpleOzoneCPU) params->dtb = dtb; #else params->workload = workload; - params->pTable = page_table; +// params->pTable = page_table; #endif // FULL_SYSTEM params->mem = mem; diff --git a/cpu/ozone/thread_state.hh b/cpu/ozone/thread_state.hh index c6d23a63b..269fc6459 100644 --- a/cpu/ozone/thread_state.hh +++ b/cpu/ozone/thread_state.hh @@ -6,9 +6,10 @@ #include "arch/isa_traits.hh" #include "cpu/exec_context.hh" #include "cpu/thread_state.hh" +#include "sim/process.hh" class Event; -class Process; +//class Process; #if FULL_SYSTEM class EndQuiesceEvent; @@ -40,7 +41,7 @@ struct OzoneThreadState : public ThreadState { } #else OzoneThreadState(FullCPU *_cpu, int _thread_num, Process *_process, int _asid) - : ThreadState(-1, _thread_num, NULL, _process, _asid), + : ThreadState(-1, _thread_num, _process->getMemory(), _process, _asid), cpu(_cpu), inSyscall(0), trapPending(0) { memset(®s, 0, sizeof(TheISA::RegFile)); -- cgit v1.2.3 From d363d5aad72b34769c753752a779a13e11532fd8 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Mon, 24 Apr 2006 17:40:00 -0400 Subject: Quiesce stuff. cpu/ozone/cpu.hh: Add quiesce stat (not clear how it should be used yet). cpu/ozone/cpu_impl.hh: Fix for quiesce. --HG-- extra : convert_revision : a1998818e241374ae3f4c3cabbef885dda55c884 --- cpu/ozone/cpu.hh | 2 ++ cpu/ozone/cpu_impl.hh | 8 +++++++- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'cpu/ozone') diff --git a/cpu/ozone/cpu.hh b/cpu/ozone/cpu.hh index d37d3360c..56b6571a2 100644 --- a/cpu/ozone/cpu.hh +++ b/cpu/ozone/cpu.hh @@ -613,6 +613,8 @@ class OzoneCPU : public BaseCPU TimeBuffer comm; bool lockFlag; + + Stats::Scalar<> quiesceCycles; }; #endif // __CPU_OZONE_CPU_HH__ diff --git a/cpu/ozone/cpu_impl.hh b/cpu/ozone/cpu_impl.hh index a7bc61603..17d944e7c 100644 --- a/cpu/ozone/cpu_impl.hh +++ b/cpu/ozone/cpu_impl.hh @@ -413,6 +413,11 @@ OzoneCPU::regStats() .desc("Percentage of idle cycles") ; + quiesceCycles + .name(name() + ".quiesce_cycles") + .desc("Number of cycles spent in quiesce") + ; + idleFraction = constant(1.0) - notIdleFraction; frontEnd->regStats(); @@ -609,7 +614,8 @@ OzoneCPU::post_interrupt(int int_num, int index) { BaseCPU::post_interrupt(int_num, index); - if (thread._status == ExecContext::Suspended) { +// if (thread._status == ExecContext::Suspended) { + if (_status == Idle) { DPRINTF(IPI,"Suspended Processor awoke\n"); // thread.activate(); // Hack for now. Otherwise might have to go through the xcProxy, or -- cgit v1.2.3 From 21df09cf7aa6bdec5de11904751d355e773a3168 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Thu, 11 May 2006 19:18:36 -0400 Subject: Fixes for ozone CPU to successfully boot and run linux. cpu/base_dyn_inst.hh: Remove snoop function (did not mean to commit it). cpu/ozone/back_end_impl.hh: Set instruction as having its result ready, not completed. cpu/ozone/cpu.hh: Fixes for store conditionals. Use an additional lock addr list to make sure that the access is valid. I don't know if this is fully necessary, but it gives me a peace of mind (at some performance cost). Make sure to schedule for cycles(1) and not just 1 cycle in the future as tick = 1ps. Also support the new Checker. cpu/ozone/cpu_builder.cc: Add parameter for maxOutstandingMemOps so it can be set through the config. Also add in the checker. Right now it's a BaseCPU simobject, but that may change in the future. cpu/ozone/cpu_impl.hh: Add support for the checker. For now there's a dynamic cast to convert the simobject passed back from the builder to the proper Checker type. It's ugly, but only happens at startup, and is probably a justified use of dynamic cast. Support switching out/taking over from other CPUs. Correct indexing problem for float registers. cpu/ozone/dyn_inst.hh: Add ability for instructions to wait on memory instructions in addition to source register instructions. This is needed for memory dependence predictors and memory barriers. cpu/ozone/dyn_inst_impl.hh: Support waiting on memory operations. Use "resultReady" to differentiate an instruction having its registers produced vs being totally completed. cpu/ozone/front_end.hh: Support switching out. Also record if an interrupt is pending. cpu/ozone/front_end_impl.hh: Support switching out. Also support stalling the front end if an interrupt is pending. cpu/ozone/lw_back_end.hh: Add checker in. Support switching out. Support memory barriers. cpu/ozone/lw_back_end_impl.hh: Lots of changes to get things to work right. Faults, traps, interrupts all wait until all stores have written back (important). Memory barriers are supported, as is the general ability for instructions to be dependent on other memory instructions. cpu/ozone/lw_lsq.hh: Support switching out. Also use store writeback events in all cases, not just dcache misses. cpu/ozone/lw_lsq_impl.hh: Support switching out. Also use store writeback events in all cases, not just dcache misses. Support the checker CPU. Marks instructions as completed once the functional access is done (which has to be done for the checker to be able to verify results). cpu/ozone/simple_params.hh: Add max outstanding mem ops parameter. python/m5/objects/OzoneCPU.py: Add max outstanding mem ops, checker. --HG-- extra : convert_revision : f4d408e1bb1f25836a097b6abe3856111e950c59 --- cpu/ozone/back_end_impl.hh | 2 +- cpu/ozone/cpu.hh | 28 ++++- cpu/ozone/cpu_builder.cc | 16 ++- cpu/ozone/cpu_impl.hh | 118 ++++++++++++++----- cpu/ozone/dyn_inst.hh | 40 +++++-- cpu/ozone/dyn_inst_impl.hh | 43 ++++++- cpu/ozone/front_end.hh | 13 +++ cpu/ozone/front_end_impl.hh | 58 +++++++++- cpu/ozone/lw_back_end.hh | 20 +++- cpu/ozone/lw_back_end_impl.hh | 256 +++++++++++++++++++++++++++++++++--------- cpu/ozone/lw_lsq.hh | 32 +++++- cpu/ozone/lw_lsq_impl.hh | 189 ++++++++++++++++++++++++------- cpu/ozone/simple_params.hh | 1 + 13 files changed, 657 insertions(+), 159 deletions(-) (limited to 'cpu/ozone') diff --git a/cpu/ozone/back_end_impl.hh b/cpu/ozone/back_end_impl.hh index 0b0f04f59..36770d65c 100644 --- a/cpu/ozone/back_end_impl.hh +++ b/cpu/ozone/back_end_impl.hh @@ -1385,7 +1385,7 @@ BackEnd::writebackInsts() inst->seqNum, inst->readPC()); inst->setCanCommit(); - inst->setCompleted(); + inst->setResultReady(); if (inst->isExecuted()) { int dependents = IQ.wakeDependents(inst); diff --git a/cpu/ozone/cpu.hh b/cpu/ozone/cpu.hh index 56b6571a2..eec8902d8 100644 --- a/cpu/ozone/cpu.hh +++ b/cpu/ozone/cpu.hh @@ -53,6 +53,7 @@ class AlphaDTB; class PhysicalMemory; class MemoryController; +class Sampler; class RemoteGDB; class GDBListener; @@ -69,6 +70,9 @@ namespace Trace { class InstRecord; } +template +class Checker; + /** * Declaration of Out-of-Order CPU class. Basically it is a SimpleCPU with * simple out-of-order capabilities added to it. It is still a 1 CPI machine @@ -226,7 +230,9 @@ class OzoneCPU : public BaseCPU }; // execution context proxy - OzoneXC xcProxy; + OzoneXC ozoneXC; + ExecContext *xcProxy; + ExecContext *checkerXC; typedef OzoneThreadState ImplState; @@ -245,6 +251,7 @@ class OzoneCPU : public BaseCPU void tick(); std::set snList; + std::set lockAddrList; private: struct TickEvent : public Event { @@ -262,9 +269,9 @@ class OzoneCPU : public BaseCPU void scheduleTickEvent(int delay) { if (tickEvent.squashed()) - tickEvent.reschedule(curTick + delay); + tickEvent.reschedule(curTick + cycles(delay)); else if (!tickEvent.scheduled()) - tickEvent.schedule(curTick + delay); + tickEvent.schedule(curTick + cycles(delay)); } /// Unschedule tick event, regardless of its current state. @@ -322,7 +329,7 @@ class OzoneCPU : public BaseCPU int cpuId; - void switchOut(); + void switchOut(Sampler *sampler); void takeOverFrom(BaseCPU *oldCPU); #if FULL_SYSTEM @@ -472,6 +479,7 @@ class OzoneCPU : public BaseCPU Fault error; if (req->flags & LOCKED) { // lockAddr = req->paddr; + lockAddrList.insert(req->paddr); lockFlag = true; } @@ -546,7 +554,13 @@ class OzoneCPU : public BaseCPU req->result = 2; } else { if (this->lockFlag/* && this->lockAddr == req->paddr*/) { - req->result = 1; + if (lockAddrList.find(req->paddr) != + lockAddrList.end()) { + req->result = 1; + } else { + req->result = 0; + return NoFault; + } } else { req->result = 0; return NoFault; @@ -599,7 +613,7 @@ class OzoneCPU : public BaseCPU void setSyscallReturn(SyscallReturn return_value, int tid); #endif - ExecContext *xcBase() { return &xcProxy; } + ExecContext *xcBase() { return xcProxy; } bool decoupledFrontEnd; struct CommStruct { @@ -615,6 +629,8 @@ class OzoneCPU : public BaseCPU bool lockFlag; Stats::Scalar<> quiesceCycles; + + Checker *checker; }; #endif // __CPU_OZONE_CPU_HH__ diff --git a/cpu/ozone/cpu_builder.cc b/cpu/ozone/cpu_builder.cc index 0146dd1bd..64aa49c71 100644 --- a/cpu/ozone/cpu_builder.cc +++ b/cpu/ozone/cpu_builder.cc @@ -1,6 +1,7 @@ #include +#include "cpu/checker/cpu.hh" #include "cpu/inst_seq.hh" #include "cpu/ozone/cpu.hh" #include "cpu/ozone/ozone_impl.hh" @@ -50,6 +51,8 @@ SimObjectVectorParam workload; SimObjectParam mem; +SimObjectParam checker; + Param max_insts_any_thread; Param max_insts_all_threads; Param max_loads_any_thread; @@ -66,6 +69,7 @@ Param backEndSquashLatency; Param backEndLatency; Param maxInstBufferSize; Param numPhysicalRegs; +Param maxOutstandingMemOps; Param decodeToFetchDelay; Param renameToFetchDelay; @@ -164,6 +168,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU) INIT_PARAM_DFLT(mem, "Memory", NULL), + INIT_PARAM_DFLT(checker, "Checker CPU", NULL), + INIT_PARAM_DFLT(max_insts_any_thread, "Terminate when any thread reaches this inst count", 0), @@ -190,6 +196,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU) INIT_PARAM_DFLT(backEndLatency, "Back end latency", 1), INIT_PARAM_DFLT(maxInstBufferSize, "Maximum instruction buffer size", 16), INIT_PARAM(numPhysicalRegs, "Number of physical registers"), + INIT_PARAM_DFLT(maxOutstandingMemOps, "Maximum outstanding memory operations", 4), INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"), INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"), @@ -314,7 +321,7 @@ CREATE_SIM_OBJECT(DerivOzoneCPU) #endif // FULL_SYSTEM params->mem = mem; - + params->checker = checker; params->max_insts_any_thread = max_insts_any_thread; params->max_insts_all_threads = max_insts_all_threads; params->max_loads_any_thread = max_loads_any_thread; @@ -334,6 +341,7 @@ CREATE_SIM_OBJECT(DerivOzoneCPU) params->backEndLatency = backEndLatency; params->maxInstBufferSize = maxInstBufferSize; params->numPhysicalRegs = numPhysIntRegs + numPhysFloatRegs; + params->maxOutstandingMemOps = maxOutstandingMemOps; params->decodeToFetchDelay = decodeToFetchDelay; params->renameToFetchDelay = renameToFetchDelay; @@ -445,6 +453,8 @@ SimObjectVectorParam workload; SimObjectParam mem; +SimObjectParam checker; + Param max_insts_any_thread; Param max_insts_all_threads; Param max_loads_any_thread; @@ -559,6 +569,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(SimpleOzoneCPU) INIT_PARAM_DFLT(mem, "Memory", NULL), + INIT_PARAM_DFLT(checker, "Checker CPU", NULL), + INIT_PARAM_DFLT(max_insts_any_thread, "Terminate when any thread reaches this inst count", 0), @@ -709,7 +721,7 @@ CREATE_SIM_OBJECT(SimpleOzoneCPU) #endif // FULL_SYSTEM params->mem = mem; - + params->checker = checker; params->max_insts_any_thread = max_insts_any_thread; params->max_insts_all_threads = max_insts_all_threads; params->max_loads_any_thread = max_loads_any_thread; diff --git a/cpu/ozone/cpu_impl.hh b/cpu/ozone/cpu_impl.hh index 17d944e7c..4f3fdf521 100644 --- a/cpu/ozone/cpu_impl.hh +++ b/cpu/ozone/cpu_impl.hh @@ -33,6 +33,7 @@ #include "base/trace.hh" #include "config/full_system.hh" #include "cpu/base.hh" +#include "cpu/checker/exec_context.hh" #include "cpu/exec_context.hh" #include "cpu/exetrace.hh" #include "cpu/ozone/cpu.hh" @@ -156,17 +157,33 @@ OzoneCPU::OzoneCPU(Params *p) #endif comm(5, 5) { - + if (p->checker) { + BaseCPU *temp_checker = p->checker; + checker = dynamic_cast *>(temp_checker); + } else { + checker = NULL; + } frontEnd = new FrontEnd(p); backEnd = new BackEnd(p); _status = Idle; - thread.xcProxy = &xcProxy; + if (checker) { + checker->setMemory(mem); +#if FULL_SYSTEM + checker->setSystem(p->system); +#endif + checkerXC = new CheckerExecContext(&ozoneXC, checker); + thread.xcProxy = checkerXC; + xcProxy = checkerXC; + } else { + thread.xcProxy = &ozoneXC; + xcProxy = &ozoneXC; + } thread.inSyscall = false; - xcProxy.cpu = this; - xcProxy.thread = &thread; + ozoneXC.cpu = this; + ozoneXC.thread = &thread; thread.setStatus(ExecContext::Suspended); #if FULL_SYSTEM @@ -177,7 +194,7 @@ OzoneCPU::OzoneCPU(Params *p) thread.tid = 0; thread.mem = p->mem; - thread.quiesceEvent = new EndQuiesceEvent(&xcProxy); + thread.quiesceEvent = new EndQuiesceEvent(xcProxy); system = p->system; itb = p->itb; @@ -187,9 +204,10 @@ OzoneCPU::OzoneCPU(Params *p) if (p->profile) { thread.profile = new FunctionProfile(p->system->kernelSymtab); + // @todo: This might be better as an ExecContext instead of OzoneXC Callback *cb = new MakeCallback(&xcProxy); + &OzoneXC::dumpFuncProfile>(&ozoneXC); registerExitCallback(cb); } @@ -198,7 +216,6 @@ OzoneCPU::OzoneCPU(Params *p) static ProfileNode dummyNode; thread.profileNode = &dummyNode; thread.profilePC = 3; - #else // xc = new ExecContext(this, /* thread_num */ 0, p->workload[0], /* asid */ 0); thread.cpu = this; @@ -225,13 +242,13 @@ OzoneCPU::OzoneCPU(Params *p) issueWidth = p->issueWidth; */ - execContexts.push_back(&xcProxy); + execContexts.push_back(xcProxy); frontEnd->setCPU(this); backEnd->setCPU(this); - frontEnd->setXC(&xcProxy); - backEnd->setXC(&xcProxy); + frontEnd->setXC(xcProxy); + backEnd->setXC(xcProxy); frontEnd->setThreadState(&thread); backEnd->setThreadState(&thread); @@ -250,7 +267,7 @@ OzoneCPU::OzoneCPU(Params *p) for (int i = 0; i < TheISA::TotalNumRegs; ++i) { thread.renameTable[i] = new DynInst(this); - thread.renameTable[i]->setCompleted(); + thread.renameTable[i]->setResultReady(); } frontEnd->renameTable.copyFrom(thread.renameTable); @@ -312,11 +329,15 @@ OzoneCPU::copyToXC() */ template void -OzoneCPU::switchOut() +OzoneCPU::switchOut(Sampler *sampler) { + // Front end needs state from back end, so switch out the back end first. + backEnd->switchOut(); + frontEnd->switchOut(); _status = SwitchedOut; if (tickEvent.scheduled()) tickEvent.squash(); + sampler->signalSwitched(); } template @@ -325,8 +346,16 @@ OzoneCPU::takeOverFrom(BaseCPU *oldCPU) { BaseCPU::takeOverFrom(oldCPU); + backEnd->takeOverFrom(); + frontEnd->takeOverFrom(); assert(!tickEvent.scheduled()); + // @todo: Fix hardcoded number + // Clear out any old information in time buffer. + for (int i = 0; i < 6; ++i) { + comm.advance(); + } + // if any of this CPU's ExecContexts are active, mark the CPU as // running and schedule its tick event. for (int i = 0; i < execContexts.size(); ++i) { @@ -470,7 +499,7 @@ OzoneCPU::serialize(std::ostream &os) BaseCPU::serialize(os); SERIALIZE_ENUM(_status); nameOut(os, csprintf("%s.xc", name())); - xcProxy.serialize(os); + ozoneXC.serialize(os); nameOut(os, csprintf("%s.tickEvent", name())); tickEvent.serialize(os); } @@ -481,7 +510,7 @@ OzoneCPU::unserialize(Checkpoint *cp, const std::string §ion) { BaseCPU::unserialize(cp, section); UNSERIALIZE_ENUM(_status); - xcProxy.unserialize(cp, csprintf("%s.xc", section)); + ozoneXC.unserialize(cp, csprintf("%s.xc", section)); tickEvent.unserialize(cp, csprintf("%s.tickEvent", section)); } @@ -579,7 +608,7 @@ template Addr OzoneCPU::dbg_vtophys(Addr addr) { - return vtophys(&xcProxy, addr); + return vtophys(xcProxy, addr); } #endif // FULL_SYSTEM /* @@ -725,7 +754,7 @@ OzoneCPU::tick() comInstEventQueue[0]->serviceEvents(numInst); if (!tickEvent.scheduled() && _status == Running) - tickEvent.schedule(curTick + 1); + tickEvent.schedule(curTick + cycles(1)); } template @@ -750,7 +779,7 @@ OzoneCPU::syscall() DPRINTF(OzoneCPU, "FuncExeInst: %i\n", thread.funcExeInst); - thread.process->syscall(&xcProxy); + thread.process->syscall(xcProxy); thread.funcExeInst--; @@ -784,19 +813,17 @@ OzoneCPU::hwrei() { // Need to move this to ISA code // May also need to make this per thread +/* if (!inPalMode()) return new UnimplementedOpcodeFault; thread.setNextPC(thread.readMiscReg(AlphaISA::IPR_EXC_ADDR)); - +*/ lockFlag = false; + lockAddrList.clear(); + kernelStats->hwrei(); - // Not sure how to make a similar check in the Ozone model -// if (!misspeculating()) { - kernelStats->hwrei(); - - checkInterrupts = true; -// } + checkInterrupts = true; // FIXME: XXX check for interrupts? XXX return NoFault; @@ -847,6 +874,11 @@ OzoneCPU::processInterrupts() if (ipl && ipl > thread.readMiscReg(IPR_IPLR)) { thread.setMiscReg(IPR_ISR, summary); thread.setMiscReg(IPR_INTID, ipl); + // @todo: Make this more transparent + if (checker) { + checkerXC->setMiscReg(IPR_ISR, summary); + checkerXC->setMiscReg(IPR_INTID, ipl); + } Fault fault = new InterruptFault; fault->invoke(thread.getXCProxy()); DPRINTF(Flow, "Interrupt! IPLR=%d ipl=%d summary=%x\n", @@ -860,7 +892,7 @@ OzoneCPU::simPalCheck(int palFunc) { // Need to move this to ISA code // May also need to make this per thread - this->kernelStats->callpal(palFunc, &xcProxy); + this->kernelStats->callpal(palFunc, xcProxy); switch (palFunc) { case PAL::halt: @@ -944,7 +976,28 @@ OzoneCPU::OzoneXC::dumpFuncProfile() template void OzoneCPU::OzoneXC::takeOverFrom(ExecContext *old_context) -{ } +{ + // some things should already be set up + assert(getMemPtr() == old_context->getMemPtr()); +#if FULL_SYSTEM + assert(getSystemPtr() == old_context->getSystemPtr()); +#else + assert(getProcessPtr() == old_context->getProcessPtr()); +#endif + + // copy over functional state + setStatus(old_context->status()); + copyArchRegs(old_context); + setCpuId(old_context->readCpuId()); +#if !FULL_SYSTEM + setFuncExeInst(old_context->readFuncExeInst()); +#endif + +// storeCondFailures = 0; + cpu->lockFlag = false; + + old_context->setStatus(ExecContext::Unallocated); +} template void @@ -1062,21 +1115,24 @@ template float OzoneCPU::OzoneXC::readFloatRegSingle(int reg_idx) { - return thread->renameTable[reg_idx]->readFloatResult(); + int idx = reg_idx + TheISA::FP_Base_DepTag; + return thread->renameTable[idx]->readFloatResult(); } template double OzoneCPU::OzoneXC::readFloatRegDouble(int reg_idx) { - return thread->renameTable[reg_idx]->readDoubleResult(); + int idx = reg_idx + TheISA::FP_Base_DepTag; + return thread->renameTable[idx]->readDoubleResult(); } template uint64_t OzoneCPU::OzoneXC::readFloatRegInt(int reg_idx) { - return thread->renameTable[reg_idx]->readIntResult(); + int idx = reg_idx + TheISA::FP_Base_DepTag; + return thread->renameTable[idx]->readIntResult(); } template @@ -1101,7 +1157,9 @@ template void OzoneCPU::OzoneXC::setFloatRegDouble(int reg_idx, double val) { - thread->renameTable[reg_idx]->setDoubleResult(val); + int idx = reg_idx + TheISA::FP_Base_DepTag; + + thread->renameTable[idx]->setDoubleResult(val); if (!thread->inSyscall) { cpu->squashFromXC(); diff --git a/cpu/ozone/dyn_inst.hh b/cpu/ozone/dyn_inst.hh index 4382af0fd..f251c28ea 100644 --- a/cpu/ozone/dyn_inst.hh +++ b/cpu/ozone/dyn_inst.hh @@ -59,9 +59,9 @@ class OzoneDynInst : public BaseDynInst typedef TheISA::MiscReg MiscReg; typedef typename std::list::iterator ListIt; - // Note that this is duplicated from the BaseDynInst class; I'm simply not - // sure the enum would carry through so I could use it in array - // declarations in this class. + // Note that this is duplicated from the BaseDynInst class; I'm + // simply not sure the enum would carry through so I could use it + // in array declarations in this class. enum { MaxInstSrcRegs = TheISA::MaxInstSrcRegs, MaxInstDestRegs = TheISA::MaxInstDestRegs @@ -90,9 +90,23 @@ class OzoneDynInst : public BaseDynInst void addDependent(DynInstPtr &dependent_inst); std::vector &getDependents() { return dependents; } + std::vector &getMemDeps() { return memDependents; } + std::list &getMemSrcs() { return srcMemInsts; } void wakeDependents(); + void wakeMemDependents(); + + void addMemDependent(DynInstPtr &inst) { memDependents.push_back(inst); } + + void addSrcMemInst(DynInstPtr &inst) { srcMemInsts.push_back(inst); } + + void markMemInstReady(OzoneDynInst *inst); + + // For now I will remove instructions from the list when they wake + // up. In the future, you only really need a counter. + bool memDepReady() { return srcMemInsts.empty(); } + // void setBPredInfo(const BPredInfo &bp_info) { bpInfo = bp_info; } // BPredInfo &getBPredInfo() { return bpInfo; } @@ -104,9 +118,13 @@ class OzoneDynInst : public BaseDynInst std::vector dependents; - /** The instruction that produces the value of the source registers. These - * may be NULL if the value has already been read from the source - * instruction. + std::vector memDependents; + + std::list srcMemInsts; + + /** The instruction that produces the value of the source + * registers. These may be NULL if the value has already been + * read from the source instruction. */ DynInstPtr srcInsts[MaxInstSrcRegs]; @@ -165,22 +183,22 @@ class OzoneDynInst : public BaseDynInst */ void setIntReg(const StaticInst *si, int idx, uint64_t val) { - this->instResult.integer = val; + BaseDynInst::setIntReg(si, idx, val); } void setFloatRegSingle(const StaticInst *si, int idx, float val) { - this->instResult.fp = val; + BaseDynInst::setFloatRegSingle(si, idx, val); } void setFloatRegDouble(const StaticInst *si, int idx, double val) { - this->instResult.dbl = val; + BaseDynInst::setFloatRegDouble(si, idx, val); } void setFloatRegInt(const StaticInst *si, int idx, uint64_t val) { - this->instResult.integer = val; + BaseDynInst::setFloatRegInt(si, idx, val); } void setIntResult(uint64_t result) { this->instResult.integer = result; } @@ -199,6 +217,8 @@ class OzoneDynInst : public BaseDynInst void clearDependents(); + void clearMemDependents(); + public: // ISA stuff MiscReg readMiscReg(int misc_reg); diff --git a/cpu/ozone/dyn_inst_impl.hh b/cpu/ozone/dyn_inst_impl.hh index c83481c9a..a7e4460a1 100644 --- a/cpu/ozone/dyn_inst_impl.hh +++ b/cpu/ozone/dyn_inst_impl.hh @@ -38,7 +38,7 @@ template OzoneDynInst::OzoneDynInst(FullCPU *cpu) : BaseDynInst(0, 0, 0, 0, cpu) { - this->setCompleted(); + this->setResultReady(); initInstPtrs(); } @@ -130,7 +130,7 @@ template bool OzoneDynInst::srcInstReady(int regIdx) { - return srcInsts[regIdx]->isCompleted(); + return srcInsts[regIdx]->isResultReady(); } template @@ -149,6 +149,28 @@ OzoneDynInst::wakeDependents() } } +template +void +OzoneDynInst::wakeMemDependents() +{ + for (int i = 0; i < memDependents.size(); ++i) { + memDependents[i]->markMemInstReady(this); + } +} + +template +void +OzoneDynInst::markMemInstReady(OzoneDynInst *inst) +{ + ListIt mem_it = srcMemInsts.begin(); + while ((*mem_it) != inst && mem_it != srcMemInsts.end()) { + mem_it++; + } + assert(mem_it != srcMemInsts.end()); + + srcMemInsts.erase(mem_it); +} + template void OzoneDynInst::initInstPtrs() @@ -164,7 +186,7 @@ bool OzoneDynInst::srcsReady() { for (int i = 0; i < this->numSrcRegs(); ++i) { - if (!srcInsts[i]->isCompleted()) + if (!srcInsts[i]->isResultReady()) return false; } @@ -176,7 +198,7 @@ bool OzoneDynInst::eaSrcsReady() { for (int i = 1; i < this->numSrcRegs(); ++i) { - if (!srcInsts[i]->isCompleted()) + if (!srcInsts[i]->isResultReady()) return false; } @@ -195,6 +217,14 @@ OzoneDynInst::clearDependents() prevDestInst[i] = NULL; } } + +template +void +OzoneDynInst::clearMemDependents() +{ + memDependents.clear(); +} + template MiscReg OzoneDynInst::readMiscReg(int misc_reg) @@ -213,6 +243,7 @@ template Fault OzoneDynInst::setMiscReg(int misc_reg, const MiscReg &val) { + this->setIntResult(val); return this->thread->setMiscReg(misc_reg, val); } @@ -234,11 +265,13 @@ OzoneDynInst::hwrei() this->setNextPC(this->thread->readMiscReg(AlphaISA::IPR_EXC_ADDR)); + this->cpu->hwrei(); +/* this->cpu->kernelStats->hwrei(); this->cpu->checkInterrupts = true; this->cpu->lockFlag = false; - +*/ // FIXME: XXX check for interrupts? XXX return NoFault; } diff --git a/cpu/ozone/front_end.hh b/cpu/ozone/front_end.hh index 2bff2544d..188925ae5 100644 --- a/cpu/ozone/front_end.hh +++ b/cpu/ozone/front_end.hh @@ -66,6 +66,14 @@ class FrontEnd bool isEmpty() { return instBuffer.empty(); } + void switchOut(); + + void takeOverFrom(ExecContext *old_xc = NULL); + + bool isSwitchedOut() { return switchedOut; } + + bool switchedOut; + private: bool updateStatus(); @@ -198,6 +206,9 @@ class FrontEnd DynInstPtr barrierInst; + public: + bool interruptPending; + private: // number of idle cycles /* Stats::Average<> notIdleFraction; @@ -223,6 +234,8 @@ class FrontEnd Stats::Scalar<> fetchBlockedCycles; /** Stat for total number of fetched cache lines. */ Stats::Scalar<> fetchedCacheLines; + + Stats::Scalar<> fetchIcacheSquashes; /** Distribution of number of instructions fetched each cycle. */ Stats::Distribution<> fetchNisnDist; // Stats::Vector<> qfull_iq_occupancy; diff --git a/cpu/ozone/front_end_impl.hh b/cpu/ozone/front_end_impl.hh index 7c18386cf..a3eb809d0 100644 --- a/cpu/ozone/front_end_impl.hh +++ b/cpu/ozone/front_end_impl.hh @@ -19,8 +19,11 @@ FrontEnd::FrontEnd(Params *params) width(params->frontEndWidth), freeRegs(params->numPhysicalRegs), numPhysRegs(params->numPhysicalRegs), - serializeNext(false) + serializeNext(false), + interruptPending(false) { + switchedOut = false; + status = Idle; // Setup branch predictor. @@ -127,6 +130,11 @@ FrontEnd::regStats() .desc("Number of cache lines fetched") .prereq(fetchedCacheLines); + fetchIcacheSquashes + .name(name() + ".fetchIcacheSquashes") + .desc("Number of outstanding Icache misses that were squashed") + .prereq(fetchIcacheSquashes); + fetchNisnDist .init(/* base value */ 0, /* last value */ width, @@ -370,6 +378,10 @@ FrontEnd::fetchCacheLine() #endif // FULL_SYSTEM Fault fault = NoFault; + if (interruptPending && flags == 0) { + return fault; + } + // Align the fetch PC so it's at the start of a cache block. Addr fetch_PC = icacheBlockAlignPC(PC); @@ -397,7 +409,8 @@ FrontEnd::fetchCacheLine() // exists within the cache. if (icacheInterface && fault == NoFault) { #if FULL_SYSTEM - if (cpu->system->memctrl->badaddr(memReq->paddr)) { + if (cpu->system->memctrl->badaddr(memReq->paddr) || + memReq->flags & UNCACHEABLE) { DPRINTF(FE, "Fetch: Bad address %#x (hopefully on a " "misspeculating path!", memReq->paddr); @@ -497,7 +510,7 @@ FrontEnd::processBarriers(DynInstPtr &inst) dispatchedTempSerializing++; } - // Change status over to BarrierStall so that other stages know + // Change status over to SerializeBlocked so that other stages know // what this is blocked on. status = SerializeBlocked; @@ -613,8 +626,10 @@ FrontEnd::processCacheCompletion(MemReqPtr &req) // Do something here. if (status != IcacheMissStall || - req != memReq) { + req != memReq || + switchedOut) { DPRINTF(FE, "Previous fetch was squashed.\n"); + fetchIcacheSquashes++; return; } @@ -702,6 +717,7 @@ FrontEnd::getInstFromCacheline() DynInstPtr inst = barrierInst; status = Running; barrierInst = NULL; + inst->clearSerializeBefore(); return inst; } @@ -773,7 +789,7 @@ FrontEnd::renameInst(DynInstPtr &inst) DPRINTF(FE, "[sn:%lli]: Src reg %i is inst [sn:%lli]\n", inst->seqNum, (int)inst->srcRegIdx(i), src_inst->seqNum); - if (src_inst->isCompleted()) { + if (src_inst->isResultReady()) { DPRINTF(FE, "Reg ready.\n"); inst->markSrcRegReady(i); } else { @@ -807,6 +823,38 @@ FrontEnd::wakeFromQuiesce() status = Running; } +template +void +FrontEnd::switchOut() +{ + switchedOut = true; + memReq = NULL; + squash(0, 0); + instBuffer.clear(); + instBufferSize = 0; + status = Idle; +} + +template +void +FrontEnd::takeOverFrom(ExecContext *old_xc) +{ + assert(freeRegs == numPhysRegs); + fetchCacheLineNextCycle = true; + + cacheBlkValid = false; + +#if !FULL_SYSTEM +// pTable = params->pTable; +#endif + fetchFault = NoFault; + serializeNext = false; + barrierInst = NULL; + status = Running; + switchedOut = false; + interruptPending = false; +} + template void FrontEnd::dumpInsts() diff --git a/cpu/ozone/lw_back_end.hh b/cpu/ozone/lw_back_end.hh index f17c93ff4..028fdaf8c 100644 --- a/cpu/ozone/lw_back_end.hh +++ b/cpu/ozone/lw_back_end.hh @@ -17,6 +17,8 @@ #include "mem/mem_req.hh" #include "sim/eventq.hh" +template +class Checker; class ExecContext; template @@ -126,6 +128,8 @@ class LWBackEnd Addr commitPC; + Tick lastCommitCycle; + bool robEmpty() { return instList.empty(); } bool isFull() { return numInsts >= numROBEntries; } @@ -133,7 +137,7 @@ class LWBackEnd void fetchFault(Fault &fault); - int wakeDependents(DynInstPtr &inst); + int wakeDependents(DynInstPtr &inst, bool memory_deps = false); /** Tells memory dependence unit that a memory instruction needs to be * rescheduled. It will re-execute once replayMemInst() is called. @@ -182,6 +186,12 @@ class LWBackEnd void instToCommit(DynInstPtr &inst); + void switchOut(); + + void takeOverFrom(ExecContext *old_xc = NULL); + + bool isSwitchedOut() { return switchedOut; } + private: void generateTrapEvent(Tick latency = 0); void handleFault(Fault &fault, Tick latency = 0); @@ -303,6 +313,10 @@ class LWBackEnd Fault faultFromFetch; bool fetchHasFault; + bool switchedOut; + + DynInstPtr memBarrier; + private: struct pqCompare { bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const @@ -327,7 +341,7 @@ class LWBackEnd bool exactFullStall; - bool fetchRedirect[Impl::MaxThreads]; +// bool fetchRedirect[Impl::MaxThreads]; // number of cycles stalled for D-cache misses /* Stats::Scalar<> dcacheStallCycles; @@ -414,6 +428,8 @@ class LWBackEnd Stats::VectorDistribution<> ROB_occ_dist; public: void dumpInsts(); + + Checker *checker; }; template diff --git a/cpu/ozone/lw_back_end_impl.hh b/cpu/ozone/lw_back_end_impl.hh index d1290239c..d4829629d 100644 --- a/cpu/ozone/lw_back_end_impl.hh +++ b/cpu/ozone/lw_back_end_impl.hh @@ -1,5 +1,6 @@ #include "encumbered/cpu/full/op_class.hh" +#include "cpu/checker/cpu.hh" #include "cpu/ozone/lw_back_end.hh" template @@ -10,28 +11,36 @@ LWBackEnd::generateTrapEvent(Tick latency) TrapEvent *trap = new TrapEvent(this); - trap->schedule(curTick + latency); + trap->schedule(curTick + cpu->cycles(latency)); thread->trapPending = true; } template int -LWBackEnd::wakeDependents(DynInstPtr &inst) +LWBackEnd::wakeDependents(DynInstPtr &inst, bool memory_deps) { assert(!inst->isSquashed()); - std::vector &dependents = inst->getDependents(); + std::vector &dependents = memory_deps ? inst->getMemDeps() : + inst->getDependents(); int num_outputs = dependents.size(); DPRINTF(BE, "Waking instruction [sn:%lli] dependents in IQ\n", inst->seqNum); for (int i = 0; i < num_outputs; i++) { DynInstPtr dep_inst = dependents[i]; - dep_inst->markSrcRegReady(); + if (!memory_deps) { + dep_inst->markSrcRegReady(); + } else { + if (!dep_inst->isSquashed()) + dep_inst->markMemInstReady(inst.get()); + } + DPRINTF(BE, "Marking source reg ready [sn:%lli] in IQ\n", dep_inst->seqNum); if (dep_inst->readyToIssue() && dep_inst->isInROB() && - !dep_inst->isNonSpeculative()) { + !dep_inst->isNonSpeculative() && + dep_inst->memDepReady() && !dep_inst->isMemBarrier() && !dep_inst->isWriteBarrier()) { DPRINTF(BE, "Adding instruction to exeList [sn:%lli]\n", dep_inst->seqNum); exeList.push(dep_inst); @@ -114,6 +123,9 @@ LWBackEnd::LdWritebackEvent::process() // iewStage->wakeCPU(); + if (be->isSwitchedOut()) + return; + if (dcacheMiss) { be->removeDcacheMiss(inst); } @@ -169,16 +181,18 @@ LWBackEnd::DCacheCompletionEvent::description() template LWBackEnd::LWBackEnd(Params *params) : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(5, 5), - xcSquash(false), cacheCompletionEvent(this), + trapSquash(false), xcSquash(false), cacheCompletionEvent(this), dcacheInterface(params->dcacheInterface), width(params->backEndWidth), exactFullStall(true) { numROBEntries = params->numROBEntries; numInsts = 0; numDispatchEntries = 32; - maxOutstandingMemOps = 4; + maxOutstandingMemOps = params->maxOutstandingMemOps; numWaitingMemOps = 0; waitingInsts = 0; + switchedOut = false; + // IQ.setBE(this); LSQ.setBE(this); @@ -533,6 +547,7 @@ LWBackEnd::setCPU(FullCPU *cpu_ptr) { cpu = cpu_ptr; LSQ.setCPU(cpu_ptr); + checker = cpu->checker; } template @@ -554,30 +569,35 @@ LWBackEnd::checkInterrupts() !cpu->inPalMode(thread->readPC()) && !trapSquash && !xcSquash) { - // Will need to squash all instructions currently in flight and have - // the interrupt handler restart at the last non-committed inst. - // Most of that can be handled through the trap() function. The - // processInterrupts() function really just checks for interrupts - // and then calls trap() if there is an interrupt present. + frontEnd->interruptPending = true; + if (robEmpty() && !LSQ.hasStoresToWB()) { + // Will need to squash all instructions currently in flight and have + // the interrupt handler restart at the last non-committed inst. + // Most of that can be handled through the trap() function. The + // processInterrupts() function really just checks for interrupts + // and then calls trap() if there is an interrupt present. - // Not sure which thread should be the one to interrupt. For now - // always do thread 0. - assert(!thread->inSyscall); - thread->inSyscall = true; + // Not sure which thread should be the one to interrupt. For now + // always do thread 0. + assert(!thread->inSyscall); + thread->inSyscall = true; - // CPU will handle implementation of the interrupt. - cpu->processInterrupts(); + // CPU will handle implementation of the interrupt. + cpu->processInterrupts(); - // Now squash or record that I need to squash this cycle. - commitStatus = TrapPending; + // Now squash or record that I need to squash this cycle. + commitStatus = TrapPending; - // Exit state update mode to avoid accidental updating. - thread->inSyscall = false; + // Exit state update mode to avoid accidental updating. + thread->inSyscall = false; - // Generate trap squash event. - generateTrapEvent(); + // Generate trap squash event. + generateTrapEvent(); - DPRINTF(BE, "Interrupt detected.\n"); + DPRINTF(BE, "Interrupt detected.\n"); + } else { + DPRINTF(BE, "Interrupt must wait for ROB to drain.\n"); + } } } @@ -585,7 +605,7 @@ template void LWBackEnd::handleFault(Fault &fault, Tick latency) { - DPRINTF(BE, "Handling fault!"); + DPRINTF(BE, "Handling fault!\n"); assert(!thread->inSyscall); @@ -615,6 +635,9 @@ LWBackEnd::tick() wbCycle = 0; + // Read in any done instruction information and update the IQ or LSQ. + updateStructures(); + #if FULL_SYSTEM checkInterrupts(); @@ -623,7 +646,7 @@ LWBackEnd::tick() squashFromTrap(); } else if (xcSquash) { squashFromXC(); - } else if (fetchHasFault && robEmpty() && frontEnd->isEmpty()) { + } else if (fetchHasFault && robEmpty() && frontEnd->isEmpty() && !LSQ.hasStoresToWB()) { DPRINTF(BE, "ROB and front end empty, handling fetch fault\n"); Fault fetch_fault = frontEnd->getFault(); if (fetch_fault == NoFault) { @@ -636,9 +659,6 @@ LWBackEnd::tick() } #endif - // Read in any done instruction information and update the IQ or LSQ. - updateStructures(); - if (dispatchStatus != Blocked) { dispatchInsts(); } else { @@ -719,12 +739,41 @@ LWBackEnd::dispatchInsts() for (int i = 0; i < inst->numDestRegs(); ++i) renameTable[inst->destRegIdx(i)] = inst; - if (inst->readyToIssue() && !inst->isNonSpeculative()) { - DPRINTF(BE, "Instruction [sn:%lli] ready, addding to exeList.\n", - inst->seqNum); - exeList.push(inst); + if (inst->isMemBarrier() || inst->isWriteBarrier()) { + if (memBarrier) { + DPRINTF(BE, "Instruction [sn:%lli] is waiting on " + "barrier [sn:%lli].\n", + inst->seqNum, memBarrier->seqNum); + memBarrier->addMemDependent(inst); + inst->addSrcMemInst(memBarrier); + } + memBarrier = inst; + inst->setCanCommit(); + } else if (inst->readyToIssue() && !inst->isNonSpeculative()) { if (inst->isMemRef()) { + LSQ.insert(inst); + if (memBarrier) { + DPRINTF(BE, "Instruction [sn:%lli] is waiting on " + "barrier [sn:%lli].\n", + inst->seqNum, memBarrier->seqNum); + memBarrier->addMemDependent(inst); + inst->addSrcMemInst(memBarrier); + addWaitingMemOp(inst); + + waitingList.push_front(inst); + inst->iqIt = waitingList.begin(); + inst->iqItValid = true; + waitingInsts++; + } else { + DPRINTF(BE, "Instruction [sn:%lli] ready, addding to exeList.\n", + inst->seqNum); + exeList.push(inst); + } + } else { + DPRINTF(BE, "Instruction [sn:%lli] ready, addding to exeList.\n", + inst->seqNum); + exeList.push(inst); } } else { if (inst->isNonSpeculative()) { @@ -735,6 +784,14 @@ LWBackEnd::dispatchInsts() if (inst->isMemRef()) { addWaitingMemOp(inst); LSQ.insert(inst); + if (memBarrier) { + memBarrier->addMemDependent(inst); + inst->addSrcMemInst(memBarrier); + + DPRINTF(BE, "Instruction [sn:%lli] is waiting on " + "barrier [sn:%lli].\n", + inst->seqNum, memBarrier->seqNum); + } } DPRINTF(BE, "Instruction [sn:%lli] not ready, addding to " @@ -872,9 +929,6 @@ LWBackEnd::executeInsts() ++funcExeInst; ++num_executed; - // keep an instruction count - thread->numInst++; - thread->numInsts++; exeList.pop(); @@ -915,7 +969,7 @@ LWBackEnd::instToCommit(DynInstPtr &inst) inst->setCanCommit(); if (inst->isExecuted()) { - inst->setCompleted(); + inst->setResultReady(); int dependents = wakeDependents(inst); if (dependents) { producer_inst[0]++; @@ -956,7 +1010,7 @@ LWBackEnd::writebackInsts() inst->seqNum, inst->readPC()); inst->setCanCommit(); - inst->setCompleted(); + inst->setResultReady(); if (inst->isExecuted()) { int dependents = wakeDependents(inst); @@ -997,7 +1051,9 @@ LWBackEnd::commitInst(int inst_num) // If the instruction is not executed yet, then it is a non-speculative // or store inst. Signal backwards that it should be executed. if (!inst->isExecuted()) { - if (inst->isNonSpeculative()) { + if (inst->isNonSpeculative() || + inst->isMemBarrier() || + inst->isWriteBarrier()) { #if !FULL_SYSTEM // Hack to make sure syscalls aren't executed until all stores // write back their data. This direct communication shouldn't @@ -1017,6 +1073,16 @@ LWBackEnd::commitInst(int inst_num) "instruction at the head of the ROB, PC %#x.\n", inst->readPC()); + if (inst->isMemBarrier() || inst->isWriteBarrier()) { + DPRINTF(BE, "Waking dependents on barrier [sn:%lli]\n", + inst->seqNum); + assert(memBarrier); + wakeDependents(inst, true); + if (memBarrier == inst) + memBarrier = NULL; + inst->clearMemDependents(); + } + // Send back the non-speculative instruction's sequence number. if (inst->iqItValid) { DPRINTF(BE, "Removing instruction from waiting list\n"); @@ -1066,13 +1132,45 @@ LWBackEnd::commitInst(int inst_num) // Not handled for now. assert(!inst->isThreadSync()); - + assert(inst->memDepReady()); + // Stores will mark themselves as totally completed as they need + // to wait to writeback to memory. @todo: Hack...attempt to fix + // having the checker be forced to wait until a store completes in + // order to check all of the instructions. If the store at the + // head of the check list misses, but a later store hits, then + // loads in the checker may see the younger store values instead + // of the store they should see. Either the checker needs its own + // memory (annoying to update), its own store buffer (how to tell + // which value is correct?), or something else... + if (!inst->isStore()) { + inst->setCompleted(); + } // Check if the instruction caused a fault. If so, trap. Fault inst_fault = inst->getFault(); + // Use checker prior to updating anything due to traps or PC + // based events. + if (checker) { + checker->tick(inst); + } + if (inst_fault != NoFault) { DPRINTF(BE, "Inst [sn:%lli] PC %#x has a fault\n", inst->seqNum, inst->readPC()); + + // Instruction is completed as it has a fault. + inst->setCompleted(); + + if (LSQ.hasStoresToWB()) { + DPRINTF(BE, "Stores still in flight, will wait until drained.\n"); + return false; + } else if (inst_num != 0) { + DPRINTF(BE, "Will wait until instruction is head of commit group.\n"); + return false; + } else if (checker && inst->isStore()) { + checker->tick(inst); + } + thread->setInst( static_cast(inst->staticInst->machInst)); #if FULL_SYSTEM @@ -1094,6 +1192,8 @@ LWBackEnd::commitInst(int inst_num) } if (inst->traceData) { + inst->traceData->setFetchSeq(inst->seqNum); + inst->traceData->setCPSeq(thread->numInst); inst->traceData->finalize(); inst->traceData = NULL; } @@ -1105,18 +1205,18 @@ LWBackEnd::commitInst(int inst_num) instList.pop_back(); --numInsts; - thread->numInsts++; ++thread->funcExeInst; - // Maybe move this to where teh fault is handled; if the fault is handled, + // Maybe move this to where the fault is handled; if the fault is handled, // don't try to set this myself as the fault will set it. If not, then // I set thread->PC = thread->nextPC and thread->nextPC = thread->nextPC + 4. thread->setPC(thread->readNextPC()); + thread->setNextPC(thread->readNextPC() + sizeof(TheISA::MachInst)); updateComInstStats(inst); // Write the done sequence number here. // LSQ.commitLoads(inst->seqNum); -// LSQ.commitStores(inst->seqNum); toIEW->doneSeqNum = inst->seqNum; + lastCommitCycle = curTick; #if FULL_SYSTEM int count = 0; @@ -1243,6 +1343,22 @@ LWBackEnd::squash(const InstSeqNum &sn) waitingInsts--; } + while (memBarrier && memBarrier->seqNum > sn) { + DPRINTF(BE, "[sn:%lli] Memory barrier squashed (or previously squashed)\n", memBarrier->seqNum); + memBarrier->clearMemDependents(); + if (memBarrier->memDepReady()) { + DPRINTF(BE, "No previous barrier\n"); + memBarrier = NULL; + } else { + std::list &srcs = memBarrier->getMemSrcs(); + memBarrier = srcs.front(); + srcs.pop_front(); + assert(srcs.empty()); + DPRINTF(BE, "Previous barrier: [sn:%lli]\n", + memBarrier->seqNum); + } + } + frontEnd->addFreeRegs(freed_regs); } @@ -1254,6 +1370,7 @@ LWBackEnd::squashFromXC() squash(squashed_inst); frontEnd->squash(squashed_inst, thread->readPC(), false, false); + frontEnd->interruptPending = false; thread->trapPending = false; thread->inSyscall = false; @@ -1269,6 +1386,7 @@ LWBackEnd::squashFromTrap() squash(squashed_inst); frontEnd->squash(squashed_inst, thread->readPC(), false, false); + frontEnd->interruptPending = false; thread->trapPending = false; thread->inSyscall = false; @@ -1319,6 +1437,36 @@ LWBackEnd::fetchFault(Fault &fault) fetchHasFault = true; } +template +void +LWBackEnd::switchOut() +{ + switchedOut = true; + // Need to get rid of all committed, non-speculative state and write it + // to memory/XC. In this case this is stores that have committed and not + // yet written back. + LSQ.switchOut(); + squash(0); +} + +template +void +LWBackEnd::takeOverFrom(ExecContext *old_xc) +{ + switchedOut = false; + xcSquash = false; + trapSquash = false; + + numInsts = 0; + numWaitingMemOps = 0; + waitingMemOps.clear(); + waitingInsts = 0; + switchedOut = false; + dispatchStatus = Running; + commitStatus = Running; + LSQ.takeOverFrom(old_xc); +} + template void LWBackEnd::updateExeInstStats(DynInstPtr &inst) @@ -1358,7 +1506,11 @@ template void LWBackEnd::updateComInstStats(DynInstPtr &inst) { - unsigned thread = inst->threadNumber; + unsigned tid = inst->threadNumber; + + // keep an instruction count + thread->numInst++; + thread->numInsts++; cpu->numInst++; // @@ -1366,33 +1518,33 @@ LWBackEnd::updateComInstStats(DynInstPtr &inst) // #ifdef TARGET_ALPHA if (inst->isDataPrefetch()) { - stat_com_swp[thread]++; + stat_com_swp[tid]++; } else { - stat_com_inst[thread]++; + stat_com_inst[tid]++; } #else - stat_com_inst[thread]++; + stat_com_inst[tid]++; #endif // // Control Instructions // if (inst->isControl()) - stat_com_branches[thread]++; + stat_com_branches[tid]++; // // Memory references // if (inst->isMemRef()) { - stat_com_refs[thread]++; + stat_com_refs[tid]++; if (inst->isLoad()) { - stat_com_loads[thread]++; + stat_com_loads[tid]++; } } if (inst->isMemBarrier()) { - stat_com_membars[thread]++; + stat_com_membars[tid]++; } } diff --git a/cpu/ozone/lw_lsq.hh b/cpu/ozone/lw_lsq.hh index eb9886244..042610324 100644 --- a/cpu/ozone/lw_lsq.hh +++ b/cpu/ozone/lw_lsq.hh @@ -41,6 +41,7 @@ #include "cpu/inst_seq.hh" #include "mem/mem_interface.hh" //#include "mem/page_table.hh" +#include "sim/debug.hh" #include "sim/sim_object.hh" //class PageTable; @@ -90,7 +91,10 @@ class OzoneLWLSQ { /** The writeback event for the store. Needed for store * conditionals. */ + public: Event *wbEvent; + bool miss; + private: /** The pointer to the LSQ unit that issued the store. */ OzoneLWLSQ *lsqPtr; }; @@ -228,6 +232,14 @@ class OzoneLWLSQ { !storeQueue.back().completed && !dcacheInterface->isBlocked(); } + void switchOut(); + + void takeOverFrom(ExecContext *old_xc = NULL); + + bool isSwitchedOut() { return switchedOut; } + + bool switchedOut; + private: /** Completes the store at the specified index. */ void completeStore(int store_idx); @@ -560,12 +572,10 @@ OzoneLWLSQ::read(MemReqPtr &req, T &data, int load_idx) sq_it++; } - // If there's no forwarding case, then go access memory DPRINTF(OzoneLSQ, "Doing functional access for inst PC %#x\n", inst->readPC()); - // Setup MemReq pointer req->cmd = Read; req->completionEvent = NULL; @@ -594,8 +604,12 @@ OzoneLWLSQ::read(MemReqPtr &req, T &data, int load_idx) DPRINTF(OzoneLSQ, "D-cache: PC:%#x reading from paddr:%#x " "vaddr:%#x flags:%i\n", inst->readPC(), req->paddr, req->vaddr, req->flags); - - +/* + Addr debug_addr = ULL(0xfffffc0000be81a8); + if (req->vaddr == debug_addr) { + debug_break(); + } +*/ assert(!req->completionEvent); req->completionEvent = new typename BackEnd::LdWritebackEvent(inst, be); @@ -647,7 +661,15 @@ OzoneLWLSQ::write(MemReqPtr &req, T &data, int store_idx) (*sq_it).req = req; (*sq_it).size = sizeof(T); (*sq_it).data = data; - + assert(!req->data); + req->data = new uint8_t[64]; + memcpy(req->data, (uint8_t *)&(*sq_it).data, req->size); +/* + Addr debug_addr = ULL(0xfffffc0000be81a8); + if (req->vaddr == debug_addr) { + debug_break(); + } +*/ // This function only writes the data to the store queue, so no fault // can happen here. return NoFault; diff --git a/cpu/ozone/lw_lsq_impl.hh b/cpu/ozone/lw_lsq_impl.hh index 7b22d2564..9b7e48f96 100644 --- a/cpu/ozone/lw_lsq_impl.hh +++ b/cpu/ozone/lw_lsq_impl.hh @@ -29,6 +29,7 @@ #include "arch/isa_traits.hh" #include "base/str.hh" #include "cpu/ozone/lw_lsq.hh" +#include "cpu/checker/cpu.hh" template OzoneLWLSQ::StoreCompletionEvent::StoreCompletionEvent(DynInstPtr &_inst, @@ -39,6 +40,7 @@ OzoneLWLSQ::StoreCompletionEvent::StoreCompletionEvent(DynInstPtr &_inst, inst(_inst), be(_be), wbEvent(wb_event), + miss(false), lsqPtr(lsq_ptr) { this->setFlags(Event::AutoDelete); @@ -54,13 +56,21 @@ OzoneLWLSQ::StoreCompletionEvent::process() //lsqPtr->removeMSHR(lsqPtr->storeQueue[storeIdx].inst->seqNum); // lsqPtr->cpu->wakeCPU(); + if (lsqPtr->isSwitchedOut()) { + if (wbEvent) + delete wbEvent; + + return; + } + if (wbEvent) { wbEvent->process(); delete wbEvent; } lsqPtr->completeStore(inst->sqIdx); - be->removeDcacheMiss(inst); + if (miss) + be->removeDcacheMiss(inst); } template @@ -80,8 +90,7 @@ OzoneLWLSQ::OzoneLWLSQ() template void OzoneLWLSQ::init(Params *params, unsigned maxLQEntries, - unsigned maxSQEntries, unsigned id) - + unsigned maxSQEntries, unsigned id) { DPRINTF(OzoneLSQ, "Creating OzoneLWLSQ%i object.\n",id); @@ -90,7 +99,7 @@ OzoneLWLSQ::init(Params *params, unsigned maxLQEntries, LQEntries = maxLQEntries; SQEntries = maxSQEntries; - for (int i = 0; i < LQEntries * 10; i++) { + for (int i = 0; i < LQEntries * 2; i++) { LQIndices.push(i); SQIndices.push(i); } @@ -196,6 +205,7 @@ template void OzoneLWLSQ::insertLoad(DynInstPtr &load_inst) { + assert(loads < LQEntries * 2); assert(!LQIndices.empty()); int load_index = LQIndices.front(); LQIndices.pop(); @@ -503,21 +513,13 @@ OzoneLWLSQ::writebackStores() assert((*sq_it).req); assert(!(*sq_it).committed); - MemReqPtr req = (*sq_it).req; (*sq_it).committed = true; + MemReqPtr req = (*sq_it).req; + req->cmd = Write; req->completionEvent = NULL; req->time = curTick; - assert(!req->data); - req->data = new uint8_t[64]; - memcpy(req->data, (uint8_t *)&(*sq_it).data, req->size); - - DPRINTF(OzoneLSQ, "D-Cache: Writing back store idx:%i PC:%#x " - "to Addr:%#x, data:%#x [sn:%lli]\n", - inst->sqIdx,inst->readPC(), - req->paddr, *(req->data), - inst->seqNum); switch((*sq_it).size) { case 1: @@ -535,8 +537,25 @@ OzoneLWLSQ::writebackStores() default: panic("Unexpected store size!\n"); } + if (!(req->flags & LOCKED)) { + (*sq_it).inst->setCompleted(); + if (cpu->checker) { + cpu->checker->tick((*sq_it).inst); + } + } + + DPRINTF(OzoneLSQ, "D-Cache: Writing back store idx:%i PC:%#x " + "to Addr:%#x, data:%#x [sn:%lli]\n", + inst->sqIdx,inst->readPC(), + req->paddr, *(req->data), + inst->seqNum); if (dcacheInterface) { + assert(!req->completionEvent); + StoreCompletionEvent *store_event = new + StoreCompletionEvent(inst, be, NULL, this); + req->completionEvent = store_event; + MemAccessResult result = dcacheInterface->access(req); if (isStalled() && @@ -551,13 +570,14 @@ OzoneLWLSQ::writebackStores() if (result != MA_HIT && dcacheInterface->doEvents()) { // Event *wb = NULL; - + store_event->miss = true; typename BackEnd::LdWritebackEvent *wb = NULL; if (req->flags & LOCKED) { // Stx_C does not generate a system port transaction. // req->result=1; wb = new typename BackEnd::LdWritebackEvent(inst, be); + store_event->wbEvent = wb; } DPRINTF(OzoneLSQ,"D-Cache Write Miss!\n"); @@ -567,9 +587,6 @@ OzoneLWLSQ::writebackStores() // Will stores need their own kind of writeback events? // Do stores even need writeback events? - assert(!req->completionEvent); - req->completionEvent = new - StoreCompletionEvent(inst, be, wb, this); be->addDcacheMiss(inst); lastDcacheStall = curTick; @@ -597,10 +614,10 @@ OzoneLWLSQ::writebackStores() typename BackEnd::LdWritebackEvent *wb = new typename BackEnd::LdWritebackEvent(inst, be); - wb->schedule(curTick); + store_event->wbEvent = wb; } sq_it--; - completeStore(inst->sqIdx); +// completeStore(inst->sqIdx); } } else { panic("Must HAVE DCACHE!!!!!\n"); @@ -758,31 +775,121 @@ OzoneLWLSQ::completeStore(int store_idx) DPRINTF(OzoneLSQ, "Completing store idx:%i [sn:%lli], storesToWB:%i\n", inst->sqIdx, inst->seqNum, storesToWB); - // A bit conservative because a store completion may not free up entries, - // but hopefully avoids two store completions in one cycle from making - // the CPU tick twice. -// cpu->activityThisCycle(); assert(!storeQueue.empty()); SQItHash.erase(sq_hash_it); SQIndices.push(inst->sqIdx); storeQueue.erase(sq_it); --stores; -/* - SQIt oldest_store_it = --(storeQueue.end()); - if (sq_it == oldest_store_it) { - do { - inst = (*oldest_store_it).inst; - sq_hash_it = SQItHash.find(inst->sqIdx); - assert(sq_hash_it != SQItHash.end()); - SQItHash.erase(sq_hash_it); - SQIndices.push(inst->sqIdx); - storeQueue.erase(oldest_store_it--); - - --stores; - } while ((*oldest_store_it).completed && - oldest_store_it != storeQueue.end()); - -// be->updateLSQNextCycle = true; +// assert(!inst->isCompleted()); + inst->setCompleted(); + if (cpu->checker) { + cpu->checker->tick(inst); } -*/ +} + +template +void +OzoneLWLSQ::switchOut() +{ + switchedOut = true; + SQIt sq_it = --(storeQueue.end()); + while (storesToWB > 0 && + sq_it != storeQueue.end() && + (*sq_it).inst && + (*sq_it).canWB) { + + DynInstPtr inst = (*sq_it).inst; + + if ((*sq_it).size == 0 && !(*sq_it).completed) { + sq_it--; +// completeStore(inst->sqIdx); + + continue; + } + + // Store conditionals don't complete until *after* they have written + // back. If it's here and not yet sent to memory, then don't bother + // as it's not part of committed state. + if (inst->isDataPrefetch() || (*sq_it).committed || + (*sq_it).req->flags & LOCKED) { + sq_it--; + continue; + } + + assert((*sq_it).req); + assert(!(*sq_it).committed); + + MemReqPtr req = (*sq_it).req; + (*sq_it).committed = true; + + req->cmd = Write; + req->completionEvent = NULL; + req->time = curTick; + assert(!req->data); + req->data = new uint8_t[64]; + memcpy(req->data, (uint8_t *)&(*sq_it).data, req->size); + + DPRINTF(OzoneLSQ, "Switching out : Writing back store idx:%i PC:%#x " + "to Addr:%#x, data:%#x directly to memory [sn:%lli]\n", + inst->sqIdx,inst->readPC(), + req->paddr, *(req->data), + inst->seqNum); + + switch((*sq_it).size) { + case 1: + cpu->write(req, (uint8_t &)(*sq_it).data); + break; + case 2: + cpu->write(req, (uint16_t &)(*sq_it).data); + break; + case 4: + cpu->write(req, (uint32_t &)(*sq_it).data); + break; + case 8: + cpu->write(req, (uint64_t &)(*sq_it).data); + break; + default: + panic("Unexpected store size!\n"); + } + } + + // Clear the queue to free up resources + storeQueue.clear(); + loadQueue.clear(); + loads = stores = storesToWB = 0; +} + +template +void +OzoneLWLSQ::takeOverFrom(ExecContext *old_xc) +{ + // Clear out any old state. May be redundant if this is the first time + // the CPU is being used. + stalled = false; + isLoadBlocked = false; + loadBlockedHandled = false; + switchedOut = false; + + // Could do simple checks here to see if indices are on twice + while (!LQIndices.empty()) + LQIndices.pop(); + while (!SQIndices.empty()) + SQIndices.pop(); + + for (int i = 0; i < LQEntries * 2; i++) { + LQIndices.push(i); + SQIndices.push(i); + } + + // May want to initialize these entries to NULL + +// loadHead = loadTail = 0; + +// storeHead = storeWBIdx = storeTail = 0; + + usedPorts = 0; + + loadFaultInst = storeFaultInst = memDepViolator = NULL; + + blockedLoadSeqNum = 0; } diff --git a/cpu/ozone/simple_params.hh b/cpu/ozone/simple_params.hh index e503654aa..647da1781 100644 --- a/cpu/ozone/simple_params.hh +++ b/cpu/ozone/simple_params.hh @@ -51,6 +51,7 @@ class SimpleParams : public BaseCPU::Params unsigned backEndLatency; unsigned maxInstBufferSize; unsigned numPhysicalRegs; + unsigned maxOutstandingMemOps; // // Fetch // -- cgit v1.2.3 From 52383ca7cc2b4698109b71a968cde16e9f7dc6e0 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Tue, 16 May 2006 14:09:04 -0400 Subject: Sampler updates. cpu/ozone/cpu.hh: Updates for sampler. cpu/ozone/cpu_impl.hh: Updates for sampler, checker. cpu/ozone/inorder_back_end.hh: Sampler updates. Also support old memory system. --HG-- extra : convert_revision : 33ebe38e4c08d49c6af84032b819533b784b4fe8 --- cpu/ozone/cpu.hh | 8 ++- cpu/ozone/cpu_impl.hh | 122 +++++++++++++----------------------------- cpu/ozone/front_end.hh | 2 + cpu/ozone/front_end_impl.hh | 10 ++++ cpu/ozone/inorder_back_end.hh | 44 ++++++++++++--- cpu/ozone/lw_back_end.hh | 3 +- cpu/ozone/lw_back_end_impl.hh | 19 +++++++ cpu/ozone/lw_lsq_impl.hh | 9 +++- 8 files changed, 122 insertions(+), 95 deletions(-) (limited to 'cpu/ozone') diff --git a/cpu/ozone/cpu.hh b/cpu/ozone/cpu.hh index eec8902d8..1d522b2fa 100644 --- a/cpu/ozone/cpu.hh +++ b/cpu/ozone/cpu.hh @@ -64,6 +64,7 @@ class Process; #endif // FULL_SYSTEM class Checkpoint; +class EndQuiesceEvent; class MemInterface; namespace Trace { @@ -149,7 +150,7 @@ class OzoneCPU : public BaseCPU void unserialize(Checkpoint *cp, const std::string §ion); #if FULL_SYSTEM - Event *getQuiesceEvent(); + EndQuiesceEvent *getQuiesceEvent(); Tick readLastActivate(); Tick readLastSuspend(); @@ -330,8 +331,13 @@ class OzoneCPU : public BaseCPU int cpuId; void switchOut(Sampler *sampler); + void signalSwitched(); void takeOverFrom(BaseCPU *oldCPU); + Sampler *sampler; + + int switchCount; + #if FULL_SYSTEM Addr dbg_vtophys(Addr addr); diff --git a/cpu/ozone/cpu_impl.hh b/cpu/ozone/cpu_impl.hh index 4f3fdf521..b085f077f 100644 --- a/cpu/ozone/cpu_impl.hh +++ b/cpu/ozone/cpu_impl.hh @@ -329,15 +329,30 @@ OzoneCPU::copyToXC() */ template void -OzoneCPU::switchOut(Sampler *sampler) +OzoneCPU::switchOut(Sampler *_sampler) { + sampler = _sampler; + switchCount = 0; // Front end needs state from back end, so switch out the back end first. backEnd->switchOut(); frontEnd->switchOut(); - _status = SwitchedOut; - if (tickEvent.scheduled()) - tickEvent.squash(); - sampler->signalSwitched(); +} + +template +void +OzoneCPU::signalSwitched() +{ + if (++switchCount == 2) { + backEnd->doSwitchOut(); + frontEnd->doSwitchOut(); + if (checker) + checker->switchOut(sampler); + _status = SwitchedOut; + if (tickEvent.scheduled()) + tickEvent.squash(); + sampler->signalSwitched(); + } + assert(switchCount <= 2); } template @@ -366,6 +381,11 @@ OzoneCPU::takeOverFrom(BaseCPU *oldCPU) tickEvent.schedule(curTick); } } + // Nothing running, change status to reflect that we're no longer + // switched out. + if (_status == SwitchedOut) { + _status = Idle; + } } template @@ -666,83 +686,6 @@ OzoneCPU::tick() thread.renameTable[ZeroReg+TheISA::FP_Base_DepTag]-> setDoubleResult(0.0); - // General code flow: - // Check for any interrupts. Handle them if I do have one. - // Check if I have a need to fetch a new cache block. Either a bit could be - // set by functions indicating that I need to fetch a new block, or I could - // hang onto the last PC of the last cache block I fetched and compare the - // current PC to that. Setting a bit seems nicer but may be more error - // prone. - // Scan through the IQ to figure out if there's anything I can issue/execute - // Might need something close to the FU Pools to tell what instructions - // I can issue. How to handle loads and stores vs other insts? - // Extremely slow way: find first inst that can possibly issue; if it's a - // load or a store, then iterate through load/store queue. - // If I can't find instructions to execute and I've got room in the IQ - // (which is just a counter), then grab a few instructions out of the cache - // line buffer until I either run out or can execute up until my limit. - - numCycles++; - - traceData = NULL; - -// Fault fault = NoFault; - -#if 0 // FULL_SYSTEM - if (checkInterrupts && check_interrupts() && !inPalMode() && - status() != IcacheMissComplete) { - int ipl = 0; - int summary = 0; - checkInterrupts = false; - - if (readMiscReg(IPR_SIRR)) { - for (int i = INTLEVEL_SOFTWARE_MIN; - i < INTLEVEL_SOFTWARE_MAX; i++) { - if (readMiscReg(IPR_SIRR) & (ULL(1) << i)) { - // See table 4-19 of 21164 hardware reference - ipl = (i - INTLEVEL_SOFTWARE_MIN) + 1; - summary |= (ULL(1) << i); - } - } - } - - // Is this method so that if the interrupts are switched over from - // another CPU they'll still be handled? -// uint64_t interrupts = cpuXC->cpu->intr_status(); - uint64_t interrupts = intr_status(); - for (int i = INTLEVEL_EXTERNAL_MIN; - i < INTLEVEL_EXTERNAL_MAX; i++) { - if (interrupts & (ULL(1) << i)) { - // See table 4-19 of 21164 hardware reference - ipl = i; - summary |= (ULL(1) << i); - } - } - - if (readMiscReg(IPR_ASTRR)) - panic("asynchronous traps not implemented\n"); - - if (ipl && ipl > readMiscReg(IPR_IPLR)) { - setMiscReg(IPR_ISR, summary); - setMiscReg(IPR_INTID, ipl); - - Fault(new InterruptFault)->invoke(xc); - - DPRINTF(Flow, "Interrupt! IPLR=%d ipl=%d summary=%x\n", - readMiscReg(IPR_IPLR), ipl, summary); - } - } -#endif - - // Make call to ISA to ensure 0 register semantics...actually because the - // DynInsts will generally be the register file, this should only have to - // happen when the xc is actually written to (during a syscall or something) - // maintain $r0 semantics -// assert(renameTable[ZeroReg]->readIntResult() == 0); -#ifdef TARGET_ALPHA -// assert(renameTable[ZeroReg]->readDoubleResult() == 0); -#endif // TARGET_ALPHA - comm.advance(); frontEnd->tick(); backEnd->tick(); @@ -876,8 +819,8 @@ OzoneCPU::processInterrupts() thread.setMiscReg(IPR_INTID, ipl); // @todo: Make this more transparent if (checker) { - checkerXC->setMiscReg(IPR_ISR, summary); - checkerXC->setMiscReg(IPR_INTID, ipl); + checker->cpuXCBase()->setMiscReg(IPR_ISR, summary); + checker->cpuXCBase()->setMiscReg(IPR_INTID, ipl); } Fault fault = new InterruptFault; fault->invoke(thread.getXCProxy()); @@ -993,6 +936,15 @@ OzoneCPU::OzoneXC::takeOverFrom(ExecContext *old_context) setFuncExeInst(old_context->readFuncExeInst()); #endif + EndQuiesceEvent *other_quiesce = old_context->getQuiesceEvent(); + if (other_quiesce) { + // Point the quiesce event's XC at this XC so that it wakes up + // the proper CPU. + other_quiesce->xc = this; + } + if (thread->quiesceEvent) { + thread->quiesceEvent->xc = this; + } // storeCondFailures = 0; cpu->lockFlag = false; @@ -1016,7 +968,7 @@ OzoneCPU::OzoneXC::unserialize(Checkpoint *cp, const std::string §ion) #if FULL_SYSTEM template -Event * +EndQuiesceEvent * OzoneCPU::OzoneXC::getQuiesceEvent() { return thread->quiesceEvent; diff --git a/cpu/ozone/front_end.hh b/cpu/ozone/front_end.hh index 188925ae5..f9db9ea5c 100644 --- a/cpu/ozone/front_end.hh +++ b/cpu/ozone/front_end.hh @@ -68,6 +68,8 @@ class FrontEnd void switchOut(); + void doSwitchOut(); + void takeOverFrom(ExecContext *old_xc = NULL); bool isSwitchedOut() { return switchedOut; } diff --git a/cpu/ozone/front_end_impl.hh b/cpu/ozone/front_end_impl.hh index a3eb809d0..8ae9ec696 100644 --- a/cpu/ozone/front_end_impl.hh +++ b/cpu/ozone/front_end_impl.hh @@ -240,6 +240,9 @@ template void FrontEnd::tick() { + if (switchedOut) + return; + // @todo: Maybe I want to just have direct communication... if (fromCommit->doneSeqNum) { branchPred.update(fromCommit->doneSeqNum, 0); @@ -828,6 +831,13 @@ void FrontEnd::switchOut() { switchedOut = true; + cpu->signalSwitched(); +} + +template +void +FrontEnd::doSwitchOut() +{ memReq = NULL; squash(0, 0); instBuffer.clear(); diff --git a/cpu/ozone/inorder_back_end.hh b/cpu/ozone/inorder_back_end.hh index 6519b79e5..4039d8384 100644 --- a/cpu/ozone/inorder_back_end.hh +++ b/cpu/ozone/inorder_back_end.hh @@ -97,6 +97,10 @@ class InorderBackEnd Addr commitPC; + void switchOut() { panic("Not implemented!"); } + void doSwitchOut() { panic("Not implemented!"); } + void takeOverFrom(ExecContext *old_xc = NULL) { panic("Not implemented!"); } + public: FullCPU *cpu; @@ -330,14 +334,17 @@ InorderBackEnd::read(MemReqPtr &req, T &data, int load_idx) // translate to physical address // Fault fault = cpu->translateDataReadReq(req); + req->cmd = Read; + req->completionEvent = NULL; + req->time = curTick; + assert(!req->data); + req->data = new uint8_t[64]; + req->flags &= ~INST_READ; + Fault fault = cpu->read(req, data); + memcpy(req->data, &data, sizeof(T)); // if we have a cache, do cache access too if (dcacheInterface) { - req->cmd = Read; - req->completionEvent = NULL; - req->data = new uint8_t[64]; - req->time = curTick; - req->flags &= ~INST_READ; MemAccessResult result = dcacheInterface->access(req); // Ugly hack to get an event scheduled *only* if the access is @@ -372,6 +379,30 @@ InorderBackEnd::write(MemReqPtr &req, T &data, int store_idx) // translate to physical address // Fault fault = cpu->translateDataWriteReq(req); + req->cmd = Write; + req->completionEvent = NULL; + req->time = curTick; + assert(!req->data); + req->data = new uint8_t[64]; + memcpy(req->data, (uint8_t *)&data, req->size); + + switch(req->size) { + case 1: + cpu->write(req, (uint8_t &)data); + break; + case 2: + cpu->write(req, (uint16_t &)data); + break; + case 4: + cpu->write(req, (uint32_t &)data); + break; + case 8: + cpu->write(req, (uint64_t &)data); + break; + default: + panic("Unexpected store size!\n"); + } + if (dcacheInterface) { req->cmd = Write; req->data = new uint8_t[64]; @@ -395,7 +426,7 @@ InorderBackEnd::write(MemReqPtr &req, T &data, int store_idx) } } - +/* if (req->flags & LOCKED) { if (req->flags & UNCACHEABLE) { // Don't update result register (see stq_c in isa_desc) @@ -404,6 +435,7 @@ InorderBackEnd::write(MemReqPtr &req, T &data, int store_idx) req->result = 1; } } +*/ /* if (res && (fault == NoFault)) *res = req->result; diff --git a/cpu/ozone/lw_back_end.hh b/cpu/ozone/lw_back_end.hh index 028fdaf8c..770b66ad5 100644 --- a/cpu/ozone/lw_back_end.hh +++ b/cpu/ozone/lw_back_end.hh @@ -187,7 +187,7 @@ class LWBackEnd void instToCommit(DynInstPtr &inst); void switchOut(); - + void doSwitchOut(); void takeOverFrom(ExecContext *old_xc = NULL); bool isSwitchedOut() { return switchedOut; } @@ -314,6 +314,7 @@ class LWBackEnd bool fetchHasFault; bool switchedOut; + bool switchPending; DynInstPtr memBarrier; diff --git a/cpu/ozone/lw_back_end_impl.hh b/cpu/ozone/lw_back_end_impl.hh index d4829629d..a82dd5b70 100644 --- a/cpu/ozone/lw_back_end_impl.hh +++ b/cpu/ozone/lw_back_end_impl.hh @@ -192,6 +192,7 @@ LWBackEnd::LWBackEnd(Params *params) numWaitingMemOps = 0; waitingInsts = 0; switchedOut = false; + switchPending = false; // IQ.setBE(this); LSQ.setBE(this); @@ -631,6 +632,11 @@ LWBackEnd::tick() { DPRINTF(BE, "Ticking back end\n"); + if (switchPending && robEmpty() && !LSQ.hasStoresToWB()) { + cpu->signalSwitched(); + return; + } + ROB_count[0]+= numInsts; wbCycle = 0; @@ -682,6 +688,7 @@ LWBackEnd::tick() assert(numInsts == instList.size()); assert(waitingInsts == waitingList.size()); assert(numWaitingMemOps == waitingMemOps.size()); + assert(!switchedOut); #endif } @@ -1440,12 +1447,24 @@ LWBackEnd::fetchFault(Fault &fault) template void LWBackEnd::switchOut() +{ + switchPending = true; +} + +template +void +LWBackEnd::doSwitchOut() { switchedOut = true; + switchPending = false; // Need to get rid of all committed, non-speculative state and write it // to memory/XC. In this case this is stores that have committed and not // yet written back. + assert(robEmpty()); + assert(!LSQ.hasStoresToWB()); + LSQ.switchOut(); + squash(0); } diff --git a/cpu/ozone/lw_lsq_impl.hh b/cpu/ozone/lw_lsq_impl.hh index 9b7e48f96..fdf6bff07 100644 --- a/cpu/ozone/lw_lsq_impl.hh +++ b/cpu/ozone/lw_lsq_impl.hh @@ -791,6 +791,8 @@ template void OzoneLWLSQ::switchOut() { +// assert(loads == 0); + assert(storesToWB == 0); switchedOut = true; SQIt sq_it = --(storeQueue.end()); while (storesToWB > 0 && @@ -810,9 +812,12 @@ OzoneLWLSQ::switchOut() // Store conditionals don't complete until *after* they have written // back. If it's here and not yet sent to memory, then don't bother // as it's not part of committed state. - if (inst->isDataPrefetch() || (*sq_it).committed || - (*sq_it).req->flags & LOCKED) { + if (inst->isDataPrefetch() || (*sq_it).committed) { + sq_it--; + continue; + } else if ((*sq_it).req->flags & LOCKED) { sq_it--; + assert(!(*sq_it).canWB || ((*sq_it).canWB && (*sq_it).req->flags & LOCKED)); continue; } -- cgit v1.2.3 From 36581a534240c322e1fc28b8bd6e8f13f2b0fefd Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Wed, 17 May 2006 14:25:10 -0400 Subject: Faults generated at fetch are passed to the backend by creating a dummy nop instruction and giving it the fault. This unifies front end faults and normal instruction faults. cpu/checker/cpu.cc: Fixups for fetch fault being sent with the instruction. cpu/o3/fetch_impl.hh: cpu/ozone/front_end_impl.hh: Send any faults generated at fetch along with a fake nop instruction to the back end. This avoids having to use direct communication to check if the entire front end has drained; it is naturally handled through the nop's fault being handled when it reaches the head of commit. cpu/ozone/front_end.hh: Add extra status TrapPending. cpu/ozone/lw_back_end_impl.hh: Fetch fault handled through a dummy nop carrying the fetch fault. Avoid putting Nops on the exeList. --HG-- extra : convert_revision : 8d9899748b34c204763a49c48a9b5113864f5789 --- cpu/ozone/front_end.hh | 1 + cpu/ozone/front_end_impl.hh | 36 ++++++++++++++++++++++++++++-------- cpu/ozone/lw_back_end_impl.hh | 10 ++++++++-- 3 files changed, 37 insertions(+), 10 deletions(-) (limited to 'cpu/ozone') diff --git a/cpu/ozone/front_end.hh b/cpu/ozone/front_end.hh index f9db9ea5c..326f7d2c9 100644 --- a/cpu/ozone/front_end.hh +++ b/cpu/ozone/front_end.hh @@ -120,6 +120,7 @@ class FrontEnd SerializeComplete, RenameBlocked, QuiescePending, + TrapPending, BEBlocked }; diff --git a/cpu/ozone/front_end_impl.hh b/cpu/ozone/front_end_impl.hh index 8ae9ec696..cd57aeef4 100644 --- a/cpu/ozone/front_end_impl.hh +++ b/cpu/ozone/front_end_impl.hh @@ -268,11 +268,9 @@ FrontEnd::tick() } if (status == RenameBlocked || status == SerializeBlocked || - status == BEBlocked) { - // This might cause the front end to run even though it - // shouldn't, but this should only be a problem for one cycle. - // Also will cause a one cycle bubble between changing state - // and restarting. + status == TrapPending || status == BEBlocked) { + // Will cause a one cycle bubble between changing state and + // restarting. DPRINTF(FE, "In blocked status.\n"); fetchBlockedCycles++; @@ -537,9 +535,32 @@ void FrontEnd::handleFault(Fault &fault) { DPRINTF(FE, "Fault at fetch, telling commit\n"); - backEnd->fetchFault(fault); +// backEnd->fetchFault(fault); // We're blocked on the back end until it handles this fault. - status = BEBlocked; + status = TrapPending; + + // Get a sequence number. + InstSeqNum inst_seq = getAndIncrementInstSeq(); + // We will use a nop in order to carry the fault. + ExtMachInst ext_inst = TheISA::NoopMachInst; + + // Create a new DynInst from the dummy nop. + DynInstPtr instruction = new DynInst(ext_inst, PC, + PC+sizeof(MachInst), + inst_seq, cpu); + instruction->setPredTarg(instruction->readNextPC()); +// instruction->setThread(tid); + +// instruction->setASID(tid); + + instruction->setState(thread); + + instruction->traceData = NULL; + + instruction->fault = fault; + instruction->setCanIssue(); + instBuffer.push_back(instruction); + ++instBufferSize; } template @@ -881,7 +902,6 @@ FrontEnd::dumpInsts() (*buff_it)->isSquashed()); buff_it++; } - } template diff --git a/cpu/ozone/lw_back_end_impl.hh b/cpu/ozone/lw_back_end_impl.hh index a82dd5b70..db0872e52 100644 --- a/cpu/ozone/lw_back_end_impl.hh +++ b/cpu/ozone/lw_back_end_impl.hh @@ -652,7 +652,7 @@ LWBackEnd::tick() squashFromTrap(); } else if (xcSquash) { squashFromXC(); - } else if (fetchHasFault && robEmpty() && frontEnd->isEmpty() && !LSQ.hasStoresToWB()) { + } /*else if (fetchHasFault && robEmpty() && frontEnd->isEmpty() && !LSQ.hasStoresToWB()) { DPRINTF(BE, "ROB and front end empty, handling fetch fault\n"); Fault fetch_fault = frontEnd->getFault(); if (fetch_fault == NoFault) { @@ -662,7 +662,7 @@ LWBackEnd::tick() handleFault(fetch_fault); fetchHasFault = false; } - } + }*/ #endif if (dispatchStatus != Blocked) { @@ -777,6 +777,12 @@ LWBackEnd::dispatchInsts() inst->seqNum); exeList.push(inst); } + } else if (inst->isNop()) { + DPRINTF(BE, "Nop encountered [sn:%lli], skipping exeList.\n", + inst->seqNum); + inst->setIssued(); + inst->setExecuted(); + inst->setCanCommit(); } else { DPRINTF(BE, "Instruction [sn:%lli] ready, addding to exeList.\n", inst->seqNum); -- cgit v1.2.3 From c7e7d07ec395156015e3baf52048c403d28a6442 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Fri, 19 May 2006 14:27:46 -0400 Subject: Fixes for regression build errors. --HG-- extra : convert_revision : 1f59c853cb0e327d7cf586021b5139f1242e4f28 --- cpu/ozone/cpu.hh | 1 - cpu/ozone/cpu_impl.hh | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'cpu/ozone') diff --git a/cpu/ozone/cpu.hh b/cpu/ozone/cpu.hh index 1d522b2fa..7e12e75e5 100644 --- a/cpu/ozone/cpu.hh +++ b/cpu/ozone/cpu.hh @@ -89,7 +89,6 @@ class OzoneCPU : public BaseCPU typedef typename Impl::FrontEnd FrontEnd; typedef typename Impl::BackEnd BackEnd; typedef typename Impl::DynInst DynInst; - typedef typename Impl::DynInst DynInst; typedef typename Impl::DynInstPtr DynInstPtr; typedef TheISA::MiscReg MiscReg; diff --git a/cpu/ozone/cpu_impl.hh b/cpu/ozone/cpu_impl.hh index b085f077f..031b4b145 100644 --- a/cpu/ozone/cpu_impl.hh +++ b/cpu/ozone/cpu_impl.hh @@ -934,8 +934,7 @@ OzoneCPU::OzoneXC::takeOverFrom(ExecContext *old_context) setCpuId(old_context->readCpuId()); #if !FULL_SYSTEM setFuncExeInst(old_context->readFuncExeInst()); -#endif - +#else EndQuiesceEvent *other_quiesce = old_context->getQuiesceEvent(); if (other_quiesce) { // Point the quiesce event's XC at this XC so that it wakes up @@ -947,6 +946,7 @@ OzoneCPU::OzoneXC::takeOverFrom(ExecContext *old_context) } // storeCondFailures = 0; cpu->lockFlag = false; +#endif old_context->setStatus(ExecContext::Unallocated); } -- cgit v1.2.3 From ff3d16ca1f7d83ce7932868d2bf1cb3e526562ea Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Tue, 23 May 2006 16:51:16 -0400 Subject: Move kernel stats out of CPU and into XC. arch/alpha/ev5.cc: Move kernel stats out of CPU and into XC. Also be sure to check if the kernel stats exist prior to using them. --HG-- extra : convert_revision : 565cd7026410fd7d8586f953d9b328c2e67a9473 --- cpu/ozone/cpu.hh | 28 ++----- cpu/ozone/cpu_impl.hh | 211 ++++++++------------------------------------------ 2 files changed, 41 insertions(+), 198 deletions(-) (limited to 'cpu/ozone') diff --git a/cpu/ozone/cpu.hh b/cpu/ozone/cpu.hh index 7e12e75e5..5af2b02b2 100644 --- a/cpu/ozone/cpu.hh +++ b/cpu/ozone/cpu.hh @@ -57,6 +57,10 @@ class Sampler; class RemoteGDB; class GDBListener; +namespace Kernel { + class Statistics; +}; + #else class Process; @@ -116,6 +120,8 @@ class OzoneCPU : public BaseCPU AlphaITB *getITBPtr() { return cpu->itb; } AlphaDTB * getDTBPtr() { return cpu->dtb; } + + Kernel::Statistics *getKernelStats() { return thread->kernelStats; } #else Process *getProcessPtr() { return thread->process; } #endif @@ -238,14 +244,7 @@ class OzoneCPU : public BaseCPU private: OzoneThreadState thread; -/* - // Squash event for when the XC needs to squash all inflight instructions. - struct XCSquashEvent : public Event - { - void process(); - const char *description(); - }; -*/ + public: // main simulation loop (one cycle) void tick(); @@ -288,7 +287,6 @@ class OzoneCPU : public BaseCPU void trace_data(T data); public: - // enum Status { Running, Idle, @@ -325,8 +323,6 @@ class OzoneCPU : public BaseCPU int readCpuId() { return cpuId; } -// FunctionalMemory *getMemPtr() { return mem; } - int cpuId; void switchOut(Sampler *sampler); @@ -369,8 +365,6 @@ class OzoneCPU : public BaseCPU Status status() const { return _status; } void setStatus(Status new_status) { _status = new_status; } - // Not sure what an activate() call on the CPU's proxy XC would mean... - virtual void activateContext(int thread_num, int delay); virtual void suspendContext(int thread_num); virtual void deallocateContext(int thread_num); @@ -384,7 +378,6 @@ class OzoneCPU : public BaseCPU public: Counter numInst; Counter startNumInst; -// Stats::Scalar<> numInsts; virtual Counter totalInstructions() const { @@ -392,9 +385,6 @@ class OzoneCPU : public BaseCPU } private: - // number of simulated memory references -// Stats::Scalar<> numMemRefs; - // number of simulated loads Counter numLoad; Counter startNumLoad; @@ -472,7 +462,6 @@ class OzoneCPU : public BaseCPU template Fault read(MemReqPtr &req, T &data) { -// panic("CPU READ NOT IMPLEMENTED W/NEW MEMORY\n"); #if 0 #if FULL_SYSTEM && defined(TARGET_ALPHA) if (req->flags & LOCKED) { @@ -483,7 +472,6 @@ class OzoneCPU : public BaseCPU #endif Fault error; if (req->flags & LOCKED) { -// lockAddr = req->paddr; lockAddrList.insert(req->paddr); lockFlag = true; } @@ -558,7 +546,7 @@ class OzoneCPU : public BaseCPU if (req->flags & UNCACHEABLE) { req->result = 2; } else { - if (this->lockFlag/* && this->lockAddr == req->paddr*/) { + if (this->lockFlag) { if (lockAddrList.find(req->paddr) != lockAddrList.end()) { req->result = 1; diff --git a/cpu/ozone/cpu_impl.hh b/cpu/ozone/cpu_impl.hh index 031b4b145..5675da3a8 100644 --- a/cpu/ozone/cpu_impl.hh +++ b/cpu/ozone/cpu_impl.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005 The Regents of The University of Michigan + * Copyright (c) 2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -26,8 +26,8 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include -#include +//#include +//#include #include "arch/isa_traits.hh" // For MachInst #include "base/trace.hh" @@ -39,7 +39,7 @@ #include "cpu/ozone/cpu.hh" #include "cpu/quiesce_event.hh" #include "cpu/static_inst.hh" -#include "mem/base_mem.hh" +//#include "mem/base_mem.hh" #include "mem/mem_interface.hh" #include "sim/sim_object.hh" #include "sim/stats.hh" @@ -50,7 +50,7 @@ #include "arch/alpha/tlb.hh" #include "arch/vtophys.hh" #include "base/callback.hh" -#include "base/remote_gdb.hh" +//#include "base/remote_gdb.hh" #include "cpu/profile.hh" #include "kern/kernel_stats.hh" #include "mem/functional/memory_control.hh" @@ -94,80 +94,26 @@ OzoneCPU::TickEvent::description() { return "OzoneCPU tick event"; } -/* -template -OzoneCPU::ICacheCompletionEvent::ICacheCompletionEvent(OzoneCPU *_cpu) - : Event(&mainEventQueue), - cpu(_cpu) -{ -} - -template -void -OzoneCPU::ICacheCompletionEvent::process() -{ - cpu->processICacheCompletion(); -} - -template -const char * -OzoneCPU::ICacheCompletionEvent::description() -{ - return "OzoneCPU I-cache completion event"; -} - -template -OzoneCPU::DCacheCompletionEvent:: -DCacheCompletionEvent(OzoneCPU *_cpu, - DynInstPtr &_inst, - DCacheCompEventIt &_dcceIt) - : Event(&mainEventQueue), - cpu(_cpu), - inst(_inst), - dcceIt(_dcceIt) -{ - this->setFlags(Event::AutoDelete); -} - -template -void -OzoneCPU::DCacheCompletionEvent::process() -{ - inst->setCompleted(); - - // Maybe remove the EA from the list of addrs? - cpu->eaList.clearAddr(inst->seqNum, inst->getEA()); - cpu->dCacheCompList.erase(this->dcceIt); -} -template -const char * -OzoneCPU::DCacheCompletionEvent::description() -{ - return "OzoneCPU D-cache completion event"; -} -*/ template OzoneCPU::OzoneCPU(Params *p) #if FULL_SYSTEM - : BaseCPU(p), thread(this, 0, p->mem), tickEvent(this, p->width), mem(p->mem), + : BaseCPU(p), thread(this, 0, p->mem), tickEvent(this, p->width), + mem(p->mem), #else : BaseCPU(p), thread(this, 0, p->workload[0], 0), tickEvent(this, p->width), mem(p->workload[0]->getMemory()), #endif comm(5, 5) { - if (p->checker) { - BaseCPU *temp_checker = p->checker; - checker = dynamic_cast *>(temp_checker); - } else { - checker = NULL; - } frontEnd = new FrontEnd(p); backEnd = new BackEnd(p); _status = Idle; - if (checker) { + + if (p->checker) { + BaseCPU *temp_checker = p->checker; + checker = dynamic_cast *>(temp_checker); checker->setMemory(mem); #if FULL_SYSTEM checker->setSystem(p->system); @@ -176,19 +122,18 @@ OzoneCPU::OzoneCPU(Params *p) thread.xcProxy = checkerXC; xcProxy = checkerXC; } else { + checker = NULL; thread.xcProxy = &ozoneXC; xcProxy = &ozoneXC; } - thread.inSyscall = false; - ozoneXC.cpu = this; ozoneXC.thread = &thread; + thread.inSyscall = false; + thread.setStatus(ExecContext::Suspended); #if FULL_SYSTEM -// xc = new ExecContext(this, 0, p->system, p->itb, p->dtb, p->mem); - /***** All thread state stuff *****/ thread.cpu = this; thread.tid = 0; @@ -217,31 +162,15 @@ OzoneCPU::OzoneCPU(Params *p) thread.profileNode = &dummyNode; thread.profilePC = 3; #else -// xc = new ExecContext(this, /* thread_num */ 0, p->workload[0], /* asid */ 0); thread.cpu = this; thread.tid = 0; thread.process = p->workload[0]; -// thread.mem = thread.process->getMemory(); thread.asid = 0; #endif // !FULL_SYSTEM -/* - icacheInterface = p->icache_interface; - dcacheInterface = p->dcache_interface; - - cacheMemReq = new MemReq(); - cacheMemReq->xc = xc; - cacheMemReq->asid = 0; - cacheMemReq->data = new uint8_t[64]; -*/ + numInst = 0; startNumInst = 0; -/* numLoad = 0; - startNumLoad = 0; - lastIcacheStall = 0; - lastDcacheStall = 0; - issueWidth = p->issueWidth; -*/ execContexts.push_back(xcProxy); frontEnd->setCPU(this); @@ -286,47 +215,7 @@ template OzoneCPU::~OzoneCPU() { } -/* -template -void -OzoneCPU::copyFromXC() -{ - for (int i = 0; i < TheISA::TotalNumRegs; ++i) { - if (i < TheISA::NumIntRegs) { - renameTable[i]->setIntResult(xc->readIntReg(i)); - } else if (i < TheISA::NumFloatRegs) { - renameTable[i]->setDoubleResult(xc->readFloatRegDouble(i)); - } - } - - DPRINTF(OzoneCPU, "Func Exe inst is: %i\n", xc->func_exe_inst); - backEnd->funcExeInst = xc->func_exe_inst; -// PC = xc->readPC(); -// nextPC = xc->regs.npc; -} - -template -void -OzoneCPU::copyToXC() -{ - for (int i = 0; i < TheISA::TotalNumRegs; ++i) { - if (i < TheISA::NumIntRegs) { - xc->setIntReg(i, renameTable[i]->readIntResult()); - } else if (i < TheISA::NumFloatRegs) { - xc->setFloatRegDouble(i, renameTable[i]->readDoubleResult()); - } - } - - this->xc->regs.miscRegs.fpcr = this->regFile.miscRegs[tid].fpcr; - this->xc->regs.miscRegs.uniq = this->regFile.miscRegs[tid].uniq; - this->xc->regs.miscRegs.lock_flag = this->regFile.miscRegs[tid].lock_flag; - this->xc->regs.miscRegs.lock_addr = this->regFile.miscRegs[tid].lock_addr; - xc->func_exe_inst = backEnd->funcExeInst; - xc->regs.pc = PC; - xc->regs.npc = nextPC; -} -*/ template void OzoneCPU::switchOut(Sampler *_sampler) @@ -394,7 +283,6 @@ OzoneCPU::activateContext(int thread_num, int delay) { // Eventually change this in SMT. assert(thread_num == 0); -// assert(xcProxy); assert(_status == Idle); notIdleFraction++; @@ -410,8 +298,8 @@ OzoneCPU::suspendContext(int thread_num) { // Eventually change this in SMT. assert(thread_num == 0); -// assert(xcProxy); - // @todo: Figure out how to initially set the status properly so this is running. + // @todo: Figure out how to initially set the status properly so + // this is running. // assert(_status == Running); notIdleFraction--; unscheduleTickEvent(); @@ -486,14 +374,7 @@ void OzoneCPU::init() { BaseCPU::init(); -/* - copyFromXC(); - // ALso copy over PC/nextPC. This isn't normally copied in "copyFromXC()" - // so that the XC doesn't mess up the PC when returning from a syscall. - PC = xc->readPC(); - nextPC = xc->regs.npc; -*/ // Mark this as in syscall so it won't need to squash thread.inSyscall = true; #if FULL_SYSTEM @@ -514,8 +395,6 @@ template void OzoneCPU::serialize(std::ostream &os) { - // At this point, all DCacheCompEvents should be processed. - BaseCPU::serialize(os); SERIALIZE_ENUM(_status); nameOut(os, csprintf("%s.xc", name())); @@ -631,31 +510,7 @@ OzoneCPU::dbg_vtophys(Addr addr) return vtophys(xcProxy, addr); } #endif // FULL_SYSTEM -/* -template -void -OzoneCPU::processICacheCompletion() -{ - switch (status()) { - case IcacheMiss: - DPRINTF(OzoneCPU, "OzoneCPU: Finished Icache miss.\n"); - - icacheStallCycles += curTick - lastIcacheStall; - _status = IcacheMissComplete; - cacheBlkValid = true; -// scheduleTickEvent(1); - break; - case SwitchedOut: - // If this CPU has been switched out due to sampling/warm-up, - // ignore any further status changes (e.g., due to cache - // misses outstanding at the time of the switch). - return; - default: - panic("OzoneCPU::processICacheCompletion: bad state"); - break; - } -} -*/ + #if FULL_SYSTEM template void @@ -663,7 +518,6 @@ OzoneCPU::post_interrupt(int int_num, int index) { BaseCPU::post_interrupt(int_num, index); -// if (thread._status == ExecContext::Suspended) { if (_status == Idle) { DPRINTF(IPI,"Suspended Processor awoke\n"); // thread.activate(); @@ -690,9 +544,6 @@ OzoneCPU::tick() frontEnd->tick(); backEnd->tick(); - // Do this here? For now the front end will control the PC. -// PC = nextPC; - // check for instruction-count-based events comInstEventQueue[0]->serviceEvents(numInst); @@ -742,11 +593,13 @@ OzoneCPU::setSyscallReturn(SyscallReturn return_value, int tid) if (return_value.successful()) { // no error thread.renameTable[SyscallSuccessReg]->setIntResult(0); - thread.renameTable[ReturnValueReg]->setIntResult(return_value.value()); + thread.renameTable[ReturnValueReg]->setIntResult( + return_value.value()); } else { // got an error, return details thread.renameTable[SyscallSuccessReg]->setIntResult((IntReg) -1); - thread.renameTable[ReturnValueReg]->setIntResult(-return_value.value()); + thread.renameTable[ReturnValueReg]->setIntResult( + -return_value.value()); } } #else @@ -756,15 +609,10 @@ OzoneCPU::hwrei() { // Need to move this to ISA code // May also need to make this per thread -/* - if (!inPalMode()) - return new UnimplementedOpcodeFault; - thread.setNextPC(thread.readMiscReg(AlphaISA::IPR_EXC_ADDR)); -*/ lockFlag = false; lockAddrList.clear(); - kernelStats->hwrei(); + thread.kernelStats->hwrei(); checkInterrupts = true; @@ -835,7 +683,7 @@ OzoneCPU::simPalCheck(int palFunc) { // Need to move this to ISA code // May also need to make this per thread - this->kernelStats->callpal(palFunc, xcProxy); + thread.kernelStats->callpal(palFunc, xcProxy); switch (palFunc) { case PAL::halt: @@ -874,7 +722,6 @@ template void OzoneCPU::OzoneXC::setStatus(Status new_status) { -// cpu->_status = new_status; thread->_status = new_status; } @@ -932,6 +779,7 @@ OzoneCPU::OzoneXC::takeOverFrom(ExecContext *old_context) setStatus(old_context->status()); copyArchRegs(old_context); setCpuId(old_context->readCpuId()); + #if !FULL_SYSTEM setFuncExeInst(old_context->readFuncExeInst()); #else @@ -944,6 +792,8 @@ OzoneCPU::OzoneXC::takeOverFrom(ExecContext *old_context) if (thread->quiesceEvent) { thread->quiesceEvent->xc = this; } + + thread->kernelStats = old_context->getKernelStats(); // storeCondFailures = 0; cpu->lockFlag = false; #endif @@ -954,7 +804,12 @@ OzoneCPU::OzoneXC::takeOverFrom(ExecContext *old_context) template void OzoneCPU::OzoneXC::regStats(const std::string &name) -{ } +{ +#if FULL_SYSTEM + thread->kernelStats = new Kernel::Statistics(cpu->system); + thread->kernelStats->regStats(name + ".kern"); +#endif +} template void -- cgit v1.2.3 From 6c386396faef6f48f2d01911e59d09b192bf3c45 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Tue, 23 May 2006 16:57:14 -0400 Subject: Code cleanup. cpu/base_dyn_inst.hh: Code cleanup --HG-- extra : convert_revision : 501c03f8e4346ffbcb545ddeee30c1f8ded9baa7 --- cpu/ozone/dyn_inst.hh | 52 +-------------------- cpu/ozone/dyn_inst_impl.hh | 7 +-- cpu/ozone/front_end.hh | 34 +++++++++++--- cpu/ozone/front_end_impl.hh | 66 ++++++++++++--------------- cpu/ozone/lw_back_end.hh | 103 +++++++++++------------------------------- cpu/ozone/lw_back_end_impl.hh | 67 ++++++++++++++++----------- cpu/ozone/lw_lsq.hh | 27 ++--------- cpu/ozone/lw_lsq_impl.hh | 41 ++++------------- cpu/ozone/rename_table.hh | 28 ++++++++++++ cpu/ozone/thread_state.hh | 38 ++++++++++++---- 10 files changed, 194 insertions(+), 269 deletions(-) (limited to 'cpu/ozone') diff --git a/cpu/ozone/dyn_inst.hh b/cpu/ozone/dyn_inst.hh index f251c28ea..5d48bb361 100644 --- a/cpu/ozone/dyn_inst.hh +++ b/cpu/ozone/dyn_inst.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005 The Regents of The University of Michigan + * Copyright (c) 2005-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -52,8 +52,6 @@ class OzoneDynInst : public BaseDynInst // Typedef for DynInstPtr. This is really just a RefCountingPtr. typedef typename Impl::DynInstPtr DynInstPtr; -// typedef typename Impl::BranchPred::BPredInfo BPredInfo; - typedef TheISA::ExtMachInst ExtMachInst; typedef TheISA::MachInst MachInst; typedef TheISA::MiscReg MiscReg; @@ -107,12 +105,6 @@ class OzoneDynInst : public BaseDynInst // up. In the future, you only really need a counter. bool memDepReady() { return srcMemInsts.empty(); } -// void setBPredInfo(const BPredInfo &bp_info) { bpInfo = bp_info; } - -// BPredInfo &getBPredInfo() { return bpInfo; } - -// OzoneXC *thread; - private: void initInstPtrs(); @@ -133,20 +125,12 @@ class OzoneDynInst : public BaseDynInst */ DynInstPtr prevDestInst[MaxInstSrcRegs]; -// BPredInfo bpInfo; - public: Fault initiateAcc(); Fault completeAcc(); -/* - template - Fault read(Addr addr, T &data, unsigned flags); - template - Fault write(T data, Addr addr, unsigned flags, uint64_t *res); -*/ // The register accessor methods provide the index of the // instruction's operand (e.g., 0 or 1), not the architectural // register index, to simplify the implementation of register @@ -244,38 +228,4 @@ class OzoneDynInst : public BaseDynInst bool iqItValid; }; -/* -template -template -inline Fault -OzoneDynInst::read(Addr addr, T &data, unsigned flags) -{ - Fault fault = this->cpu->read(addr, data, flags, this); - - if (this->traceData) { - this->traceData->setAddr(addr); - this->traceData->setData(data); - } - - return fault; -} - -template -template -inline Fault -OzoneDynInst::write(T data, Addr addr, unsigned flags, uint64_t *res) -{ - Fault fault = this->cpu->write(data, addr, flags, res, this); - - this->storeSize = sizeof(T); - this->storeData = data; - - if (this->traceData) { - this->traceData->setAddr(addr); - this->traceData->setData(data); - } - - return fault; -} -*/ #endif // __CPU_OZONE_DYN_INST_HH__ diff --git a/cpu/ozone/dyn_inst_impl.hh b/cpu/ozone/dyn_inst_impl.hh index a7e4460a1..f891ec515 100644 --- a/cpu/ozone/dyn_inst_impl.hh +++ b/cpu/ozone/dyn_inst_impl.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005 The Regents of The University of Michigan + * Copyright (c) 2005-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -266,12 +266,7 @@ OzoneDynInst::hwrei() this->setNextPC(this->thread->readMiscReg(AlphaISA::IPR_EXC_ADDR)); this->cpu->hwrei(); -/* - this->cpu->kernelStats->hwrei(); - this->cpu->checkInterrupts = true; - this->cpu->lockFlag = false; -*/ // FIXME: XXX check for interrupts? XXX return NoFault; } diff --git a/cpu/ozone/front_end.hh b/cpu/ozone/front_end.hh index 326f7d2c9..dd382491f 100644 --- a/cpu/ozone/front_end.hh +++ b/cpu/ozone/front_end.hh @@ -1,14 +1,39 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ #ifndef __CPU_OZONE_FRONT_END_HH__ #define __CPU_OZONE_FRONT_END_HH__ #include -//#include "cpu/ozone/cpu.hh" #include "cpu/inst_seq.hh" #include "cpu/o3/bpred_unit.hh" #include "cpu/ozone/rename_table.hh" -//#include "cpu/ozone/thread_state.hh" #include "mem/mem_req.hh" #include "sim/eventq.hh" #include "sim/stats.hh" @@ -132,11 +157,6 @@ class FrontEnd typedef typename Impl::BranchPred BranchPred; - // Typedef for semi-opaque type that holds any information the branch - // predictor needs to update itself. Only two fields are used outside of - // branch predictor, nextPC and isTaken. -// typedef typename BranchPred::BPredInfo BPredInfo; - BranchPred branchPred; class ICacheCompletionEvent : public Event diff --git a/cpu/ozone/front_end_impl.hh b/cpu/ozone/front_end_impl.hh index cd57aeef4..15adae9b4 100644 --- a/cpu/ozone/front_end_impl.hh +++ b/cpu/ozone/front_end_impl.hh @@ -1,3 +1,30 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ #include "arch/faults.hh" #include "arch/isa_traits.hh" @@ -26,14 +53,6 @@ FrontEnd::FrontEnd(Params *params) status = Idle; - // Setup branch predictor. - - // Setup Memory Request -/* - memReq = new MemReq(); - memReq->asid = 0; - memReq->data = new uint8_t[64]; -*/ memReq = NULL; // Size of cache block. cacheBlkSize = icacheInterface ? icacheInterface->getBlockSize() : 64; @@ -77,7 +96,6 @@ void FrontEnd::setXC(ExecContext *xc_ptr) { xc = xc_ptr; -// memReq->xc = xc; } template @@ -321,7 +339,6 @@ FrontEnd::tick() break; } - // if (generalizeFetch) { processInst(inst); if (status == SerializeBlocked) { @@ -333,11 +350,6 @@ FrontEnd::tick() instBuffer.push_back(inst); ++instBufferSize; ++num_inst; - // } else { - // fetch(num_inst); - // decode(num_inst); - // rename(num_inst); - // } #if FULL_SYSTEM if (inst->isQuiesce()) { @@ -402,10 +414,6 @@ FrontEnd::fetchCacheLine() // Translate the instruction request. fault = cpu->translateInstReq(memReq); - // In the case of faults, the fetch stage may need to stall and wait - // on what caused the fetch (ITB or Icache miss). -// assert(fault == NoFault); - // Now do the timing access to see whether or not the instruction // exists within the cache. if (icacheInterface && fault == NoFault) { @@ -466,7 +474,6 @@ FrontEnd::processInst(DynInstPtr &inst) Addr inst_PC = inst->readPC(); -// BPredInfo bp_info = branchPred.lookup(inst_PC); if (!inst->isControl()) { inst->setPredTarg(inst->readNextPC()); } else { @@ -482,7 +489,6 @@ FrontEnd::processInst(DynInstPtr &inst) "%#x\n", inst->seqNum, inst_PC, next_PC); // inst->setNextPC(next_PC); -// inst->setBPredInfo(bp_info); // Not sure where I should set this PC = next_PC; @@ -535,7 +541,7 @@ void FrontEnd::handleFault(Fault &fault) { DPRINTF(FE, "Fault at fetch, telling commit\n"); -// backEnd->fetchFault(fault); + // We're blocked on the back end until it handles this fault. status = TrapPending; @@ -586,9 +592,6 @@ FrontEnd::squash(const InstSeqNum &squash_num, const Addr &next_PC, instBuffer.pop_back(); --instBufferSize; - // Fix up branch predictor if necessary. -// branchPred.undo(inst->getBPredInfo()); - freeRegs+= inst->numDestRegs(); } @@ -607,7 +610,6 @@ FrontEnd::squash(const InstSeqNum &squash_num, const Addr &next_PC, // Clear the icache miss if it's outstanding. if (status == IcacheMissStall && icacheInterface) { DPRINTF(FE, "Squashing outstanding Icache miss.\n"); -// icacheInterface->squash(0); memReq = NULL; } @@ -693,17 +695,9 @@ template bool FrontEnd::updateStatus() { -// bool rename_block = freeRegs <= 0; bool serialize_block = !backEnd->robEmpty() || instBufferSize; bool be_block = cpu->decoupledFrontEnd ? false : backEnd->isBlocked(); bool ret_val = false; -/* - // Should already be handled through addFreeRegs function - if (status == RenameBlocked && !rename_block) { - status = Running; - ret_val = true; - } -*/ if (status == SerializeBlocked && !serialize_block) { status = SerializeComplete; @@ -753,10 +747,6 @@ FrontEnd::getInstFromCacheline() // PC of inst is not in this cache block if (PC >= (cacheBlkPC + cacheBlkSize) || PC < cacheBlkPC || !cacheBlkValid) { -// DPRINTF(OoOCPU, "OoOCPU: PC is not in this cache block\n"); -// DPRINTF(OoOCPU, "OoOCPU: PC: %#x, cacheBlkPC: %#x, cacheBlkValid: %i", -// PC, cacheBlkPC, cacheBlkValid); -// panic("Instruction not in cache line or cache line invalid!"); return NULL; } diff --git a/cpu/ozone/lw_back_end.hh b/cpu/ozone/lw_back_end.hh index 770b66ad5..1c03ffb73 100644 --- a/cpu/ozone/lw_back_end.hh +++ b/cpu/ozone/lw_back_end.hh @@ -1,3 +1,30 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ #ifndef __CPU_OZONE_LW_BACK_END_HH__ #define __CPU_OZONE_LW_BACK_END_HH__ @@ -238,10 +265,6 @@ class LWBackEnd Counter funcExeInst; private: -// typedef typename Impl::InstQueue InstQueue; - -// InstQueue IQ; - typedef typename Impl::LdstQueue LdstQueue; LdstQueue LSQ; @@ -342,8 +365,6 @@ class LWBackEnd bool exactFullStall; -// bool fetchRedirect[Impl::MaxThreads]; - // number of cycles stalled for D-cache misses /* Stats::Scalar<> dcacheStallCycles; Counter lastDcacheStall; @@ -438,43 +459,6 @@ template Fault LWBackEnd::read(MemReqPtr &req, T &data, int load_idx) { -/* memReq->reset(addr, sizeof(T), flags); - - // translate to physical address - Fault fault = cpu->translateDataReadReq(memReq); - - // if we have a cache, do cache access too - if (fault == NoFault && dcacheInterface) { - memReq->cmd = Read; - memReq->completionEvent = NULL; - memReq->time = curTick; - memReq->flags &= ~INST_READ; - MemAccessResult result = dcacheInterface->access(memReq); - - // Ugly hack to get an event scheduled *only* if the access is - // a miss. We really should add first-class support for this - // at some point. - if (result != MA_HIT && dcacheInterface->doEvents()) { - // Fix this hack for keeping funcExeInst correct with loads that - // are executed twice. - --funcExeInst; - - memReq->completionEvent = &cacheCompletionEvent; - lastDcacheStall = curTick; -// unscheduleTickEvent(); -// status = DcacheMissStall; - DPRINTF(OzoneCPU, "Dcache miss stall!\n"); - } else { - // do functional access - fault = thread->mem->read(memReq, data); - - } - } -*/ -/* - if (!dcacheInterface && (memReq->flags & UNCACHEABLE)) - recordEvent("Uncached Read"); -*/ return LSQ.read(req, data, load_idx); } @@ -483,39 +467,6 @@ template Fault LWBackEnd::write(MemReqPtr &req, T &data, int store_idx) { -/* - memReq->reset(addr, sizeof(T), flags); - - // translate to physical address - Fault fault = cpu->translateDataWriteReq(memReq); - - if (fault == NoFault && dcacheInterface) { - memReq->cmd = Write; - memcpy(memReq->data,(uint8_t *)&data,memReq->size); - memReq->completionEvent = NULL; - memReq->time = curTick; - memReq->flags &= ~INST_READ; - MemAccessResult result = dcacheInterface->access(memReq); - - // Ugly hack to get an event scheduled *only* if the access is - // a miss. We really should add first-class support for this - // at some point. - if (result != MA_HIT && dcacheInterface->doEvents()) { - memReq->completionEvent = &cacheCompletionEvent; - lastDcacheStall = curTick; -// unscheduleTickEvent(); -// status = DcacheMissStall; - DPRINTF(OzoneCPU, "Dcache miss stall!\n"); - } - } - - if (res && (fault == NoFault)) - *res = memReq->result; - */ -/* - if (!dcacheInterface && (memReq->flags & UNCACHEABLE)) - recordEvent("Uncached Write"); -*/ return LSQ.write(req, data, store_idx); } diff --git a/cpu/ozone/lw_back_end_impl.hh b/cpu/ozone/lw_back_end_impl.hh index db0872e52..881d6e6b1 100644 --- a/cpu/ozone/lw_back_end_impl.hh +++ b/cpu/ozone/lw_back_end_impl.hh @@ -1,7 +1,34 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ -#include "encumbered/cpu/full/op_class.hh" #include "cpu/checker/cpu.hh" #include "cpu/ozone/lw_back_end.hh" +#include "encumbered/cpu/full/op_class.hh" template void @@ -194,7 +221,6 @@ LWBackEnd::LWBackEnd(Params *params) switchedOut = false; switchPending = false; -// IQ.setBE(this); LSQ.setBE(this); // Setup IQ and LSQ with their parameters here. @@ -202,8 +228,6 @@ LWBackEnd::LWBackEnd(Params *params) instsToExecute = i2e.getWire(-1); -// IQ.setIssueExecQueue(&i2e); - dispatchWidth = params->dispatchWidth ? params->dispatchWidth : width; issueWidth = params->issueWidth ? params->issueWidth : width; wbWidth = params->wbWidth ? params->wbWidth : width; @@ -538,8 +562,6 @@ LWBackEnd::regStats() .desc("ROB Occupancy per cycle") .flags(total | cdf) ; - -// IQ.regStats(); } template @@ -652,17 +674,7 @@ LWBackEnd::tick() squashFromTrap(); } else if (xcSquash) { squashFromXC(); - } /*else if (fetchHasFault && robEmpty() && frontEnd->isEmpty() && !LSQ.hasStoresToWB()) { - DPRINTF(BE, "ROB and front end empty, handling fetch fault\n"); - Fault fetch_fault = frontEnd->getFault(); - if (fetch_fault == NoFault) { - DPRINTF(BE, "Fetch no longer has a fault, cancelling out.\n"); - fetchHasFault = false; - } else { - handleFault(fetch_fault); - fetchHasFault = false; - } - }*/ + } #endif if (dispatchStatus != Blocked) { @@ -773,7 +785,8 @@ LWBackEnd::dispatchInsts() inst->iqItValid = true; waitingInsts++; } else { - DPRINTF(BE, "Instruction [sn:%lli] ready, addding to exeList.\n", + DPRINTF(BE, "Instruction [sn:%lli] ready, addding to " + "exeList.\n", inst->seqNum); exeList.push(inst); } @@ -784,7 +797,8 @@ LWBackEnd::dispatchInsts() inst->setExecuted(); inst->setCanCommit(); } else { - DPRINTF(BE, "Instruction [sn:%lli] ready, addding to exeList.\n", + DPRINTF(BE, "Instruction [sn:%lli] ready, addding to " + "exeList.\n", inst->seqNum); exeList.push(inst); } @@ -993,7 +1007,7 @@ LWBackEnd::instToCommit(DynInstPtr &inst) writeback_count[0]++; } - +#if 0 template void LWBackEnd::writebackInsts() @@ -1040,7 +1054,7 @@ LWBackEnd::writebackInsts() consumer_inst[0]+= consumer_insts; writeback_count[0]+= inst_num; } - +#endif template bool LWBackEnd::commitInst(int inst_num) @@ -1219,15 +1233,15 @@ LWBackEnd::commitInst(int inst_num) --numInsts; ++thread->funcExeInst; - // Maybe move this to where the fault is handled; if the fault is handled, - // don't try to set this myself as the fault will set it. If not, then - // I set thread->PC = thread->nextPC and thread->nextPC = thread->nextPC + 4. + // Maybe move this to where the fault is handled; if the fault is + // handled, don't try to set this myself as the fault will set it. + // If not, then I set thread->PC = thread->nextPC and + // thread->nextPC = thread->nextPC + 4. thread->setPC(thread->readNextPC()); thread->setNextPC(thread->readNextPC() + sizeof(TheISA::MachInst)); updateComInstStats(inst); // Write the done sequence number here. -// LSQ.commitLoads(inst->seqNum); toIEW->doneSeqNum = inst->seqNum; lastCommitCycle = curTick; @@ -1357,7 +1371,8 @@ LWBackEnd::squash(const InstSeqNum &sn) } while (memBarrier && memBarrier->seqNum > sn) { - DPRINTF(BE, "[sn:%lli] Memory barrier squashed (or previously squashed)\n", memBarrier->seqNum); + DPRINTF(BE, "[sn:%lli] Memory barrier squashed (or previously " + "squashed)\n", memBarrier->seqNum); memBarrier->clearMemDependents(); if (memBarrier->memDepReady()) { DPRINTF(BE, "No previous barrier\n"); diff --git a/cpu/ozone/lw_lsq.hh b/cpu/ozone/lw_lsq.hh index 042610324..6fe343b42 100644 --- a/cpu/ozone/lw_lsq.hh +++ b/cpu/ozone/lw_lsq.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -138,7 +138,6 @@ class OzoneLWLSQ { /** Executes a load instruction. */ Fault executeLoad(DynInstPtr &inst); -// Fault executeLoad(int lq_idx); /** Executes a store instruction. */ Fault executeStore(DynInstPtr &inst); @@ -304,10 +303,8 @@ class OzoneLWLSQ { Status _status; /** The store queue. */ -// std::vector storeQueue; std::list storeQueue; /** The load queue. */ -// std::vector loadQueue; std::list loadQueue; typedef typename std::list::iterator SQIt; @@ -365,7 +362,6 @@ class OzoneLWLSQ { */ InstSeqNum stallingStoreIsn; /** The index of the above store. */ -// int stallingLoadIdx; LQIt stallingLoad; /** Whether or not a load is blocked due to the memory system. It is @@ -398,8 +394,6 @@ class OzoneLWLSQ { template Fault write(MemReqPtr &req, T &data, int store_idx); - /** Returns the index of the head load instruction. */ -// int getLoadHead() { return loadHead; } /** Returns the sequence number of the head load instruction. */ InstSeqNum getLoadHeadSeqNum() { @@ -411,8 +405,6 @@ class OzoneLWLSQ { } - /** Returns the index of the head store instruction. */ -// int getStoreHead() { return storeHead; } /** Returns the sequence number of the head store instruction. */ InstSeqNum getStoreHeadSeqNum() { @@ -604,12 +596,7 @@ OzoneLWLSQ::read(MemReqPtr &req, T &data, int load_idx) DPRINTF(OzoneLSQ, "D-cache: PC:%#x reading from paddr:%#x " "vaddr:%#x flags:%i\n", inst->readPC(), req->paddr, req->vaddr, req->flags); -/* - Addr debug_addr = ULL(0xfffffc0000be81a8); - if (req->vaddr == debug_addr) { - debug_break(); - } -*/ + assert(!req->completionEvent); req->completionEvent = new typename BackEnd::LdWritebackEvent(inst, be); @@ -631,9 +618,6 @@ OzoneLWLSQ::read(MemReqPtr &req, T &data, int load_idx) _status = DcacheMissStall; } else { -// DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n", -// inst->seqNum); - DPRINTF(OzoneLSQ, "D-cache hit!\n"); } } else { @@ -664,12 +648,7 @@ OzoneLWLSQ::write(MemReqPtr &req, T &data, int store_idx) assert(!req->data); req->data = new uint8_t[64]; memcpy(req->data, (uint8_t *)&(*sq_it).data, req->size); -/* - Addr debug_addr = ULL(0xfffffc0000be81a8); - if (req->vaddr == debug_addr) { - debug_break(); - } -*/ + // This function only writes the data to the store queue, so no fault // can happen here. return NoFault; diff --git a/cpu/ozone/lw_lsq_impl.hh b/cpu/ozone/lw_lsq_impl.hh index fdf6bff07..2f85a0396 100644 --- a/cpu/ozone/lw_lsq_impl.hh +++ b/cpu/ozone/lw_lsq_impl.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -104,12 +104,6 @@ OzoneLWLSQ::init(Params *params, unsigned maxLQEntries, SQIndices.push(i); } - // May want to initialize these entries to NULL - -// loadHead = loadTail = 0; - -// storeHead = storeWBIdx = storeTail = 0; - usedPorts = 0; cachePorts = params->cachePorts; @@ -197,8 +191,6 @@ OzoneLWLSQ::insert(DynInstPtr &inst) } else { insertStore(inst); } - -// inst->setInLSQ(); } template @@ -569,12 +561,9 @@ OzoneLWLSQ::writebackStores() } if (result != MA_HIT && dcacheInterface->doEvents()) { -// Event *wb = NULL; store_event->miss = true; typename BackEnd::LdWritebackEvent *wb = NULL; if (req->flags & LOCKED) { - // Stx_C does not generate a system port transaction. -// req->result=1; wb = new typename BackEnd::LdWritebackEvent(inst, be); store_event->wbEvent = wb; @@ -585,8 +574,6 @@ OzoneLWLSQ::writebackStores() // DPRINTF(Activity, "Active st accessing mem miss [sn:%lli]\n", // inst->seqNum); - // Will stores need their own kind of writeback events? - // Do stores even need writeback events? be->addDcacheMiss(inst); lastDcacheStall = curTick; @@ -604,20 +591,16 @@ OzoneLWLSQ::writebackStores() // inst->seqNum); if (req->flags & LOCKED) { - // Stx_C does not generate a system port transaction. -/* if (req->flags & UNCACHEABLE) { - req->result = 2; - } else { - req->result = 1; - } -*/ + // Stx_C does not generate a system port + // transaction in the 21264, but that might be + // hard to accomplish in this model. + typename BackEnd::LdWritebackEvent *wb = new typename BackEnd::LdWritebackEvent(inst, be); store_event->wbEvent = wb; } sq_it--; -// completeStore(inst->sqIdx); } } else { panic("Must HAVE DCACHE!!!!!\n"); @@ -780,7 +763,7 @@ OzoneLWLSQ::completeStore(int store_idx) SQIndices.push(inst->sqIdx); storeQueue.erase(sq_it); --stores; -// assert(!inst->isCompleted()); + inst->setCompleted(); if (cpu->checker) { cpu->checker->tick(inst); @@ -791,7 +774,6 @@ template void OzoneLWLSQ::switchOut() { -// assert(loads == 0); assert(storesToWB == 0); switchedOut = true; SQIt sq_it = --(storeQueue.end()); @@ -804,8 +786,6 @@ OzoneLWLSQ::switchOut() if ((*sq_it).size == 0 && !(*sq_it).completed) { sq_it--; -// completeStore(inst->sqIdx); - continue; } @@ -817,7 +797,8 @@ OzoneLWLSQ::switchOut() continue; } else if ((*sq_it).req->flags & LOCKED) { sq_it--; - assert(!(*sq_it).canWB || ((*sq_it).canWB && (*sq_it).req->flags & LOCKED)); + assert(!(*sq_it).canWB || + ((*sq_it).canWB && (*sq_it).req->flags & LOCKED)); continue; } @@ -886,12 +867,6 @@ OzoneLWLSQ::takeOverFrom(ExecContext *old_xc) SQIndices.push(i); } - // May want to initialize these entries to NULL - -// loadHead = loadTail = 0; - -// storeHead = storeWBIdx = storeTail = 0; - usedPorts = 0; loadFaultInst = storeFaultInst = memDepViolator = NULL; diff --git a/cpu/ozone/rename_table.hh b/cpu/ozone/rename_table.hh index afbf6ff32..6ee23b21b 100644 --- a/cpu/ozone/rename_table.hh +++ b/cpu/ozone/rename_table.hh @@ -1,3 +1,31 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + #ifndef __CPU_OZONE_RENAME_TABLE_HH__ #define __CPU_OZONE_RENAME_TABLE_HH__ diff --git a/cpu/ozone/thread_state.hh b/cpu/ozone/thread_state.hh index 269fc6459..c86c3a720 100644 --- a/cpu/ozone/thread_state.hh +++ b/cpu/ozone/thread_state.hh @@ -1,3 +1,30 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ #ifndef __CPU_OZONE_THREAD_STATE_HH__ #define __CPU_OZONE_THREAD_STATE_HH__ @@ -62,19 +89,14 @@ struct OzoneThreadState : public ThreadState { void setStatus(Status new_status) { _status = new_status; } - RenameTable renameTable; // Should I include backend and frontend - // tables here? For the ozone CPU, maybe, for the new full CPU, probably - // not...you wouldn't want threads just accessing the backend/frontend - // rename tables. - Addr PC; // What should these be set to? Probably the committed ones. + RenameTable renameTable; + Addr PC; Addr nextPC; - // Current instruction? + // Current instruction TheISA::MachInst inst; TheISA::RegFile regs; - // Front end? Back end? -// MemReqPtr memReq; typename Impl::FullCPU *cpu; -- cgit v1.2.3 From 3fe35232322daef87a0b85d7f3ca4c18330ed7c4 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Wed, 24 May 2006 14:31:06 -0400 Subject: Support new flags now used instead of flags in decoder.isa. cpu/ozone/front_end_impl.hh: cpu/ozone/lw_back_end_impl.hh: cpu/ozone/lw_lsq_impl.hh: Support new flags added in. --HG-- extra : convert_revision : 2e756fd1913cf600650afc39dd715d59b9b89c42 --- cpu/ozone/front_end_impl.hh | 10 +++++++--- cpu/ozone/lw_back_end_impl.hh | 12 ++++++++---- cpu/ozone/lw_lsq_impl.hh | 7 +++---- 3 files changed, 18 insertions(+), 11 deletions(-) (limited to 'cpu/ozone') diff --git a/cpu/ozone/front_end_impl.hh b/cpu/ozone/front_end_impl.hh index 15adae9b4..ffbcf3340 100644 --- a/cpu/ozone/front_end_impl.hh +++ b/cpu/ozone/front_end_impl.hh @@ -503,11 +503,14 @@ FrontEnd::processBarriers(DynInstPtr &inst) if (serializeNext) { inst->setSerializeBefore(); serializeNext = false; - } else if (!inst->isSerializing()) { + } else if (!inst->isSerializing() && + !inst->isIprAccess() && + !inst->isStoreConditional()) { return false; } - if (inst->isSerializeBefore() && !inst->isSerializeHandled()) { + if ((inst->isIprAccess() || inst->isSerializeBefore()) && + !inst->isSerializeHandled()) { DPRINTF(FE, "Serialize before instruction encountered.\n"); if (!inst->isTempSerializeBefore()) { @@ -523,7 +526,8 @@ FrontEnd::processBarriers(DynInstPtr &inst) barrierInst = inst; return true; - } else if (inst->isSerializeAfter() && !inst->isSerializeHandled()) { + } else if ((inst->isStoreConditional() || inst->isSerializeAfter()) + && !inst->isSerializeHandled()) { DPRINTF(FE, "Serialize after instruction encountered.\n"); inst->setSerializeHandled(); diff --git a/cpu/ozone/lw_back_end_impl.hh b/cpu/ozone/lw_back_end_impl.hh index 881d6e6b1..41b4ea24b 100644 --- a/cpu/ozone/lw_back_end_impl.hh +++ b/cpu/ozone/lw_back_end_impl.hh @@ -66,8 +66,9 @@ LWBackEnd::wakeDependents(DynInstPtr &inst, bool memory_deps) DPRINTF(BE, "Marking source reg ready [sn:%lli] in IQ\n", dep_inst->seqNum); if (dep_inst->readyToIssue() && dep_inst->isInROB() && - !dep_inst->isNonSpeculative() && - dep_inst->memDepReady() && !dep_inst->isMemBarrier() && !dep_inst->isWriteBarrier()) { + !dep_inst->isNonSpeculative() && !dep_inst->isStoreConditional() && + dep_inst->memDepReady() && !dep_inst->isMemBarrier() && + !dep_inst->isWriteBarrier()) { DPRINTF(BE, "Adding instruction to exeList [sn:%lli]\n", dep_inst->seqNum); exeList.push(dep_inst); @@ -768,7 +769,9 @@ LWBackEnd::dispatchInsts() } memBarrier = inst; inst->setCanCommit(); - } else if (inst->readyToIssue() && !inst->isNonSpeculative()) { + } else if (inst->readyToIssue() && + !inst->isNonSpeculative() && + !inst->isStoreConditional()) { if (inst->isMemRef()) { LSQ.insert(inst); @@ -803,7 +806,7 @@ LWBackEnd::dispatchInsts() exeList.push(inst); } } else { - if (inst->isNonSpeculative()) { + if (inst->isNonSpeculative() || inst->isStoreConditional()) { inst->setCanCommit(); DPRINTF(BE, "Adding non speculative instruction\n"); } @@ -1079,6 +1082,7 @@ LWBackEnd::commitInst(int inst_num) // or store inst. Signal backwards that it should be executed. if (!inst->isExecuted()) { if (inst->isNonSpeculative() || + inst->isStoreConditional() || inst->isMemBarrier() || inst->isWriteBarrier()) { #if !FULL_SYSTEM diff --git a/cpu/ozone/lw_lsq_impl.hh b/cpu/ozone/lw_lsq_impl.hh index 2f85a0396..f72bbb1cc 100644 --- a/cpu/ozone/lw_lsq_impl.hh +++ b/cpu/ozone/lw_lsq_impl.hh @@ -364,10 +364,9 @@ OzoneLWLSQ::executeStore(DynInstPtr &store_inst) if (store_fault != NoFault) { panic("Fault in a store instruction!"); storeFaultInst = store_inst; - } else if (store_inst->isNonSpeculative()) { - // Nonspeculative accesses (namely store conditionals) - // need to set themselves as able to writeback if we - // haven't had a fault by here. + } else if (store_inst->isStoreConditional()) { + // Store conditionals need to set themselves as able to + // writeback if we haven't had a fault by here. (*sq_it).canWB = true; ++storesToWB; -- cgit v1.2.3