Diffstat (limited to 'cpu/ozone')
33 files changed, 13819 insertions, 0 deletions
diff --git a/cpu/ozone/back_end.cc b/cpu/ozone/back_end.cc new file mode 100644 index 000000000..cb014e4cc --- /dev/null +++ b/cpu/ozone/back_end.cc @@ -0,0 +1,5 @@ + +#include "cpu/ozone/back_end_impl.hh" +#include "cpu/ozone/ozone_impl.hh" + +//template class BackEnd<OzoneImpl>; diff --git a/cpu/ozone/back_end.hh b/cpu/ozone/back_end.hh new file mode 100644 index 000000000..14b011ab8 --- /dev/null +++ b/cpu/ozone/back_end.hh @@ -0,0 +1,516 @@ + +#ifndef __CPU_OZONE_BACK_END_HH__ +#define __CPU_OZONE_BACK_END_HH__ + +#include <list> +#include <queue> +#include <string> + +#include "arch/faults.hh" +#include "base/timebuf.hh" +#include "cpu/inst_seq.hh" +#include "cpu/ozone/rename_table.hh" +#include "cpu/ozone/thread_state.hh" +#include "mem/functional/functional.hh" +#include "mem/mem_interface.hh" +#include "mem/mem_req.hh" +#include "sim/eventq.hh" + +class ExecContext; + +template <class Impl> +class OzoneThreadState; + +template <class Impl> +class BackEnd +{ + public: + typedef OzoneThreadState<Impl> Thread; + + typedef typename Impl::Params Params; + typedef typename Impl::DynInst DynInst; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::FrontEnd FrontEnd; + typedef typename Impl::FullCPU::CommStruct CommStruct; + + struct SizeStruct { + int size; + }; + + typedef SizeStruct DispatchToIssue; + typedef SizeStruct IssueToExec; + typedef SizeStruct ExecToCommit; + typedef SizeStruct Writeback; + + TimeBuffer<DispatchToIssue> d2i; + typename TimeBuffer<DispatchToIssue>::wire instsToDispatch; + TimeBuffer<IssueToExec> i2e; + typename TimeBuffer<IssueToExec>::wire instsToExecute; + TimeBuffer<ExecToCommit> e2c; + TimeBuffer<Writeback> numInstsToWB; + + TimeBuffer<CommStruct> *comm; + typename TimeBuffer<CommStruct>::wire toIEW; + typename TimeBuffer<CommStruct>::wire fromCommit; + + class InstQueue { + enum queue { + NonSpec, + IQ, + ToBeScheduled, + ReadyList, + ReplayList + }; + struct pqCompare { + bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const + { + return lhs->seqNum > rhs->seqNum; + } + }; + public: + InstQueue(Params *params); + + std::string name() const; + + void regStats(); + + void setIssueExecQueue(TimeBuffer<IssueToExec> *i2e_queue); + + void setBE(BackEnd *_be) { be = _be; } + + void insert(DynInstPtr &inst); + + void scheduleReadyInsts(); + + void scheduleNonSpec(const InstSeqNum &sn); + + DynInstPtr getReadyInst(); + + void commit(const InstSeqNum &sn) {} + + void squash(const InstSeqNum &sn); + + int wakeDependents(DynInstPtr &inst); + + /** Tells memory dependence unit that a memory instruction needs to be + * rescheduled. It will re-execute once replayMemInst() is called. + */ + void rescheduleMemInst(DynInstPtr &inst); + + /** Re-executes all rescheduled memory instructions. */ + void replayMemInst(DynInstPtr &inst); + + /** Completes memory instruction. */ + void completeMemInst(DynInstPtr &inst); + + void violation(DynInstPtr &inst, DynInstPtr &violation) { } + + bool isFull() { return numInsts >= size; } + + void dumpInsts(); + + private: + bool find(queue q, typename std::list<DynInstPtr>::iterator it); + BackEnd *be; + TimeBuffer<IssueToExec> *i2e; + typename TimeBuffer<IssueToExec>::wire numIssued; + typedef typename std::list<DynInstPtr> InstList; + typedef typename std::list<DynInstPtr>::iterator InstListIt; + typedef typename std::priority_queue<DynInstPtr, std::vector<DynInstPtr>, pqCompare> ReadyInstQueue; + // Not sure I need the IQ list; it just needs to be a count. 
+ InstList iq; + InstList toBeScheduled; + InstList readyList; + InstList nonSpec; + InstList replayList; + ReadyInstQueue readyQueue; + public: + int size; + int numInsts; + int width; + + Stats::VectorDistribution<> occ_dist; + + Stats::Vector<> inst_count; + Stats::Vector<> peak_inst_count; + Stats::Scalar<> empty_count; + Stats::Scalar<> current_count; + Stats::Scalar<> fullCount; + + Stats::Formula occ_rate; + Stats::Formula avg_residency; + Stats::Formula empty_rate; + Stats::Formula full_rate; + }; + + /** LdWriteback event for a load completion. */ + class LdWritebackEvent : public Event { + private: + /** Instruction that is writing back data to the register file. */ + DynInstPtr inst; + /** Pointer to IEW stage. */ + BackEnd *be; + + public: + /** Constructs a load writeback event. */ + LdWritebackEvent(DynInstPtr &_inst, BackEnd *be); + + /** Processes writeback event. */ + virtual void process(); + /** Returns the description of the writeback event. */ + virtual const char *description(); + }; + + BackEnd(Params *params); + + std::string name() const; + + void regStats(); + + void setCPU(FullCPU *cpu_ptr) + { cpu = cpu_ptr; } + + void setFrontEnd(FrontEnd *front_end_ptr) + { frontEnd = front_end_ptr; } + + void setXC(ExecContext *xc_ptr) + { xc = xc_ptr; } + + void setThreadState(Thread *thread_ptr) + { thread = thread_ptr; } + + void setCommBuffer(TimeBuffer<CommStruct> *_comm); + + void tick(); + void squash(); + void squashFromXC(); + bool xcSquash; + + template <class T> + Fault read(MemReqPtr &req, T &data, int load_idx); + + template <class T> + Fault write(MemReqPtr &req, T &data, int store_idx); + + Addr readCommitPC() { return commitPC; } + + Addr commitPC; + + bool robEmpty() { return instList.empty(); } + + bool isFull() { return numInsts >= numROBEntries; } + bool isBlocked() { return status == Blocked || dispatchStatus == Blocked; } + + /** Tells memory dependence unit that a memory instruction needs to be + * rescheduled. It will re-execute once replayMemInst() is called. + */ + void rescheduleMemInst(DynInstPtr &inst) + { IQ.rescheduleMemInst(inst); } + + /** Re-executes all rescheduled memory instructions. */ + void replayMemInst(DynInstPtr &inst) + { IQ.replayMemInst(inst); } + + /** Completes memory instruction. 
*/ + void completeMemInst(DynInstPtr &inst) + { IQ.completeMemInst(inst); } + + void fetchFault(Fault &fault); + + private: + void updateStructures(); + void dispatchInsts(); + void dispatchStall(); + void checkDispatchStatus(); + void scheduleReadyInsts(); + void executeInsts(); + void commitInsts(); + void addToIQ(DynInstPtr &inst); + void addToLSQ(DynInstPtr &inst); + void instToCommit(DynInstPtr &inst); + void writebackInsts(); + bool commitInst(int inst_num); + void squash(const InstSeqNum &sn); + void squashDueToBranch(DynInstPtr &inst); + void squashDueToMemBlocked(DynInstPtr &inst); + void updateExeInstStats(DynInstPtr &inst); + void updateComInstStats(DynInstPtr &inst); + + public: + FullCPU *cpu; + + FrontEnd *frontEnd; + + ExecContext *xc; + + Thread *thread; + + enum Status { + Running, + Idle, + DcacheMissStall, + DcacheMissComplete, + Blocked + }; + + Status status; + + Status dispatchStatus; + + Counter funcExeInst; + + private: +// typedef typename Impl::InstQueue InstQueue; + + InstQueue IQ; + + typedef typename Impl::LdstQueue LdstQueue; + + LdstQueue LSQ; + public: + RenameTable<Impl> commitRenameTable; + + RenameTable<Impl> renameTable; + private: + class DCacheCompletionEvent : public Event + { + private: + BackEnd *be; + + public: + DCacheCompletionEvent(BackEnd *_be); + + virtual void process(); + virtual const char *description(); + }; + + friend class DCacheCompletionEvent; + + DCacheCompletionEvent cacheCompletionEvent; + + MemInterface *dcacheInterface; + + MemReqPtr memReq; + + // General back end width. Used if the more specific isn't given. + int width; + + // Dispatch width. + int dispatchWidth; + int numDispatchEntries; + int dispatchSize; + + int issueWidth; + + // Writeback width + int wbWidth; + + // Commit width + int commitWidth; + + /** Index into queue of instructions being written back. */ + unsigned wbNumInst; + + /** Cycle number within the queue of instructions being written + * back. Used in case there are too many instructions writing + * back at the current cycle and writesbacks need to be scheduled + * for the future. See comments in instToCommit(). 
+ */ + unsigned wbCycle; + + int numROBEntries; + int numInsts; + + bool squashPending; + InstSeqNum squashSeqNum; + Addr squashNextPC; + + Fault faultFromFetch; + + private: + typedef typename std::list<DynInstPtr>::iterator InstListIt; + + std::list<DynInstPtr> instList; + std::list<DynInstPtr> dispatch; + std::list<DynInstPtr> writeback; + + int latency; + + int squashLatency; + + bool exactFullStall; + + bool fetchRedirect[Impl::MaxThreads]; + + // number of cycles stalled for D-cache misses +/* Stats::Scalar<> dcacheStallCycles; + Counter lastDcacheStall; +*/ + Stats::Vector<> rob_cap_events; + Stats::Vector<> rob_cap_inst_count; + Stats::Vector<> iq_cap_events; + Stats::Vector<> iq_cap_inst_count; + // total number of instructions executed + Stats::Vector<> exe_inst; + Stats::Vector<> exe_swp; + Stats::Vector<> exe_nop; + Stats::Vector<> exe_refs; + Stats::Vector<> exe_loads; + Stats::Vector<> exe_branches; + + Stats::Vector<> issued_ops; + + // total number of loads forwaded from LSQ stores + Stats::Vector<> lsq_forw_loads; + + // total number of loads ignored due to invalid addresses + Stats::Vector<> inv_addr_loads; + + // total number of software prefetches ignored due to invalid addresses + Stats::Vector<> inv_addr_swpfs; + // ready loads blocked due to memory disambiguation + Stats::Vector<> lsq_blocked_loads; + + Stats::Scalar<> lsqInversion; + + Stats::Vector<> n_issued_dist; + Stats::VectorDistribution<> issue_delay_dist; + + Stats::VectorDistribution<> queue_res_dist; +/* + Stats::Vector<> stat_fu_busy; + Stats::Vector2d<> stat_fuBusy; + Stats::Vector<> dist_unissued; + Stats::Vector2d<> stat_issued_inst_type; + + Stats::Formula misspec_cnt; + Stats::Formula misspec_ipc; + Stats::Formula issue_rate; + Stats::Formula issue_stores; + Stats::Formula issue_op_rate; + Stats::Formula fu_busy_rate; + Stats::Formula commit_stores; + Stats::Formula commit_ipc; + Stats::Formula commit_ipb; + Stats::Formula lsq_inv_rate; +*/ + Stats::Vector<> writeback_count; + Stats::Vector<> producer_inst; + Stats::Vector<> consumer_inst; + Stats::Vector<> wb_penalized; + + Stats::Formula wb_rate; + Stats::Formula wb_fanout; + Stats::Formula wb_penalized_rate; + + // total number of instructions committed + Stats::Vector<> stat_com_inst; + Stats::Vector<> stat_com_swp; + Stats::Vector<> stat_com_refs; + Stats::Vector<> stat_com_loads; + Stats::Vector<> stat_com_membars; + Stats::Vector<> stat_com_branches; + + Stats::Distribution<> n_committed_dist; + + Stats::Scalar<> commit_eligible_samples; + Stats::Vector<> commit_eligible; + + Stats::Scalar<> ROB_fcount; + Stats::Formula ROB_full_rate; + + Stats::Vector<> ROB_count; // cumulative ROB occupancy + Stats::Formula ROB_occ_rate; + Stats::VectorDistribution<> ROB_occ_dist; + public: + void dumpInsts(); +}; + +template <class Impl> +template <class T> +Fault +BackEnd<Impl>::read(MemReqPtr &req, T &data, int load_idx) +{ +/* memReq->reset(addr, sizeof(T), flags); + + // translate to physical address + Fault fault = cpu->translateDataReadReq(memReq); + + // if we have a cache, do cache access too + if (fault == NoFault && dcacheInterface) { + memReq->cmd = Read; + memReq->completionEvent = NULL; + memReq->time = curTick; + memReq->flags &= ~INST_READ; + MemAccessResult result = dcacheInterface->access(memReq); + + // Ugly hack to get an event scheduled *only* if the access is + // a miss. We really should add first-class support for this + // at some point. 
+ if (result != MA_HIT && dcacheInterface->doEvents()) { + // Fix this hack for keeping funcExeInst correct with loads that + // are executed twice. + --funcExeInst; + + memReq->completionEvent = &cacheCompletionEvent; + lastDcacheStall = curTick; +// unscheduleTickEvent(); +// status = DcacheMissStall; + DPRINTF(OzoneCPU, "Dcache miss stall!\n"); + } else { + // do functional access + fault = thread->mem->read(memReq, data); + + } + } +*/ +/* + if (!dcacheInterface && (memReq->flags & UNCACHEABLE)) + recordEvent("Uncached Read"); +*/ + return LSQ.read(req, data, load_idx); +} + +template <class Impl> +template <class T> +Fault +BackEnd<Impl>::write(MemReqPtr &req, T &data, int store_idx) +{ +/* + memReq->reset(addr, sizeof(T), flags); + + // translate to physical address + Fault fault = cpu->translateDataWriteReq(memReq); + + if (fault == NoFault && dcacheInterface) { + memReq->cmd = Write; + memcpy(memReq->data,(uint8_t *)&data,memReq->size); + memReq->completionEvent = NULL; + memReq->time = curTick; + memReq->flags &= ~INST_READ; + MemAccessResult result = dcacheInterface->access(memReq); + + // Ugly hack to get an event scheduled *only* if the access is + // a miss. We really should add first-class support for this + // at some point. + if (result != MA_HIT && dcacheInterface->doEvents()) { + memReq->completionEvent = &cacheCompletionEvent; + lastDcacheStall = curTick; +// unscheduleTickEvent(); +// status = DcacheMissStall; + DPRINTF(OzoneCPU, "Dcache miss stall!\n"); + } + } + + if (res && (fault == NoFault)) + *res = memReq->result; + */ +/* + if (!dcacheInterface && (memReq->flags & UNCACHEABLE)) + recordEvent("Uncached Write"); +*/ + return LSQ.write(req, data, store_idx); +} + +#endif // __CPU_OZONE_BACK_END_HH__ diff --git a/cpu/ozone/back_end_impl.hh b/cpu/ozone/back_end_impl.hh new file mode 100644 index 000000000..36770d65c --- /dev/null +++ b/cpu/ozone/back_end_impl.hh @@ -0,0 +1,1904 @@ + +#include "encumbered/cpu/full/op_class.hh" +#include "cpu/ozone/back_end.hh" + +template <class Impl> +BackEnd<Impl>::InstQueue::InstQueue(Params *params) + : size(params->numIQEntries), numInsts(0), width(params->issueWidth) +{ +} + +template <class Impl> +std::string +BackEnd<Impl>::InstQueue::name() const +{ + return be->name() + ".iq"; +} + +template <class Impl> +void +BackEnd<Impl>::InstQueue::regStats() +{ + using namespace Stats; + + occ_dist + .init(1, 0, size, 2) + .name(name() + "occ_dist") + .desc("IQ Occupancy per cycle") + .flags(total | cdf) + ; + + inst_count + .init(1) + .name(name() + "cum_num_insts") + .desc("Total occupancy") + .flags(total) + ; + + peak_inst_count + .init(1) + .name(name() + "peak_occupancy") + .desc("Peak IQ occupancy") + .flags(total) + ; + + current_count + .name(name() + "current_count") + .desc("Occupancy this cycle") + ; + + empty_count + .name(name() + "empty_count") + .desc("Number of empty cycles") + ; + + fullCount + .name(name() + "full_count") + .desc("Number of full cycles") + ; + + + occ_rate + .name(name() + "occ_rate") + .desc("Average occupancy") + .flags(total) + ; + occ_rate = inst_count / be->cpu->numCycles; + + avg_residency + .name(name() + "avg_residency") + .desc("Average IQ residency") + .flags(total) + ; + avg_residency = occ_rate / be->cpu->numCycles; + + empty_rate + .name(name() + "empty_rate") + .desc("Fraction of cycles empty") + ; + empty_rate = 100 * empty_count / be->cpu->numCycles; + + full_rate + .name(name() + "full_rate") + .desc("Fraction of cycles full") + ; + full_rate = 100 * fullCount / be->cpu->numCycles; 
+} + +template <class Impl> +void +BackEnd<Impl>::InstQueue::setIssueExecQueue(TimeBuffer<IssueToExec> *i2e_queue) +{ + i2e = i2e_queue; + numIssued = i2e->getWire(0); +} + +template <class Impl> +void +BackEnd<Impl>::InstQueue::insert(DynInstPtr &inst) +{ + numInsts++; + inst_count[0]++; + if (!inst->isNonSpeculative()) { + DPRINTF(BE, "Instruction [sn:%lli] added to IQ\n", inst->seqNum); + if (inst->readyToIssue()) { + toBeScheduled.push_front(inst); + inst->iqIt = toBeScheduled.begin(); + inst->iqItValid = true; + } else { + iq.push_front(inst); + inst->iqIt = iq.begin(); + inst->iqItValid = true; + } + } else { + DPRINTF(BE, "Nonspeculative instruction [sn:%lli] added to IQ\n", inst->seqNum); + nonSpec.push_front(inst); + inst->iqIt = nonSpec.begin(); + inst->iqItValid = true; + } +} + +template <class Impl> +void +BackEnd<Impl>::InstQueue::scheduleReadyInsts() +{ + int scheduled = numIssued->size; + InstListIt iq_it = --toBeScheduled.end(); + InstListIt iq_end_it = toBeScheduled.end(); + + while (iq_it != iq_end_it && scheduled < width) { +// if ((*iq_it)->readyToIssue()) { + DPRINTF(BE, "Instruction [sn:%lli] PC:%#x is ready\n", + (*iq_it)->seqNum, (*iq_it)->readPC()); + readyQueue.push(*iq_it); + readyList.push_front(*iq_it); + + (*iq_it)->iqIt = readyList.begin(); + + toBeScheduled.erase(iq_it--); + + ++scheduled; +// } else { +// iq_it++; +// } + } + + numIssued->size+= scheduled; +} + +template <class Impl> +void +BackEnd<Impl>::InstQueue::scheduleNonSpec(const InstSeqNum &sn) +{ +/* + InstListIt non_spec_it = nonSpec.begin(); + InstListIt non_spec_end_it = nonSpec.end(); + + while ((*non_spec_it)->seqNum != sn) { + non_spec_it++; + assert(non_spec_it != non_spec_end_it); + } +*/ + DynInstPtr inst = nonSpec.back(); + + DPRINTF(BE, "Nonspeculative instruction [sn:%lli] scheduled\n", inst->seqNum); + + assert(inst->seqNum == sn); + + assert(find(NonSpec, inst->iqIt)); + nonSpec.erase(inst->iqIt); + readyList.push_front(inst); + inst->iqIt = readyList.begin(); + readyQueue.push(inst); + numIssued->size++; +} + +template <class Impl> +typename Impl::DynInstPtr +BackEnd<Impl>::InstQueue::getReadyInst() +{ + assert(!readyList.empty()); + + DynInstPtr inst = readyQueue.top(); + readyQueue.pop(); + assert(find(ReadyList, inst->iqIt)); + readyList.erase(inst->iqIt); + inst->iqItValid = false; +// if (!inst->isMemRef()) + --numInsts; + return inst; +} + +template <class Impl> +void +BackEnd<Impl>::InstQueue::squash(const InstSeqNum &sn) +{ + InstListIt iq_it = iq.begin(); + InstListIt iq_end_it = iq.end(); + + while (iq_it != iq_end_it && (*iq_it)->seqNum > sn) { + DPRINTF(BE, "Instruction [sn:%lli] removed from IQ\n", (*iq_it)->seqNum); + (*iq_it)->iqItValid = false; + iq.erase(iq_it++); + --numInsts; + } + + iq_it = nonSpec.begin(); + iq_end_it = nonSpec.end(); + + while (iq_it != iq_end_it && (*iq_it)->seqNum > sn) { + DPRINTF(BE, "Instruction [sn:%lli] removed from IQ\n", (*iq_it)->seqNum); + (*iq_it)->iqItValid = false; + nonSpec.erase(iq_it++); + --numInsts; + } + + iq_it = replayList.begin(); + iq_end_it = replayList.end(); + + while (iq_it != iq_end_it) { + if ((*iq_it)->seqNum > sn) { + DPRINTF(BE, "Instruction [sn:%lli] removed from IQ\n", (*iq_it)->seqNum); + (*iq_it)->iqItValid = false; + replayList.erase(iq_it++); + --numInsts; + } else { + iq_it++; + } + } + + assert(numInsts >= 0); +/* + InstListIt ready_it = readyList.begin(); + InstListIt ready_end_it = readyList.end(); + + while (ready_it != ready_end_it) { + if ((*ready_it)->seqNum > sn) { + readyList.erase(ready_it++); 
+ } else { + ready_it++; + } + } +*/ +} + +template <class Impl> +int +BackEnd<Impl>::InstQueue::wakeDependents(DynInstPtr &inst) +{ + assert(!inst->isSquashed()); + std::vector<DynInstPtr> &dependents = inst->getDependents(); + int num_outputs = dependents.size(); + + DPRINTF(BE, "Waking instruction [sn:%lli] dependents in IQ\n", inst->seqNum); + + for (int i = 0; i < num_outputs; i++) { + DynInstPtr dep_inst = dependents[i]; + dep_inst->markSrcRegReady(); + DPRINTF(BE, "Marking source reg ready [sn:%lli] in IQ\n", dep_inst->seqNum); + + if (dep_inst->readyToIssue() && dep_inst->iqItValid) { + if (dep_inst->isNonSpeculative()) { + assert(find(NonSpec, dep_inst->iqIt)); + nonSpec.erase(dep_inst->iqIt); + } else { + assert(find(IQ, dep_inst->iqIt)); + iq.erase(dep_inst->iqIt); + } + + toBeScheduled.push_front(dep_inst); + dep_inst->iqIt = toBeScheduled.begin(); + } + } + return num_outputs; +} + +template <class Impl> +void +BackEnd<Impl>::InstQueue::rescheduleMemInst(DynInstPtr &inst) +{ + DPRINTF(BE, "Rescheduling memory instruction [sn:%lli]\n", inst->seqNum); + assert(!inst->iqItValid); + replayList.push_front(inst); + inst->iqIt = replayList.begin(); + inst->iqItValid = true; + ++numInsts; +} + +template <class Impl> +void +BackEnd<Impl>::InstQueue::replayMemInst(DynInstPtr &inst) +{ + DPRINTF(BE, "Replaying memory instruction [sn:%lli]\n", inst->seqNum); + assert(find(ReplayList, inst->iqIt)); + InstListIt iq_it = --replayList.end(); + InstListIt iq_end_it = replayList.end(); + while (iq_it != iq_end_it) { + DynInstPtr rescheduled_inst = (*iq_it); + + DPRINTF(BE, "Memory instruction [sn:%lli] also replayed\n", inst->seqNum); + replayList.erase(iq_it--); + toBeScheduled.push_front(rescheduled_inst); + rescheduled_inst->iqIt = toBeScheduled.begin(); + } +} + +template <class Impl> +void +BackEnd<Impl>::InstQueue::completeMemInst(DynInstPtr &inst) +{ + panic("Not implemented."); +} + +template <class Impl> +bool +BackEnd<Impl>::InstQueue::find(queue q, InstListIt it) +{ + InstListIt iq_it, iq_end_it; + switch(q) { + case NonSpec: + iq_it = nonSpec.begin(); + iq_end_it = nonSpec.end(); + break; + case IQ: + iq_it = iq.begin(); + iq_end_it = iq.end(); + break; + case ToBeScheduled: + iq_it = toBeScheduled.begin(); + iq_end_it = toBeScheduled.end(); + break; + case ReadyList: + iq_it = readyList.begin(); + iq_end_it = readyList.end(); + break; + case ReplayList: + iq_it = replayList.begin(); + iq_end_it = replayList.end(); + } + + while (iq_it != it && iq_it != iq_end_it) { + iq_it++; + } + if (iq_it == it) { + return true; + } else { + return false; + } +} + +template <class Impl> +void +BackEnd<Impl>::InstQueue::dumpInsts() +{ + cprintf("IQ size: %i\n", iq.size()); + + InstListIt inst_list_it = --iq.end(); + + int num = 0; + int valid_num = 0; + while (inst_list_it != iq.end()) + { + cprintf("Instruction:%i\n", + num); + if (!(*inst_list_it)->isSquashed()) { + if (!(*inst_list_it)->isIssued()) { + ++valid_num; + cprintf("Count:%i\n", valid_num); + } else if ((*inst_list_it)->isMemRef() && + !(*inst_list_it)->memOpDone) { + // Loads that have not been marked as executed still count + // towards the total instructions. 
+ ++valid_num; + cprintf("Count:%i\n", valid_num); + } + } + + cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" + "Issued:%i\nSquashed:%i\n", + (*inst_list_it)->readPC(), + (*inst_list_it)->seqNum, + (*inst_list_it)->threadNumber, + (*inst_list_it)->isIssued(), + (*inst_list_it)->isSquashed()); + + if ((*inst_list_it)->isMemRef()) { + cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); + } + + cprintf("\n"); + + inst_list_it--; + ++num; + } + + cprintf("nonSpec size: %i\n", nonSpec.size()); + + inst_list_it = --nonSpec.end(); + + while (inst_list_it != nonSpec.end()) + { + cprintf("Instruction:%i\n", + num); + if (!(*inst_list_it)->isSquashed()) { + if (!(*inst_list_it)->isIssued()) { + ++valid_num; + cprintf("Count:%i\n", valid_num); + } else if ((*inst_list_it)->isMemRef() && + !(*inst_list_it)->memOpDone) { + // Loads that have not been marked as executed still count + // towards the total instructions. + ++valid_num; + cprintf("Count:%i\n", valid_num); + } + } + + cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" + "Issued:%i\nSquashed:%i\n", + (*inst_list_it)->readPC(), + (*inst_list_it)->seqNum, + (*inst_list_it)->threadNumber, + (*inst_list_it)->isIssued(), + (*inst_list_it)->isSquashed()); + + if ((*inst_list_it)->isMemRef()) { + cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); + } + + cprintf("\n"); + + inst_list_it--; + ++num; + } + + cprintf("toBeScheduled size: %i\n", toBeScheduled.size()); + + inst_list_it = --toBeScheduled.end(); + + while (inst_list_it != toBeScheduled.end()) + { + cprintf("Instruction:%i\n", + num); + if (!(*inst_list_it)->isSquashed()) { + if (!(*inst_list_it)->isIssued()) { + ++valid_num; + cprintf("Count:%i\n", valid_num); + } else if ((*inst_list_it)->isMemRef() && + !(*inst_list_it)->memOpDone) { + // Loads that have not been marked as executed still count + // towards the total instructions. + ++valid_num; + cprintf("Count:%i\n", valid_num); + } + } + + cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" + "Issued:%i\nSquashed:%i\n", + (*inst_list_it)->readPC(), + (*inst_list_it)->seqNum, + (*inst_list_it)->threadNumber, + (*inst_list_it)->isIssued(), + (*inst_list_it)->isSquashed()); + + if ((*inst_list_it)->isMemRef()) { + cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); + } + + cprintf("\n"); + + inst_list_it--; + ++num; + } + + cprintf("readyList size: %i\n", readyList.size()); + + inst_list_it = --readyList.end(); + + while (inst_list_it != readyList.end()) + { + cprintf("Instruction:%i\n", + num); + if (!(*inst_list_it)->isSquashed()) { + if (!(*inst_list_it)->isIssued()) { + ++valid_num; + cprintf("Count:%i\n", valid_num); + } else if ((*inst_list_it)->isMemRef() && + !(*inst_list_it)->memOpDone) { + // Loads that have not been marked as executed still count + // towards the total instructions. 
+ ++valid_num; + cprintf("Count:%i\n", valid_num); + } + } + + cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" + "Issued:%i\nSquashed:%i\n", + (*inst_list_it)->readPC(), + (*inst_list_it)->seqNum, + (*inst_list_it)->threadNumber, + (*inst_list_it)->isIssued(), + (*inst_list_it)->isSquashed()); + + if ((*inst_list_it)->isMemRef()) { + cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); + } + + cprintf("\n"); + + inst_list_it--; + ++num; + } +} + +template<class Impl> +BackEnd<Impl>::LdWritebackEvent::LdWritebackEvent(DynInstPtr &_inst, + BackEnd<Impl> *_be) + : Event(&mainEventQueue), inst(_inst), be(_be) +{ + this->setFlags(Event::AutoDelete); +} + +template<class Impl> +void +BackEnd<Impl>::LdWritebackEvent::process() +{ + DPRINTF(BE, "Load writeback event [sn:%lli]\n", inst->seqNum); +// DPRINTF(Activity, "Activity: Ld Writeback event [sn:%lli]\n", inst->seqNum); + + //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum); + +// iewStage->wakeCPU(); + + if (inst->isSquashed()) { + inst = NULL; + return; + } + + if (!inst->isExecuted()) { + inst->setExecuted(); + + // Execute again to copy data to proper place. + inst->completeAcc(); + } + + // Need to insert instruction into queue to commit + be->instToCommit(inst); + + //wroteToTimeBuffer = true; +// iewStage->activityThisCycle(); + + inst = NULL; +} + +template<class Impl> +const char * +BackEnd<Impl>::LdWritebackEvent::description() +{ + return "Load writeback event"; +} + + +template <class Impl> +BackEnd<Impl>::DCacheCompletionEvent::DCacheCompletionEvent(BackEnd *_be) + : Event(&mainEventQueue, CPU_Tick_Pri), be(_be) +{ +} + +template <class Impl> +void +BackEnd<Impl>::DCacheCompletionEvent::process() +{ +} + +template <class Impl> +const char * +BackEnd<Impl>::DCacheCompletionEvent::description() +{ + return "Cache completion event"; +} + +template <class Impl> +BackEnd<Impl>::BackEnd(Params *params) + : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(5, 5), + xcSquash(false), IQ(params), + cacheCompletionEvent(this), width(params->backEndWidth), + exactFullStall(true) +{ + numROBEntries = params->numROBEntries; + numInsts = 0; + numDispatchEntries = 32; + IQ.setBE(this); + LSQ.setBE(this); + + // Setup IQ and LSQ with their parameters here. + instsToDispatch = d2i.getWire(-1); + + instsToExecute = i2e.getWire(-1); + + IQ.setIssueExecQueue(&i2e); + + dispatchWidth = params->dispatchWidth ? params->dispatchWidth : width; + issueWidth = params->issueWidth ? params->issueWidth : width; + wbWidth = params->wbWidth ? params->wbWidth : width; + commitWidth = params->commitWidth ? 
params->commitWidth : width; + + LSQ.init(params, params->LQEntries, params->SQEntries, 0); + + dispatchStatus = Running; +} + +template <class Impl> +std::string +BackEnd<Impl>::name() const +{ + return cpu->name() + ".backend"; +} + +template <class Impl> +void +BackEnd<Impl>::regStats() +{ + using namespace Stats; + rob_cap_events + .init(cpu->number_of_threads) + .name(name() + ".ROB:cap_events") + .desc("number of cycles where ROB cap was active") + .flags(total) + ; + + rob_cap_inst_count + .init(cpu->number_of_threads) + .name(name() + ".ROB:cap_inst") + .desc("number of instructions held up by ROB cap") + .flags(total) + ; + + iq_cap_events + .init(cpu->number_of_threads) + .name(name() +".IQ:cap_events" ) + .desc("number of cycles where IQ cap was active") + .flags(total) + ; + + iq_cap_inst_count + .init(cpu->number_of_threads) + .name(name() + ".IQ:cap_inst") + .desc("number of instructions held up by IQ cap") + .flags(total) + ; + + + exe_inst + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:count") + .desc("number of insts issued") + .flags(total) + ; + + exe_swp + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:swp") + .desc("number of swp insts issued") + .flags(total) + ; + + exe_nop + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:nop") + .desc("number of nop insts issued") + .flags(total) + ; + + exe_refs + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:refs") + .desc("number of memory reference insts issued") + .flags(total) + ; + + exe_loads + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:loads") + .desc("number of load insts issued") + .flags(total) + ; + + exe_branches + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:branches") + .desc("Number of branches issued") + .flags(total) + ; + + issued_ops + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:op_count") + .desc("number of insts issued") + .flags(total) + ; + +/* + for (int i=0; i<Num_OpClasses; ++i) { + stringstream subname; + subname << opClassStrings[i] << "_delay"; + issue_delay_dist.subname(i, subname.str()); + } +*/ + // + // Other stats + // + lsq_forw_loads + .init(cpu->number_of_threads) + .name(name() + ".LSQ:forw_loads") + .desc("number of loads forwarded via LSQ") + .flags(total) + ; + + inv_addr_loads + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:addr_loads") + .desc("number of invalid-address loads") + .flags(total) + ; + + inv_addr_swpfs + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:addr_swpfs") + .desc("number of invalid-address SW prefetches") + .flags(total) + ; + + lsq_blocked_loads + .init(cpu->number_of_threads) + .name(name() + ".LSQ:blocked_loads") + .desc("number of ready loads not issued due to memory disambiguation") + .flags(total) + ; + + lsqInversion + .name(name() + ".ISSUE:lsq_invert") + .desc("Number of times LSQ instruction issued early") + ; + + n_issued_dist + .init(issueWidth + 1) + .name(name() + ".ISSUE:issued_per_cycle") + .desc("Number of insts issued each cycle") + .flags(total | pdf | dist) + ; + issue_delay_dist + .init(Num_OpClasses,0,99,2) + .name(name() + ".ISSUE:") + .desc("cycles from operands ready to issue") + .flags(pdf | cdf) + ; + + queue_res_dist + .init(Num_OpClasses, 0, 99, 2) + .name(name() + ".IQ:residence:") + .desc("cycles from dispatch to issue") + .flags(total | pdf | cdf ) + ; + for (int i = 0; i < Num_OpClasses; ++i) { + queue_res_dist.subname(i, opClassStrings[i]); + } + + writeback_count + .init(cpu->number_of_threads) + .name(name() + ".WB:count") + .desc("cumulative count 
of insts written-back") + .flags(total) + ; + + producer_inst + .init(cpu->number_of_threads) + .name(name() + ".WB:producers") + .desc("num instructions producing a value") + .flags(total) + ; + + consumer_inst + .init(cpu->number_of_threads) + .name(name() + ".WB:consumers") + .desc("num instructions consuming a value") + .flags(total) + ; + + wb_penalized + .init(cpu->number_of_threads) + .name(name() + ".WB:penalized") + .desc("number of instrctions required to write to 'other' IQ") + .flags(total) + ; + + + wb_penalized_rate + .name(name() + ".WB:penalized_rate") + .desc ("fraction of instructions written-back that wrote to 'other' IQ") + .flags(total) + ; + + wb_penalized_rate = wb_penalized / writeback_count; + + wb_fanout + .name(name() + ".WB:fanout") + .desc("average fanout of values written-back") + .flags(total) + ; + + wb_fanout = producer_inst / consumer_inst; + + wb_rate + .name(name() + ".WB:rate") + .desc("insts written-back per cycle") + .flags(total) + ; + wb_rate = writeback_count / cpu->numCycles; + + stat_com_inst + .init(cpu->number_of_threads) + .name(name() + ".COM:count") + .desc("Number of instructions committed") + .flags(total) + ; + + stat_com_swp + .init(cpu->number_of_threads) + .name(name() + ".COM:swp_count") + .desc("Number of s/w prefetches committed") + .flags(total) + ; + + stat_com_refs + .init(cpu->number_of_threads) + .name(name() + ".COM:refs") + .desc("Number of memory references committed") + .flags(total) + ; + + stat_com_loads + .init(cpu->number_of_threads) + .name(name() + ".COM:loads") + .desc("Number of loads committed") + .flags(total) + ; + + stat_com_membars + .init(cpu->number_of_threads) + .name(name() + ".COM:membars") + .desc("Number of memory barriers committed") + .flags(total) + ; + + stat_com_branches + .init(cpu->number_of_threads) + .name(name() + ".COM:branches") + .desc("Number of branches committed") + .flags(total) + ; + n_committed_dist + .init(0,commitWidth,1) + .name(name() + ".COM:committed_per_cycle") + .desc("Number of insts commited each cycle") + .flags(pdf) + ; + + // + // Commit-Eligible instructions... + // + // -> The number of instructions eligible to commit in those + // cycles where we reached our commit BW limit (less the number + // actually committed) + // + // -> The average value is computed over ALL CYCLES... 
not just + // the BW limited cycles + // + // -> The standard deviation is computed only over cycles where + // we reached the BW limit + // + commit_eligible + .init(cpu->number_of_threads) + .name(name() + ".COM:bw_limited") + .desc("number of insts not committed due to BW limits") + .flags(total) + ; + + commit_eligible_samples + .name(name() + ".COM:bw_lim_events") + .desc("number cycles where commit BW limit reached") + ; + + ROB_fcount + .name(name() + ".ROB:full_count") + .desc("number of cycles where ROB was full") + ; + + ROB_count + .init(cpu->number_of_threads) + .name(name() + ".ROB:occupancy") + .desc(name() + ".ROB occupancy (cumulative)") + .flags(total) + ; + + ROB_full_rate + .name(name() + ".ROB:full_rate") + .desc("ROB full per cycle") + ; + ROB_full_rate = ROB_fcount / cpu->numCycles; + + ROB_occ_rate + .name(name() + ".ROB:occ_rate") + .desc("ROB occupancy rate") + .flags(total) + ; + ROB_occ_rate = ROB_count / cpu->numCycles; + + ROB_occ_dist + .init(cpu->number_of_threads,0,numROBEntries,2) + .name(name() + ".ROB:occ_dist") + .desc("ROB Occupancy per cycle") + .flags(total | cdf) + ; + + IQ.regStats(); +} + +template <class Impl> +void +BackEnd<Impl>::setCommBuffer(TimeBuffer<CommStruct> *_comm) +{ + comm = _comm; + toIEW = comm->getWire(0); + fromCommit = comm->getWire(-1); +} + +template <class Impl> +void +BackEnd<Impl>::tick() +{ + DPRINTF(BE, "Ticking back end\n"); + + ROB_count[0]+= numInsts; + + wbCycle = 0; + + if (xcSquash) { + squashFromXC(); + } + + // Read in any done instruction information and update the IQ or LSQ. + updateStructures(); + + if (dispatchStatus != Blocked) { + d2i.advance(); + dispatchInsts(); + } else { + checkDispatchStatus(); + } + + i2e.advance(); + scheduleReadyInsts(); + + e2c.advance(); + executeInsts(); + + numInstsToWB.advance(); + writebackInsts(); + + commitInsts(); + + DPRINTF(BE, "IQ entries in use: %i, ROB entries in use: %i, LSQ loads: %i, LSQ stores: %i\n", + IQ.numInsts, numInsts, LSQ.numLoads(), LSQ.numStores()); + + assert(numInsts == instList.size()); +} + +template <class Impl> +void +BackEnd<Impl>::updateStructures() +{ + if (fromCommit->doneSeqNum) { + IQ.commit(fromCommit->doneSeqNum); + LSQ.commitLoads(fromCommit->doneSeqNum); + LSQ.commitStores(fromCommit->doneSeqNum); + } + + if (fromCommit->nonSpecSeqNum) { + if (fromCommit->uncached) { + LSQ.executeLoad(fromCommit->lqIdx); + } else { + IQ.scheduleNonSpec( + fromCommit->nonSpecSeqNum); + } + } +} + +template <class Impl> +void +BackEnd<Impl>::addToIQ(DynInstPtr &inst) +{ + // Do anything IQ specific here? + IQ.insert(inst); +} + +template <class Impl> +void +BackEnd<Impl>::addToLSQ(DynInstPtr &inst) +{ + // Do anything LSQ specific here? + LSQ.insert(inst); +} + +template <class Impl> +void +BackEnd<Impl>::dispatchInsts() +{ + DPRINTF(BE, "Trying to dispatch instructions.\n"); + + // Pull instructions out of the front end. + int disp_width = dispatchWidth ? dispatchWidth : width; + + // Could model dispatching time, but in general 1 cycle is probably + // good enough. + + if (dispatchSize < numDispatchEntries) { + for (int i = 0; i < disp_width; i++) { + // Get instructions + DynInstPtr inst = frontEnd->getInst(); + + if (!inst) { + // No more instructions to get + break; + } + + DPRINTF(BE, "Processing instruction [sn:%lli] PC:%#x\n", + inst->seqNum, inst->readPC()); + + for (int i = 0; i < inst->numDestRegs(); ++i) + renameTable[inst->destRegIdx(i)] = inst; + + // Add to queue to be dispatched. 
+ dispatch.push_back(inst); + + d2i[0].size++; + ++dispatchSize; + } + } + + assert(dispatch.size() < 64); + + for (int i = 0; i < instsToDispatch->size; ++i) { + assert(!dispatch.empty()); + // Get instruction from front of time buffer + DynInstPtr inst = dispatch.front(); + dispatch.pop_front(); + --dispatchSize; + + if (inst->isSquashed()) + continue; + + ++numInsts; + instList.push_back(inst); + + DPRINTF(BE, "Dispatching instruction [sn:%lli] PC:%#x\n", + inst->seqNum, inst->readPC()); + + addToIQ(inst); + + if (inst->isMemRef()) { + addToLSQ(inst); + } + + if (inst->isNonSpeculative()) { + inst->setCanCommit(); + } + + // Check if IQ or LSQ is full. If so we'll need to break and stop + // removing instructions. Also update the number of insts to remove + // from the queue. + if (exactFullStall) { + bool stall = false; + if (IQ.isFull()) { + DPRINTF(BE, "IQ is full!\n"); + stall = true; + } else if (LSQ.isFull()) { + DPRINTF(BE, "LSQ is full!\n"); + stall = true; + } else if (isFull()) { + DPRINTF(BE, "ROB is full!\n"); + stall = true; + ROB_fcount++; + } + if (stall) { + instsToDispatch->size-= i+1; + dispatchStall(); + return; + } + } + } + + // Check if IQ or LSQ is full. If so we'll need to break and stop + // removing instructions. Also update the number of insts to remove + // from the queue. Check here if we don't care about exact stall + // conditions. + + bool stall = false; + if (IQ.isFull()) { + DPRINTF(BE, "IQ is full!\n"); + stall = true; + } else if (LSQ.isFull()) { + DPRINTF(BE, "LSQ is full!\n"); + stall = true; + } else if (isFull()) { + DPRINTF(BE, "ROB is full!\n"); + stall = true; + ROB_fcount++; + } + if (stall) { + d2i.advance(); + dispatchStall(); + return; + } +} + +template <class Impl> +void +BackEnd<Impl>::dispatchStall() +{ + dispatchStatus = Blocked; + if (!cpu->decoupledFrontEnd) { + // Tell front end to stall here through a timebuffer, or just tell + // it directly. + } +} + +template <class Impl> +void +BackEnd<Impl>::checkDispatchStatus() +{ + DPRINTF(BE, "Checking dispatch status\n"); + assert(dispatchStatus == Blocked); + if (!IQ.isFull() && !LSQ.isFull() && !isFull()) { + DPRINTF(BE, "Dispatch no longer blocked\n"); + dispatchStatus = Running; + dispatchInsts(); + } +} + +template <class Impl> +void +BackEnd<Impl>::scheduleReadyInsts() +{ + // Tell IQ to put any ready instructions into the instruction list. + // Probably want to have a list of DynInstPtrs returned here. Then I + // can choose to either put them into a time buffer to simulate + // IQ scheduling time, or hand them directly off to the next stage. + // Do you ever want to directly hand it off to the next stage? + DPRINTF(BE, "Trying to schedule ready instructions\n"); + IQ.scheduleReadyInsts(); +} + +template <class Impl> +void +BackEnd<Impl>::executeInsts() +{ + int insts_to_execute = instsToExecute->size; + + issued_ops[0]+= insts_to_execute; + n_issued_dist[insts_to_execute]++; + + DPRINTF(BE, "Trying to execute %i instructions\n", insts_to_execute); + + fetchRedirect[0] = false; + + while (insts_to_execute > 0) { + // Get ready instruction from the IQ (or queue coming out of IQ) + // Execute the ready instruction. + // Wakeup any dependents if it's done. + DynInstPtr inst = IQ.getReadyInst(); + + DPRINTF(BE, "Executing inst [sn:%lli] PC: %#x\n", + inst->seqNum, inst->readPC()); + + ++funcExeInst; + + // Check if the instruction is squashed; if so then skip it + // and don't count it towards the FU usage. 
+ if (inst->isSquashed()) { + DPRINTF(BE, "Execute: Instruction was squashed.\n"); + + // Not sure how to handle this plus the method of sending # of + // instructions to use. Probably will just have to count it + // towards the bandwidth usage, but not the FU usage. + --insts_to_execute; + + // Consider this instruction executed so that commit can go + // ahead and retire the instruction. + inst->setExecuted(); + + // Not sure if I should set this here or just let commit try to + // commit any squashed instructions. I like the latter a bit more. + inst->setCanCommit(); + +// ++iewExecSquashedInsts; + + continue; + } + + Fault fault = NoFault; + + // Execute instruction. + // Note that if the instruction faults, it will be handled + // at the commit stage. + if (inst->isMemRef() && + (!inst->isDataPrefetch() && !inst->isInstPrefetch())) { + DPRINTF(BE, "Execute: Initiating access for memory " + "reference.\n"); + + // Tell the LDSTQ to execute this instruction (if it is a load). + if (inst->isLoad()) { + // Loads will mark themselves as executed, and their writeback + // event adds the instruction to the queue to commit + fault = LSQ.executeLoad(inst); + +// ++iewExecLoadInsts; + } else if (inst->isStore()) { + LSQ.executeStore(inst); + +// ++iewExecStoreInsts; + + if (!(inst->req->flags & LOCKED)) { + inst->setExecuted(); + + instToCommit(inst); + } + // Store conditionals will mark themselves as executed, and + // their writeback event will add the instruction to the queue + // to commit. + } else { + panic("Unexpected memory type!\n"); + } + + } else { + inst->execute(); + +// ++iewExecutedInsts; + + inst->setExecuted(); + + instToCommit(inst); + } + + updateExeInstStats(inst); + + // Probably should have some sort of function for this. + // More general question of how to handle squashes? Have some sort of + // squash unit that controls it? Probably... + // Check if branch was correct. This check happens after the + // instruction is added to the queue because even if the branch + // is mispredicted, the branch instruction itself is still valid. + // Only handle this if there hasn't already been something that + // redirects fetch in this group of instructions. + + // This probably needs to prioritize the redirects if a different + // scheduler is used. Currently the scheduler schedules the oldest + // instruction first, so the branch resolution order will be correct. + unsigned tid = inst->threadNumber; + + if (!fetchRedirect[tid]) { + + if (inst->mispredicted()) { + fetchRedirect[tid] = true; + + DPRINTF(BE, "Execute: Branch mispredict detected.\n"); + DPRINTF(BE, "Execute: Redirecting fetch to PC: %#x.\n", + inst->nextPC); + + // If incorrect, then signal the ROB that it must be squashed. + squashDueToBranch(inst); + + if (inst->predTaken()) { +// predictedTakenIncorrect++; + } else { +// predictedNotTakenIncorrect++; + } + } else if (LSQ.violation()) { + fetchRedirect[tid] = true; + + // Get the DynInst that caused the violation. Note that this + // clears the violation signal. + DynInstPtr violator; + violator = LSQ.getMemDepViolator(); + + DPRINTF(BE, "LDSTQ detected a violation. Violator PC: " + "%#x, inst PC: %#x. Addr is: %#x.\n", + violator->readPC(), inst->readPC(), inst->physEffAddr); + + // Tell the instruction queue that a violation has occured. +// IQ.violation(inst, violator); + + // Squash. 
+// squashDueToMemOrder(inst,tid); + squashDueToBranch(inst); + +// ++memOrderViolationEvents; + } else if (LSQ.loadBlocked()) { + fetchRedirect[tid] = true; + + DPRINTF(BE, "Load operation couldn't execute because the " + "memory system is blocked. PC: %#x [sn:%lli]\n", + inst->readPC(), inst->seqNum); + + squashDueToMemBlocked(inst); + } + } + +// instList.pop_front(); + + --insts_to_execute; + + // keep an instruction count + thread->numInst++; + thread->numInsts++; + } + + assert(insts_to_execute >= 0); +} + +template<class Impl> +void +BackEnd<Impl>::instToCommit(DynInstPtr &inst) +{ + int wb_width = wbWidth; + // First check the time slot that this instruction will write + // to. If there are free write ports at the time, then go ahead + // and write the instruction to that time. If there are not, + // keep looking back to see where's the first time there's a + // free slot. What happens if you run out of free spaces? + // For now naively assume that all instructions take one cycle. + // Otherwise would have to look into the time buffer based on the + // latency of the instruction. + + DPRINTF(BE, "Sending instructions to commit [sn:%lli] PC %#x.\n", + inst->seqNum, inst->readPC()); + + while (numInstsToWB[wbCycle].size >= wb_width) { + ++wbCycle; + + assert(wbCycle < 5); + } + + // Add finished instruction to queue to commit. + writeback.push_back(inst); + numInstsToWB[wbCycle].size++; + + if (wbCycle) + wb_penalized[0]++; +} + +template <class Impl> +void +BackEnd<Impl>::writebackInsts() +{ + int wb_width = wbWidth; + // Using this method I'm not quite sure how to prevent an + // instruction from waking its own dependents multiple times, + // without the guarantee that commit always has enough bandwidth + // to accept all instructions being written back. This guarantee + // might not be too unrealistic. + InstListIt wb_inst_it = writeback.begin(); + InstListIt wb_end_it = writeback.end(); + int inst_num = 0; + int consumer_insts = 0; + + for (; inst_num < wb_width && + wb_inst_it != wb_end_it; inst_num++) { + DynInstPtr inst = (*wb_inst_it); + + // Some instructions will be sent to commit without having + // executed because they need commit to handle them. + // E.g. Uncached loads have not actually executed when they + // are first sent to commit. Instead commit must tell the LSQ + // when it's ready to execute the uncached load. + if (!inst->isSquashed()) { + DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n", + inst->seqNum, inst->readPC()); + + inst->setCanCommit(); + inst->setResultReady(); + + if (inst->isExecuted()) { + int dependents = IQ.wakeDependents(inst); + if (dependents) { + producer_inst[0]++; + consumer_insts+= dependents; + } + } + } + + writeback.erase(wb_inst_it++); + } + LSQ.writebackStores(); + consumer_inst[0]+= consumer_insts; + writeback_count[0]+= inst_num; +} + +template <class Impl> +bool +BackEnd<Impl>::commitInst(int inst_num) +{ + // Read instruction from the head of the ROB + DynInstPtr inst = instList.front(); + + // Make sure instruction is valid + assert(inst); + + if (!inst->readyToCommit()) + return false; + + DPRINTF(BE, "Trying to commit instruction [sn:%lli] PC:%#x\n", + inst->seqNum, inst->readPC()); + + // If the instruction is not executed yet, then it is a non-speculative + // or store inst. Signal backwards that it should be executed. + if (!inst->isExecuted()) { + // Keep this number correct. We have not yet actually executed + // and committed this instruction. 
+// thread->funcExeInst--; + + if (inst->isNonSpeculative()) { +#if !FULL_SYSTEM + // Hack to make sure syscalls aren't executed until all stores + // write back their data. This direct communication shouldn't + // be used for anything other than this. + if (inst_num > 0 || LSQ.hasStoresToWB()) { + DPRINTF(BE, "Waiting for all stores to writeback.\n"); + return false; + } +#endif + + DPRINTF(BE, "Encountered a store or non-speculative " + "instruction at the head of the ROB, PC %#x.\n", + inst->readPC()); + + // Send back the non-speculative instruction's sequence number. + toIEW->nonSpecSeqNum = inst->seqNum; + + // Change the instruction so it won't try to commit again until + // it is executed. + inst->clearCanCommit(); + +// ++commitNonSpecStalls; + + return false; + } else if (inst->isLoad()) { + DPRINTF(BE, "[sn:%lli]: Uncached load, PC %#x.\n", + inst->seqNum, inst->readPC()); + + // Send back the non-speculative instruction's sequence + // number. Maybe just tell the lsq to re-execute the load. + toIEW->nonSpecSeqNum = inst->seqNum; + toIEW->uncached = true; + toIEW->lqIdx = inst->lqIdx; + + inst->clearCanCommit(); + + return false; + } else { + panic("Trying to commit un-executed instruction " + "of unknown type!\n"); + } + } + + // Now check if it's one of the special trap or barrier or + // serializing instructions. + if (inst->isThreadSync()) + { + // Not handled for now. + panic("Barrier instructions are not handled yet.\n"); + } + + // Check if the instruction caused a fault. If so, trap. + Fault inst_fault = inst->getFault(); + + if (inst_fault != NoFault) { + if (!inst->isNop()) { +#if FULL_SYSTEM + DPRINTF(BE, "Inst [sn:%lli] PC %#x has a fault\n", + inst->seqNum, inst->readPC()); + +// assert(!thread->inSyscall); + +// thread->inSyscall = true; + + // Consider holding onto the trap and waiting until the trap event + // happens for this to be executed. + inst_fault->invoke(thread->getXCProxy()); + + // Exit state update mode to avoid accidental updating. +// thread->inSyscall = false; + +// commitStatus = TrapPending; + + // Generate trap squash event. +// generateTrapEvent(); + + return false; +#else // !FULL_SYSTEM + panic("fault (%d) detected @ PC %08p", inst_fault, + inst->PC); +#endif // FULL_SYSTEM + } + } + + if (inst->isControl()) { +// ++commitCommittedBranches; + } + + int freed_regs = 0; + + for (int i = 0; i < inst->numDestRegs(); ++i) { + DPRINTF(BE, "Commit rename map setting register %i to [sn:%lli]\n", + (int)inst->destRegIdx(i), inst->seqNum); + thread->renameTable[inst->destRegIdx(i)] = inst; + ++freed_regs; + } + + if (inst->traceData) { + inst->traceData->finalize(); + inst->traceData = NULL; + } + + inst->clearDependents(); + + frontEnd->addFreeRegs(freed_regs); + + instList.pop_front(); + + --numInsts; + cpu->numInst++; + thread->numInsts++; + ++thread->funcExeInst; + thread->PC = inst->readNextPC(); + updateComInstStats(inst); + + // Write the done sequence number here. + toIEW->doneSeqNum = inst->seqNum; + +#if FULL_SYSTEM + int count = 0; + Addr oldpc; + do { + if (count == 0) + assert(!thread->inSyscall && !thread->trapPending); + oldpc = thread->readPC(); + cpu->system->pcEventQueue.service( + thread->getXCProxy()); + count++; + } while (oldpc != thread->readPC()); + if (count > 1) { + DPRINTF(BE, "PC skip function event, stopping commit\n"); +// completed_last_inst = false; +// squashPending = true; + return false; + } +#endif + return true; +} + +template <class Impl> +void +BackEnd<Impl>::commitInsts() +{ + int commit_width = commitWidth ? 
commitWidth : width; + + // Not sure this should be a loop or not. + int inst_num = 0; + while (!instList.empty() && inst_num < commit_width) { + if (instList.front()->isSquashed()) { + panic("No squashed insts should still be on the list!"); + instList.front()->clearDependents(); + instList.pop_front(); + continue; + } + + if (!commitInst(inst_num++)) { + break; + } + } + n_committed_dist.sample(inst_num); +} + +template <class Impl> +void +BackEnd<Impl>::squash(const InstSeqNum &sn) +{ + IQ.squash(sn); + LSQ.squash(sn); + + int freed_regs = 0; + InstListIt dispatch_end = dispatch.end(); + InstListIt insts_it = dispatch.end(); + insts_it--; + + while (insts_it != dispatch_end && (*insts_it)->seqNum > sn) + { + if ((*insts_it)->isSquashed()) { + --insts_it; + continue; + } + DPRINTF(BE, "Squashing instruction on dispatch list PC %#x, [sn:%lli].\n", + (*insts_it)->readPC(), + (*insts_it)->seqNum); + + // Mark the instruction as squashed, and ready to commit so that + // it can drain out of the pipeline. + (*insts_it)->setSquashed(); + + (*insts_it)->setCanCommit(); + + // Be careful with IPRs and such here + for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) { + DynInstPtr prev_dest = (*insts_it)->getPrevDestInst(i); + DPRINTF(BE, "Commit rename map setting register %i to [sn:%lli]\n", + (int)(*insts_it)->destRegIdx(i), prev_dest); + renameTable[(*insts_it)->destRegIdx(i)] = prev_dest; + ++freed_regs; + } + + (*insts_it)->clearDependents(); + + --insts_it; + } + + insts_it = instList.end(); + insts_it--; + + while (!instList.empty() && (*insts_it)->seqNum > sn) + { + if ((*insts_it)->isSquashed()) { + --insts_it; + continue; + } + DPRINTF(BE, "Squashing instruction on inst list PC %#x, [sn:%lli].\n", + (*insts_it)->readPC(), + (*insts_it)->seqNum); + + // Mark the instruction as squashed, and ready to commit so that + // it can drain out of the pipeline. 
+ (*insts_it)->setSquashed(); + + (*insts_it)->setCanCommit(); + + for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) { + DynInstPtr prev_dest = (*insts_it)->getPrevDestInst(i); + DPRINTF(BE, "Commit rename map setting register %i to [sn:%lli]\n", + (int)(*insts_it)->destRegIdx(i), prev_dest); + renameTable[(*insts_it)->destRegIdx(i)] = prev_dest; + ++freed_regs; + } + + (*insts_it)->clearDependents(); + + instList.erase(insts_it--); + --numInsts; + } + + frontEnd->addFreeRegs(freed_regs); +} + +template <class Impl> +void +BackEnd<Impl>::squashFromXC() +{ + xcSquash = true; +} + +template <class Impl> +void +BackEnd<Impl>::squashDueToBranch(DynInstPtr &inst) +{ + // Update the branch predictor state I guess + squash(inst->seqNum); + frontEnd->squash(inst->seqNum, inst->readNextPC(), + true, inst->mispredicted()); +} + +template <class Impl> +void +BackEnd<Impl>::squashDueToMemBlocked(DynInstPtr &inst) +{ + DPRINTF(IEW, "Memory blocked, squashing load and younger insts, " + "PC: %#x [sn:%i].\n", inst->readPC(), inst->seqNum); + + squash(inst->seqNum - 1); + frontEnd->squash(inst->seqNum - 1, inst->readPC()); +} + +template <class Impl> +void +BackEnd<Impl>::fetchFault(Fault &fault) +{ + faultFromFetch = fault; +} + +template <class Impl> +void +BackEnd<Impl>::updateExeInstStats(DynInstPtr &inst) +{ + int thread_number = inst->threadNumber; + + // + // Pick off the software prefetches + // +#ifdef TARGET_ALPHA + if (inst->isDataPrefetch()) + exe_swp[thread_number]++; + else + exe_inst[thread_number]++; +#else + exe_inst[thread_number]++; +#endif + + // + // Control operations + // + if (inst->isControl()) + exe_branches[thread_number]++; + + // + // Memory operations + // + if (inst->isMemRef()) { + exe_refs[thread_number]++; + + if (inst->isLoad()) + exe_loads[thread_number]++; + } +} + +template <class Impl> +void +BackEnd<Impl>::updateComInstStats(DynInstPtr &inst) +{ + unsigned thread = inst->threadNumber; + + // + // Pick off the software prefetches + // +#ifdef TARGET_ALPHA + if (inst->isDataPrefetch()) { + stat_com_swp[thread]++; + } else { + stat_com_inst[thread]++; + } +#else + stat_com_inst[thread]++; +#endif + + // + // Control Instructions + // + if (inst->isControl()) + stat_com_branches[thread]++; + + // + // Memory references + // + if (inst->isMemRef()) { + stat_com_refs[thread]++; + + if (inst->isLoad()) { + stat_com_loads[thread]++; + } + } + + if (inst->isMemBarrier()) { + stat_com_membars[thread]++; + } +} + +template <class Impl> +void +BackEnd<Impl>::dumpInsts() +{ + int num = 0; + int valid_num = 0; + + InstListIt inst_list_it = instList.begin(); + + cprintf("Inst list size: %i\n", instList.size()); + + while (inst_list_it != instList.end()) + { + cprintf("Instruction:%i\n", + num); + if (!(*inst_list_it)->isSquashed()) { + if (!(*inst_list_it)->isIssued()) { + ++valid_num; + cprintf("Count:%i\n", valid_num); + } else if ((*inst_list_it)->isMemRef() && + !(*inst_list_it)->memOpDone) { + // Loads that have not been marked as executed still count + // towards the total instructions. 
+ ++valid_num; + cprintf("Count:%i\n", valid_num); + } + } + + cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" + "Issued:%i\nSquashed:%i\n", + (*inst_list_it)->readPC(), + (*inst_list_it)->seqNum, + (*inst_list_it)->threadNumber, + (*inst_list_it)->isIssued(), + (*inst_list_it)->isSquashed()); + + if ((*inst_list_it)->isMemRef()) { + cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); + } + + cprintf("\n"); + + inst_list_it++; + ++num; + } + + cprintf("Dispatch list size: %i\n", dispatch.size()); + + inst_list_it = dispatch.begin(); + + while (inst_list_it != dispatch.end()) + { + cprintf("Instruction:%i\n", + num); + if (!(*inst_list_it)->isSquashed()) { + if (!(*inst_list_it)->isIssued()) { + ++valid_num; + cprintf("Count:%i\n", valid_num); + } else if ((*inst_list_it)->isMemRef() && + !(*inst_list_it)->memOpDone) { + // Loads that have not been marked as executed still count + // towards the total instructions. + ++valid_num; + cprintf("Count:%i\n", valid_num); + } + } + + cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" + "Issued:%i\nSquashed:%i\n", + (*inst_list_it)->readPC(), + (*inst_list_it)->seqNum, + (*inst_list_it)->threadNumber, + (*inst_list_it)->isIssued(), + (*inst_list_it)->isSquashed()); + + if ((*inst_list_it)->isMemRef()) { + cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); + } + + cprintf("\n"); + + inst_list_it++; + ++num; + } + + cprintf("Writeback list size: %i\n", writeback.size()); + + inst_list_it = writeback.begin(); + + while (inst_list_it != writeback.end()) + { + cprintf("Instruction:%i\n", + num); + if (!(*inst_list_it)->isSquashed()) { + if (!(*inst_list_it)->isIssued()) { + ++valid_num; + cprintf("Count:%i\n", valid_num); + } else if ((*inst_list_it)->isMemRef() && + !(*inst_list_it)->memOpDone) { + // Loads that have not been marked as executed still count + // towards the total instructions. 
+ ++valid_num; + cprintf("Count:%i\n", valid_num); + } + } + + cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" + "Issued:%i\nSquashed:%i\n", + (*inst_list_it)->readPC(), + (*inst_list_it)->seqNum, + (*inst_list_it)->threadNumber, + (*inst_list_it)->isIssued(), + (*inst_list_it)->isSquashed()); + + if ((*inst_list_it)->isMemRef()) { + cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); + } + + cprintf("\n"); + + inst_list_it++; + ++num; + } +} diff --git a/cpu/ozone/cpu_builder.cc b/cpu/ozone/cpu_builder.cc new file mode 100644 index 000000000..64aa49c71 --- /dev/null +++ b/cpu/ozone/cpu_builder.cc @@ -0,0 +1,830 @@ + +#include <string> + +#include "cpu/checker/cpu.hh" +#include "cpu/inst_seq.hh" +#include "cpu/ozone/cpu.hh" +#include "cpu/ozone/ozone_impl.hh" +#include "cpu/ozone/simple_impl.hh" +#include "cpu/ozone/simple_params.hh" +#include "mem/cache/base_cache.hh" +#include "sim/builder.hh" +#include "sim/process.hh" +#include "sim/sim_object.hh" + +class DerivOzoneCPU : public OzoneCPU<OzoneImpl> +{ + public: + DerivOzoneCPU(SimpleParams *p) + : OzoneCPU<OzoneImpl>(p) + { } +}; + +class SimpleOzoneCPU : public OzoneCPU<SimpleImpl> +{ + public: + SimpleOzoneCPU(SimpleParams *p) + : OzoneCPU<SimpleImpl>(p) + { } +}; + + +//////////////////////////////////////////////////////////////////////// +// +// OzoneCPU Simulation Object +// + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivOzoneCPU) + + Param<int> clock; + Param<int> numThreads; + +#if FULL_SYSTEM +SimObjectParam<System *> system; +Param<int> cpu_id; +SimObjectParam<AlphaITB *> itb; +SimObjectParam<AlphaDTB *> dtb; +#else +SimObjectVectorParam<Process *> workload; +//SimObjectParam<PageTable *> page_table; +#endif // FULL_SYSTEM + +SimObjectParam<FunctionalMemory *> mem; + +SimObjectParam<BaseCPU *> checker; + +Param<Counter> max_insts_any_thread; +Param<Counter> max_insts_all_threads; +Param<Counter> max_loads_any_thread; +Param<Counter> max_loads_all_threads; + +SimObjectParam<BaseCache *> icache; +SimObjectParam<BaseCache *> dcache; + +Param<unsigned> cachePorts; +Param<unsigned> width; +Param<unsigned> frontEndWidth; +Param<unsigned> backEndWidth; +Param<unsigned> backEndSquashLatency; +Param<unsigned> backEndLatency; +Param<unsigned> maxInstBufferSize; +Param<unsigned> numPhysicalRegs; +Param<unsigned> maxOutstandingMemOps; + +Param<unsigned> decodeToFetchDelay; +Param<unsigned> renameToFetchDelay; +Param<unsigned> iewToFetchDelay; +Param<unsigned> commitToFetchDelay; +Param<unsigned> fetchWidth; + +Param<unsigned> renameToDecodeDelay; +Param<unsigned> iewToDecodeDelay; +Param<unsigned> commitToDecodeDelay; +Param<unsigned> fetchToDecodeDelay; +Param<unsigned> decodeWidth; + +Param<unsigned> iewToRenameDelay; +Param<unsigned> commitToRenameDelay; +Param<unsigned> decodeToRenameDelay; +Param<unsigned> renameWidth; + +Param<unsigned> commitToIEWDelay; +Param<unsigned> renameToIEWDelay; +Param<unsigned> issueToExecuteDelay; +Param<unsigned> issueWidth; +Param<unsigned> executeWidth; +Param<unsigned> executeIntWidth; +Param<unsigned> executeFloatWidth; +Param<unsigned> executeBranchWidth; +Param<unsigned> executeMemoryWidth; + +Param<unsigned> iewToCommitDelay; +Param<unsigned> renameToROBDelay; +Param<unsigned> commitWidth; +Param<unsigned> squashWidth; + +Param<unsigned> localPredictorSize; +Param<unsigned> localCtrBits; +Param<unsigned> localHistoryTableSize; +Param<unsigned> localHistoryBits; +Param<unsigned> globalPredictorSize; +Param<unsigned> globalCtrBits; +Param<unsigned> globalHistoryBits; +Param<unsigned> choicePredictorSize; 
+Param<unsigned> choiceCtrBits;
+
+Param<unsigned> BTBEntries;
+Param<unsigned> BTBTagSize;
+
+Param<unsigned> RASSize;
+
+Param<unsigned> LQEntries;
+Param<unsigned> SQEntries;
+Param<unsigned> LFSTSize;
+Param<unsigned> SSITSize;
+
+Param<unsigned> numPhysIntRegs;
+Param<unsigned> numPhysFloatRegs;
+Param<unsigned> numIQEntries;
+Param<unsigned> numROBEntries;
+
+Param<bool> decoupledFrontEnd;
+Param<int> dispatchWidth;
+Param<int> wbWidth;
+
+Param<unsigned> smtNumFetchingThreads;
+Param<std::string> smtFetchPolicy;
+Param<std::string> smtLSQPolicy;
+Param<unsigned> smtLSQThreshold;
+Param<std::string> smtIQPolicy;
+Param<unsigned> smtIQThreshold;
+Param<std::string> smtROBPolicy;
+Param<unsigned> smtROBThreshold;
+Param<std::string> smtCommitPolicy;
+
+Param<unsigned> instShiftAmt;
+
+Param<bool> defer_registration;
+
+Param<bool> function_trace;
+Param<Tick> function_trace_start;
+
+END_DECLARE_SIM_OBJECT_PARAMS(DerivOzoneCPU)
+
+BEGIN_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU)
+
+    INIT_PARAM(clock, "clock speed"),
+    INIT_PARAM(numThreads, "number of HW thread contexts"),
+
+#if FULL_SYSTEM
+    INIT_PARAM(system, "System object"),
+    INIT_PARAM(cpu_id, "processor ID"),
+    INIT_PARAM(itb, "Instruction translation buffer"),
+    INIT_PARAM(dtb, "Data translation buffer"),
+#else
+    INIT_PARAM(workload, "Processes to run"),
+//    INIT_PARAM(page_table, "Page table"),
+#endif // FULL_SYSTEM
+
+    INIT_PARAM_DFLT(mem, "Memory", NULL),
+
+    INIT_PARAM_DFLT(checker, "Checker CPU", NULL),
+
+    INIT_PARAM_DFLT(max_insts_any_thread,
+                    "Terminate when any thread reaches this inst count",
+                    0),
+    INIT_PARAM_DFLT(max_insts_all_threads,
+                    "Terminate when all threads have reached "
+                    "this inst count",
+                    0),
+    INIT_PARAM_DFLT(max_loads_any_thread,
+                    "Terminate when any thread reaches this load count",
+                    0),
+    INIT_PARAM_DFLT(max_loads_all_threads,
+                    "Terminate when all threads have reached this load "
+                    "count",
+                    0),
+
+    INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL),
+    INIT_PARAM_DFLT(dcache, "L1 data cache", NULL),
+
+    INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200),
+    INIT_PARAM_DFLT(width, "Width", 1),
+    INIT_PARAM_DFLT(frontEndWidth, "Front end width", 1),
+    INIT_PARAM_DFLT(backEndWidth, "Back end width", 1),
+    INIT_PARAM_DFLT(backEndSquashLatency, "Back end squash latency", 1),
+    INIT_PARAM_DFLT(backEndLatency, "Back end latency", 1),
+    INIT_PARAM_DFLT(maxInstBufferSize, "Maximum instruction buffer size", 16),
+    INIT_PARAM(numPhysicalRegs, "Number of physical registers"),
+    INIT_PARAM_DFLT(maxOutstandingMemOps, "Maximum outstanding memory operations", 4),
+
+    INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"),
+    INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"),
+    INIT_PARAM(iewToFetchDelay, "Issue/Execute/Writeback to fetch "
+               "delay"),
+    INIT_PARAM(commitToFetchDelay, "Commit to fetch delay"),
+    INIT_PARAM(fetchWidth, "Fetch width"),
+    INIT_PARAM(renameToDecodeDelay, "Rename to decode delay"),
+    INIT_PARAM(iewToDecodeDelay, "Issue/Execute/Writeback to decode "
+               "delay"),
+    INIT_PARAM(commitToDecodeDelay, "Commit to decode delay"),
+    INIT_PARAM(fetchToDecodeDelay, "Fetch to decode delay"),
+    INIT_PARAM(decodeWidth, "Decode width"),
+
+    INIT_PARAM(iewToRenameDelay, "Issue/Execute/Writeback to rename "
+               "delay"),
+    INIT_PARAM(commitToRenameDelay, "Commit to rename delay"),
+    INIT_PARAM(decodeToRenameDelay, "Decode to rename delay"),
+    INIT_PARAM(renameWidth, "Rename width"),
+
+    INIT_PARAM(commitToIEWDelay, "Commit to "
+               "Issue/Execute/Writeback delay"),
+    INIT_PARAM(renameToIEWDelay, "Rename to "
" + "Issue/Execute/Writeback delay"), + INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal" + "to the IEW stage)"), + INIT_PARAM(issueWidth, "Issue width"), + INIT_PARAM(executeWidth, "Execute width"), + INIT_PARAM(executeIntWidth, "Integer execute width"), + INIT_PARAM(executeFloatWidth, "Floating point execute width"), + INIT_PARAM(executeBranchWidth, "Branch execute width"), + INIT_PARAM(executeMemoryWidth, "Memory execute width"), + + INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit " + "delay"), + INIT_PARAM(renameToROBDelay, "Rename to reorder buffer delay"), + INIT_PARAM(commitWidth, "Commit width"), + INIT_PARAM(squashWidth, "Squash width"), + + INIT_PARAM(localPredictorSize, "Size of local predictor"), + INIT_PARAM(localCtrBits, "Bits per counter"), + INIT_PARAM(localHistoryTableSize, "Size of local history table"), + INIT_PARAM(localHistoryBits, "Bits for the local history"), + INIT_PARAM(globalPredictorSize, "Size of global predictor"), + INIT_PARAM(globalCtrBits, "Bits per counter"), + INIT_PARAM(globalHistoryBits, "Bits of history"), + INIT_PARAM(choicePredictorSize, "Size of choice predictor"), + INIT_PARAM(choiceCtrBits, "Bits of choice counters"), + + INIT_PARAM(BTBEntries, "Number of BTB entries"), + INIT_PARAM(BTBTagSize, "Size of the BTB tags, in bits"), + + INIT_PARAM(RASSize, "RAS size"), + + INIT_PARAM(LQEntries, "Number of load queue entries"), + INIT_PARAM(SQEntries, "Number of store queue entries"), + INIT_PARAM(LFSTSize, "Last fetched store table size"), + INIT_PARAM(SSITSize, "Store set ID table size"), + + INIT_PARAM(numPhysIntRegs, "Number of physical integer registers"), + INIT_PARAM(numPhysFloatRegs, "Number of physical floating point " + "registers"), + INIT_PARAM(numIQEntries, "Number of instruction queue entries"), + INIT_PARAM(numROBEntries, "Number of reorder buffer entries"), + + INIT_PARAM_DFLT(decoupledFrontEnd, "Decoupled front end", true), + INIT_PARAM_DFLT(dispatchWidth, "Dispatch width", 0), + INIT_PARAM_DFLT(wbWidth, "Writeback width", 0), + + INIT_PARAM_DFLT(smtNumFetchingThreads, "SMT Number of Fetching Threads", 1), + INIT_PARAM_DFLT(smtFetchPolicy, "SMT Fetch Policy", "SingleThread"), + INIT_PARAM_DFLT(smtLSQPolicy, "SMT LSQ Sharing Policy", "Partitioned"), + INIT_PARAM_DFLT(smtLSQThreshold,"SMT LSQ Threshold", 100), + INIT_PARAM_DFLT(smtIQPolicy, "SMT IQ Policy", "Partitioned"), + INIT_PARAM_DFLT(smtIQThreshold, "SMT IQ Threshold", 100), + INIT_PARAM_DFLT(smtROBPolicy, "SMT ROB Sharing Policy", "Partitioned"), + INIT_PARAM_DFLT(smtROBThreshold,"SMT ROB Threshold", 100), + INIT_PARAM_DFLT(smtCommitPolicy,"SMT Commit Fetch Policy", "RoundRobin"), + + INIT_PARAM(instShiftAmt, "Number of bits to shift instructions by"), + INIT_PARAM(defer_registration, "defer system registration (for sampling)"), + + INIT_PARAM(function_trace, "Enable function trace"), + INIT_PARAM(function_trace_start, "Cycle to start function trace") + +END_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU) + +CREATE_SIM_OBJECT(DerivOzoneCPU) +{ + DerivOzoneCPU *cpu; + +#if FULL_SYSTEM + // Full-system only supports a single thread for the moment. + int actual_num_threads = 1; +#else + // In non-full-system mode, we infer the number of threads from + // the workload if it's not explicitly specified. + int actual_num_threads = + numThreads.isValid() ? 
numThreads : workload.size(); + + if (workload.size() == 0) { + fatal("Must specify at least one workload!"); + } + +#endif + + SimpleParams *params = new SimpleParams; + + params->clock = clock; + + params->name = getInstanceName(); + params->numberOfThreads = actual_num_threads; + +#if FULL_SYSTEM + params->system = system; + params->cpu_id = cpu_id; + params->itb = itb; + params->dtb = dtb; +#else + params->workload = workload; +// params->pTable = page_table; +#endif // FULL_SYSTEM + + params->mem = mem; + params->checker = checker; + params->max_insts_any_thread = max_insts_any_thread; + params->max_insts_all_threads = max_insts_all_threads; + params->max_loads_any_thread = max_loads_any_thread; + params->max_loads_all_threads = max_loads_all_threads; + + // + // Caches + // + params->icacheInterface = icache ? icache->getInterface() : NULL; + params->dcacheInterface = dcache ? dcache->getInterface() : NULL; + params->cachePorts = cachePorts; + + params->width = width; + params->frontEndWidth = frontEndWidth; + params->backEndWidth = backEndWidth; + params->backEndSquashLatency = backEndSquashLatency; + params->backEndLatency = backEndLatency; + params->maxInstBufferSize = maxInstBufferSize; + params->numPhysicalRegs = numPhysIntRegs + numPhysFloatRegs; + params->maxOutstandingMemOps = maxOutstandingMemOps; + + params->decodeToFetchDelay = decodeToFetchDelay; + params->renameToFetchDelay = renameToFetchDelay; + params->iewToFetchDelay = iewToFetchDelay; + params->commitToFetchDelay = commitToFetchDelay; + params->fetchWidth = fetchWidth; + + params->renameToDecodeDelay = renameToDecodeDelay; + params->iewToDecodeDelay = iewToDecodeDelay; + params->commitToDecodeDelay = commitToDecodeDelay; + params->fetchToDecodeDelay = fetchToDecodeDelay; + params->decodeWidth = decodeWidth; + + params->iewToRenameDelay = iewToRenameDelay; + params->commitToRenameDelay = commitToRenameDelay; + params->decodeToRenameDelay = decodeToRenameDelay; + params->renameWidth = renameWidth; + + params->commitToIEWDelay = commitToIEWDelay; + params->renameToIEWDelay = renameToIEWDelay; + params->issueToExecuteDelay = issueToExecuteDelay; + params->issueWidth = issueWidth; + params->executeWidth = executeWidth; + params->executeIntWidth = executeIntWidth; + params->executeFloatWidth = executeFloatWidth; + params->executeBranchWidth = executeBranchWidth; + params->executeMemoryWidth = executeMemoryWidth; + + params->iewToCommitDelay = iewToCommitDelay; + params->renameToROBDelay = renameToROBDelay; + params->commitWidth = commitWidth; + params->squashWidth = squashWidth; + + + params->localPredictorSize = localPredictorSize; + params->localCtrBits = localCtrBits; + params->localHistoryTableSize = localHistoryTableSize; + params->localHistoryBits = localHistoryBits; + params->globalPredictorSize = globalPredictorSize; + params->globalCtrBits = globalCtrBits; + params->globalHistoryBits = globalHistoryBits; + params->choicePredictorSize = choicePredictorSize; + params->choiceCtrBits = choiceCtrBits; + + params->BTBEntries = BTBEntries; + params->BTBTagSize = BTBTagSize; + + params->RASSize = RASSize; + + params->LQEntries = LQEntries; + params->SQEntries = SQEntries; + + params->SSITSize = SSITSize; + params->LFSTSize = LFSTSize; + + params->numPhysIntRegs = numPhysIntRegs; + params->numPhysFloatRegs = numPhysFloatRegs; + params->numIQEntries = numIQEntries; + params->numROBEntries = numROBEntries; + + params->decoupledFrontEnd = decoupledFrontEnd; + params->dispatchWidth = dispatchWidth; + params->wbWidth = 
wbWidth; + + params->smtNumFetchingThreads = smtNumFetchingThreads; + params->smtFetchPolicy = smtFetchPolicy; + params->smtIQPolicy = smtIQPolicy; + params->smtLSQPolicy = smtLSQPolicy; + params->smtLSQThreshold = smtLSQThreshold; + params->smtROBPolicy = smtROBPolicy; + params->smtROBThreshold = smtROBThreshold; + params->smtCommitPolicy = smtCommitPolicy; + + params->instShiftAmt = 2; + + params->deferRegistration = defer_registration; + + params->functionTrace = function_trace; + params->functionTraceStart = function_trace_start; + + cpu = new DerivOzoneCPU(params); + + return cpu; +} + +REGISTER_SIM_OBJECT("DerivOzoneCPU", DerivOzoneCPU) + + + +//////////////////////////////////////////////////////////////////////// +// +// OzoneCPU Simulation Object +// + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(SimpleOzoneCPU) + + Param<int> clock; + Param<int> numThreads; + +#if FULL_SYSTEM +SimObjectParam<System *> system; +Param<int> cpu_id; +SimObjectParam<AlphaITB *> itb; +SimObjectParam<AlphaDTB *> dtb; +#else +SimObjectVectorParam<Process *> workload; +//SimObjectParam<PageTable *> page_table; +#endif // FULL_SYSTEM + +SimObjectParam<FunctionalMemory *> mem; + +SimObjectParam<BaseCPU *> checker; + +Param<Counter> max_insts_any_thread; +Param<Counter> max_insts_all_threads; +Param<Counter> max_loads_any_thread; +Param<Counter> max_loads_all_threads; + +SimObjectParam<BaseCache *> icache; +SimObjectParam<BaseCache *> dcache; + +Param<unsigned> cachePorts; +Param<unsigned> width; +Param<unsigned> frontEndWidth; +Param<unsigned> backEndWidth; +Param<unsigned> backEndSquashLatency; +Param<unsigned> backEndLatency; +Param<unsigned> maxInstBufferSize; +Param<unsigned> numPhysicalRegs; + +Param<unsigned> decodeToFetchDelay; +Param<unsigned> renameToFetchDelay; +Param<unsigned> iewToFetchDelay; +Param<unsigned> commitToFetchDelay; +Param<unsigned> fetchWidth; + +Param<unsigned> renameToDecodeDelay; +Param<unsigned> iewToDecodeDelay; +Param<unsigned> commitToDecodeDelay; +Param<unsigned> fetchToDecodeDelay; +Param<unsigned> decodeWidth; + +Param<unsigned> iewToRenameDelay; +Param<unsigned> commitToRenameDelay; +Param<unsigned> decodeToRenameDelay; +Param<unsigned> renameWidth; + +Param<unsigned> commitToIEWDelay; +Param<unsigned> renameToIEWDelay; +Param<unsigned> issueToExecuteDelay; +Param<unsigned> issueWidth; +Param<unsigned> executeWidth; +Param<unsigned> executeIntWidth; +Param<unsigned> executeFloatWidth; +Param<unsigned> executeBranchWidth; +Param<unsigned> executeMemoryWidth; + +Param<unsigned> iewToCommitDelay; +Param<unsigned> renameToROBDelay; +Param<unsigned> commitWidth; +Param<unsigned> squashWidth; + +Param<unsigned> localPredictorSize; +Param<unsigned> localCtrBits; +Param<unsigned> localHistoryTableSize; +Param<unsigned> localHistoryBits; +Param<unsigned> globalPredictorSize; +Param<unsigned> globalCtrBits; +Param<unsigned> globalHistoryBits; +Param<unsigned> choicePredictorSize; +Param<unsigned> choiceCtrBits; + +Param<unsigned> BTBEntries; +Param<unsigned> BTBTagSize; + +Param<unsigned> RASSize; + +Param<unsigned> LQEntries; +Param<unsigned> SQEntries; +Param<unsigned> LFSTSize; +Param<unsigned> SSITSize; + +Param<unsigned> numPhysIntRegs; +Param<unsigned> numPhysFloatRegs; +Param<unsigned> numIQEntries; +Param<unsigned> numROBEntries; + +Param<bool> decoupledFrontEnd; +Param<int> dispatchWidth; +Param<int> wbWidth; + +Param<unsigned> smtNumFetchingThreads; +Param<std::string> smtFetchPolicy; +Param<std::string> smtLSQPolicy; +Param<unsigned> smtLSQThreshold; +Param<std::string> 
smtIQPolicy;
+Param<unsigned> smtIQThreshold;
+Param<std::string> smtROBPolicy;
+Param<unsigned> smtROBThreshold;
+Param<std::string> smtCommitPolicy;
+
+Param<unsigned> instShiftAmt;
+
+Param<bool> defer_registration;
+
+Param<bool> function_trace;
+Param<Tick> function_trace_start;
+
+END_DECLARE_SIM_OBJECT_PARAMS(SimpleOzoneCPU)
+
+BEGIN_INIT_SIM_OBJECT_PARAMS(SimpleOzoneCPU)
+
+    INIT_PARAM(clock, "clock speed"),
+    INIT_PARAM(numThreads, "number of HW thread contexts"),
+
+#if FULL_SYSTEM
+    INIT_PARAM(system, "System object"),
+    INIT_PARAM(cpu_id, "processor ID"),
+    INIT_PARAM(itb, "Instruction translation buffer"),
+    INIT_PARAM(dtb, "Data translation buffer"),
+#else
+    INIT_PARAM(workload, "Processes to run"),
+//    INIT_PARAM(page_table, "Page table"),
+#endif // FULL_SYSTEM
+
+    INIT_PARAM_DFLT(mem, "Memory", NULL),
+
+    INIT_PARAM_DFLT(checker, "Checker CPU", NULL),
+
+    INIT_PARAM_DFLT(max_insts_any_thread,
+                    "Terminate when any thread reaches this inst count",
+                    0),
+    INIT_PARAM_DFLT(max_insts_all_threads,
+                    "Terminate when all threads have reached "
+                    "this inst count",
+                    0),
+    INIT_PARAM_DFLT(max_loads_any_thread,
+                    "Terminate when any thread reaches this load count",
+                    0),
+    INIT_PARAM_DFLT(max_loads_all_threads,
+                    "Terminate when all threads have reached this load "
+                    "count",
+                    0),
+
+    INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL),
+    INIT_PARAM_DFLT(dcache, "L1 data cache", NULL),
+
+    INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200),
+    INIT_PARAM_DFLT(width, "Width", 1),
+    INIT_PARAM_DFLT(frontEndWidth, "Front end width", 1),
+    INIT_PARAM_DFLT(backEndWidth, "Back end width", 1),
+    INIT_PARAM_DFLT(backEndSquashLatency, "Back end squash latency", 1),
+    INIT_PARAM_DFLT(backEndLatency, "Back end latency", 1),
+    INIT_PARAM_DFLT(maxInstBufferSize, "Maximum instruction buffer size", 16),
+    INIT_PARAM(numPhysicalRegs, "Number of physical registers"),
+
+    INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"),
+    INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"),
+    INIT_PARAM(iewToFetchDelay, "Issue/Execute/Writeback to fetch "
+               "delay"),
+    INIT_PARAM(commitToFetchDelay, "Commit to fetch delay"),
+    INIT_PARAM(fetchWidth, "Fetch width"),
+    INIT_PARAM(renameToDecodeDelay, "Rename to decode delay"),
+    INIT_PARAM(iewToDecodeDelay, "Issue/Execute/Writeback to decode "
+               "delay"),
+    INIT_PARAM(commitToDecodeDelay, "Commit to decode delay"),
+    INIT_PARAM(fetchToDecodeDelay, "Fetch to decode delay"),
+    INIT_PARAM(decodeWidth, "Decode width"),
+
+    INIT_PARAM(iewToRenameDelay, "Issue/Execute/Writeback to rename "
+               "delay"),
+    INIT_PARAM(commitToRenameDelay, "Commit to rename delay"),
+    INIT_PARAM(decodeToRenameDelay, "Decode to rename delay"),
+    INIT_PARAM(renameWidth, "Rename width"),
+
+    INIT_PARAM(commitToIEWDelay, "Commit to "
+               "Issue/Execute/Writeback delay"),
+    INIT_PARAM(renameToIEWDelay, "Rename to "
+               "Issue/Execute/Writeback delay"),
+    INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal "
+               "to the IEW stage)"),
+    INIT_PARAM(issueWidth, "Issue width"),
+    INIT_PARAM(executeWidth, "Execute width"),
+    INIT_PARAM(executeIntWidth, "Integer execute width"),
+    INIT_PARAM(executeFloatWidth, "Floating point execute width"),
+    INIT_PARAM(executeBranchWidth, "Branch execute width"),
+    INIT_PARAM(executeMemoryWidth, "Memory execute width"),
+
+    INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit "
+               "delay"),
+    INIT_PARAM(renameToROBDelay, "Rename to reorder buffer delay"),
+    INIT_PARAM(commitWidth, "Commit width"),
+    INIT_PARAM(squashWidth, "Squash width"),
+
+
INIT_PARAM(localPredictorSize, "Size of local predictor"), + INIT_PARAM(localCtrBits, "Bits per counter"), + INIT_PARAM(localHistoryTableSize, "Size of local history table"), + INIT_PARAM(localHistoryBits, "Bits for the local history"), + INIT_PARAM(globalPredictorSize, "Size of global predictor"), + INIT_PARAM(globalCtrBits, "Bits per counter"), + INIT_PARAM(globalHistoryBits, "Bits of history"), + INIT_PARAM(choicePredictorSize, "Size of choice predictor"), + INIT_PARAM(choiceCtrBits, "Bits of choice counters"), + + INIT_PARAM(BTBEntries, "Number of BTB entries"), + INIT_PARAM(BTBTagSize, "Size of the BTB tags, in bits"), + + INIT_PARAM(RASSize, "RAS size"), + + INIT_PARAM(LQEntries, "Number of load queue entries"), + INIT_PARAM(SQEntries, "Number of store queue entries"), + INIT_PARAM(LFSTSize, "Last fetched store table size"), + INIT_PARAM(SSITSize, "Store set ID table size"), + + INIT_PARAM(numPhysIntRegs, "Number of physical integer registers"), + INIT_PARAM(numPhysFloatRegs, "Number of physical floating point " + "registers"), + INIT_PARAM(numIQEntries, "Number of instruction queue entries"), + INIT_PARAM(numROBEntries, "Number of reorder buffer entries"), + + INIT_PARAM_DFLT(decoupledFrontEnd, "Decoupled front end", true), + INIT_PARAM_DFLT(dispatchWidth, "Dispatch width", 0), + INIT_PARAM_DFLT(wbWidth, "Writeback width", 0), + + INIT_PARAM_DFLT(smtNumFetchingThreads, "SMT Number of Fetching Threads", 1), + INIT_PARAM_DFLT(smtFetchPolicy, "SMT Fetch Policy", "SingleThread"), + INIT_PARAM_DFLT(smtLSQPolicy, "SMT LSQ Sharing Policy", "Partitioned"), + INIT_PARAM_DFLT(smtLSQThreshold,"SMT LSQ Threshold", 100), + INIT_PARAM_DFLT(smtIQPolicy, "SMT IQ Policy", "Partitioned"), + INIT_PARAM_DFLT(smtIQThreshold, "SMT IQ Threshold", 100), + INIT_PARAM_DFLT(smtROBPolicy, "SMT ROB Sharing Policy", "Partitioned"), + INIT_PARAM_DFLT(smtROBThreshold,"SMT ROB Threshold", 100), + INIT_PARAM_DFLT(smtCommitPolicy,"SMT Commit Fetch Policy", "RoundRobin"), + + INIT_PARAM(instShiftAmt, "Number of bits to shift instructions by"), + INIT_PARAM(defer_registration, "defer system registration (for sampling)"), + + INIT_PARAM(function_trace, "Enable function trace"), + INIT_PARAM(function_trace_start, "Cycle to start function trace") + +END_INIT_SIM_OBJECT_PARAMS(SimpleOzoneCPU) + +CREATE_SIM_OBJECT(SimpleOzoneCPU) +{ + SimpleOzoneCPU *cpu; + +#if FULL_SYSTEM + // Full-system only supports a single thread for the moment. + int actual_num_threads = 1; +#else + // In non-full-system mode, we infer the number of threads from + // the workload if it's not explicitly specified. + int actual_num_threads = + numThreads.isValid() ? numThreads : workload.size(); + + if (workload.size() == 0) { + fatal("Must specify at least one workload!"); + } + +#endif + + SimpleParams *params = new SimpleParams; + + params->clock = clock; + + params->name = getInstanceName(); + params->numberOfThreads = actual_num_threads; + +#if FULL_SYSTEM + params->system = system; + params->cpu_id = cpu_id; + params->itb = itb; + params->dtb = dtb; +#else + params->workload = workload; +// params->pTable = page_table; +#endif // FULL_SYSTEM + + params->mem = mem; + params->checker = checker; + params->max_insts_any_thread = max_insts_any_thread; + params->max_insts_all_threads = max_insts_all_threads; + params->max_loads_any_thread = max_loads_any_thread; + params->max_loads_all_threads = max_loads_all_threads; + + // + // Caches + // + params->icacheInterface = icache ? icache->getInterface() : NULL; + params->dcacheInterface = dcache ? 
dcache->getInterface() : NULL; + params->cachePorts = cachePorts; + + params->width = width; + params->frontEndWidth = frontEndWidth; + params->backEndWidth = backEndWidth; + params->backEndSquashLatency = backEndSquashLatency; + params->backEndLatency = backEndLatency; + params->maxInstBufferSize = maxInstBufferSize; + params->numPhysicalRegs = numPhysIntRegs + numPhysFloatRegs; + + params->decodeToFetchDelay = decodeToFetchDelay; + params->renameToFetchDelay = renameToFetchDelay; + params->iewToFetchDelay = iewToFetchDelay; + params->commitToFetchDelay = commitToFetchDelay; + params->fetchWidth = fetchWidth; + + params->renameToDecodeDelay = renameToDecodeDelay; + params->iewToDecodeDelay = iewToDecodeDelay; + params->commitToDecodeDelay = commitToDecodeDelay; + params->fetchToDecodeDelay = fetchToDecodeDelay; + params->decodeWidth = decodeWidth; + + params->iewToRenameDelay = iewToRenameDelay; + params->commitToRenameDelay = commitToRenameDelay; + params->decodeToRenameDelay = decodeToRenameDelay; + params->renameWidth = renameWidth; + + params->commitToIEWDelay = commitToIEWDelay; + params->renameToIEWDelay = renameToIEWDelay; + params->issueToExecuteDelay = issueToExecuteDelay; + params->issueWidth = issueWidth; + params->executeWidth = executeWidth; + params->executeIntWidth = executeIntWidth; + params->executeFloatWidth = executeFloatWidth; + params->executeBranchWidth = executeBranchWidth; + params->executeMemoryWidth = executeMemoryWidth; + + params->iewToCommitDelay = iewToCommitDelay; + params->renameToROBDelay = renameToROBDelay; + params->commitWidth = commitWidth; + params->squashWidth = squashWidth; + + + params->localPredictorSize = localPredictorSize; + params->localCtrBits = localCtrBits; + params->localHistoryTableSize = localHistoryTableSize; + params->localHistoryBits = localHistoryBits; + params->globalPredictorSize = globalPredictorSize; + params->globalCtrBits = globalCtrBits; + params->globalHistoryBits = globalHistoryBits; + params->choicePredictorSize = choicePredictorSize; + params->choiceCtrBits = choiceCtrBits; + + params->BTBEntries = BTBEntries; + params->BTBTagSize = BTBTagSize; + + params->RASSize = RASSize; + + params->LQEntries = LQEntries; + params->SQEntries = SQEntries; + + params->SSITSize = SSITSize; + params->LFSTSize = LFSTSize; + + params->numPhysIntRegs = numPhysIntRegs; + params->numPhysFloatRegs = numPhysFloatRegs; + params->numIQEntries = numIQEntries; + params->numROBEntries = numROBEntries; + + params->decoupledFrontEnd = decoupledFrontEnd; + params->dispatchWidth = dispatchWidth; + params->wbWidth = wbWidth; + + params->smtNumFetchingThreads = smtNumFetchingThreads; + params->smtFetchPolicy = smtFetchPolicy; + params->smtIQPolicy = smtIQPolicy; + params->smtLSQPolicy = smtLSQPolicy; + params->smtLSQThreshold = smtLSQThreshold; + params->smtROBPolicy = smtROBPolicy; + params->smtROBThreshold = smtROBThreshold; + params->smtCommitPolicy = smtCommitPolicy; + + params->instShiftAmt = 2; + + params->deferRegistration = defer_registration; + + params->functionTrace = function_trace; + params->functionTraceStart = function_trace_start; + + cpu = new SimpleOzoneCPU(params); + + return cpu; +} + +REGISTER_SIM_OBJECT("SimpleOzoneCPU", SimpleOzoneCPU) + diff --git a/cpu/ozone/dyn_inst.cc b/cpu/ozone/dyn_inst.cc new file mode 100644 index 000000000..3bf8b03ca --- /dev/null +++ b/cpu/ozone/dyn_inst.cc @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/ozone/dyn_inst_impl.hh" +#include "cpu/ozone/ozone_impl.hh" +#include "cpu/ozone/simple_impl.hh" + +template class OzoneDynInst<OzoneImpl>; +template class OzoneDynInst<SimpleImpl>; + diff --git a/cpu/ozone/dyn_inst.hh b/cpu/ozone/dyn_inst.hh new file mode 100644 index 000000000..5d48bb361 --- /dev/null +++ b/cpu/ozone/dyn_inst.hh @@ -0,0 +1,231 @@ +/* + * Copyright (c) 2005-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef __CPU_OZONE_DYN_INST_HH__
+#define __CPU_OZONE_DYN_INST_HH__
+
+#include "arch/isa_traits.hh"
+#include "config/full_system.hh"
+#include "cpu/base_dyn_inst.hh"
+#include "cpu/ozone/cpu.hh" // MUST include this
+#include "cpu/inst_seq.hh"
+#include "cpu/ozone/simple_impl.hh" // Would be nice to not have to include this
+#include "cpu/ozone/ozone_impl.hh"
+
+#include <list>
+#include <vector>
+
+template <class Impl>
+class OzoneDynInst : public BaseDynInst<Impl>
+{
+  public:
+    // Typedefs
+    typedef typename Impl::FullCPU FullCPU;
+
+    typedef typename FullCPU::ImplState ImplState;
+
+    // Typedef for DynInstPtr. This is really just a
+    // RefCountingPtr<OzoneDynInst>.
+    typedef typename Impl::DynInstPtr DynInstPtr;
+
+    typedef TheISA::ExtMachInst ExtMachInst;
+    typedef TheISA::MachInst MachInst;
+    typedef TheISA::MiscReg MiscReg;
+    typedef typename std::list<DynInstPtr>::iterator ListIt;
+
+    // Note that this is duplicated from the BaseDynInst class; I'm
+    // simply not sure the enum would carry through so I could use it
+    // in array declarations in this class.
+    enum {
+        MaxInstSrcRegs = TheISA::MaxInstSrcRegs,
+        MaxInstDestRegs = TheISA::MaxInstDestRegs
+    };
+
+    OzoneDynInst(FullCPU *cpu);
+
+    OzoneDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC,
+                 InstSeqNum seq_num, FullCPU *cpu);
+
+    OzoneDynInst(StaticInstPtr inst);
+
+    ~OzoneDynInst();
+
+    void setSrcInst(DynInstPtr &newSrcInst, int regIdx)
+    { srcInsts[regIdx] = newSrcInst; }
+
+    bool srcInstReady(int regIdx);
+
+    void setPrevDestInst(DynInstPtr &oldDestInst, int regIdx)
+    { prevDestInst[regIdx] = oldDestInst; }
+
+    DynInstPtr &getPrevDestInst(int regIdx)
+    { return prevDestInst[regIdx]; }
+
+    void addDependent(DynInstPtr &dependent_inst);
+
+    std::vector<DynInstPtr> &getDependents() { return dependents; }
+    std::vector<DynInstPtr> &getMemDeps() { return memDependents; }
+    std::list<DynInstPtr> &getMemSrcs() { return srcMemInsts; }
+
+    void wakeDependents();
+
+    void wakeMemDependents();
+
+    void addMemDependent(DynInstPtr &inst) { memDependents.push_back(inst); }
+
+    void addSrcMemInst(DynInstPtr &inst) { srcMemInsts.push_back(inst); }
+
+    void markMemInstReady(OzoneDynInst<Impl> *inst);
+
+    // For now I will remove instructions from the list when they wake
+    // up. In the future, you only really need a counter.
+    bool memDepReady() { return srcMemInsts.empty(); }
+
+  private:
+    void initInstPtrs();
+
+    std::vector<DynInstPtr> dependents;
+
+    std::vector<DynInstPtr> memDependents;
+
+    std::list<DynInstPtr> srcMemInsts;
+
+    /** The instructions that produce the values of this instruction's
+     *  source registers. These may be NULL if the value has already been
+     *  read from the source instruction.
+     */
+    DynInstPtr srcInsts[MaxInstSrcRegs];
+
+    /**
+     * Previous rename instruction for this destination.
+     */
+    DynInstPtr prevDestInst[MaxInstDestRegs];
+
+  public:
+
+    Fault initiateAcc();
+
+    Fault completeAcc();
+
+    // The register accessor methods provide the index of the
+    // instruction's operand (e.g., 0 or 1), not the architectural
+    // register index, to simplify the implementation of register
+    // renaming. We find the architectural register index by indexing
+    // into the instruction's own operand index table. Note that a
+    // raw pointer to the StaticInst is provided instead of a
+    // ref-counted StaticInstPtr to reduce overhead. This is fine as
+    // long as these methods don't copy the pointer into any long-term
+    // storage (which is pretty hard to imagine they would have reason
+    // to do).
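+    //
+    // Illustrative note (not part of the original source): for an Alpha
+    // "addq r1,r2,r3", readIntReg(si, 0) returns the result produced by
+    // srcInsts[0], the renamed producer of r1; the architectural index
+    // itself is still available through si->srcRegIdx(0).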
+ + uint64_t readIntReg(const StaticInst *si, int idx) + { + return srcInsts[idx]->readIntResult(); + } + + float readFloatRegSingle(const StaticInst *si, int idx) + { + return srcInsts[idx]->readFloatResult(); + } + + double readFloatRegDouble(const StaticInst *si, int idx) + { + return srcInsts[idx]->readDoubleResult(); + } + + uint64_t readFloatRegInt(const StaticInst *si, int idx) + { + return srcInsts[idx]->readIntResult(); + } + + /** @todo: Make results into arrays so they can handle multiple dest + * registers. + */ + void setIntReg(const StaticInst *si, int idx, uint64_t val) + { + BaseDynInst<Impl>::setIntReg(si, idx, val); + } + + void setFloatRegSingle(const StaticInst *si, int idx, float val) + { + BaseDynInst<Impl>::setFloatRegSingle(si, idx, val); + } + + void setFloatRegDouble(const StaticInst *si, int idx, double val) + { + BaseDynInst<Impl>::setFloatRegDouble(si, idx, val); + } + + void setFloatRegInt(const StaticInst *si, int idx, uint64_t val) + { + BaseDynInst<Impl>::setFloatRegInt(si, idx, val); + } + + void setIntResult(uint64_t result) { this->instResult.integer = result; } + void setDoubleResult(double result) { this->instResult.dbl = result; } + + bool srcsReady(); + bool eaSrcsReady(); + + Fault execute(); + + Fault executeEAComp() + { return NoFault; } + + Fault executeMemAcc() + { return this->staticInst->memAccInst()->execute(this, this->traceData); } + + void clearDependents(); + + void clearMemDependents(); + + public: + // ISA stuff + MiscReg readMiscReg(int misc_reg); + + MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault); + + Fault setMiscReg(int misc_reg, const MiscReg &val); + + Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val); + +#if FULL_SYSTEM + Fault hwrei(); + int readIntrFlag(); + void setIntrFlag(int val); + bool inPalMode(); + void trap(Fault fault); + bool simPalCheck(int palFunc); +#else + void syscall(); +#endif + + ListIt iqIt; + bool iqItValid; +}; + +#endif // __CPU_OZONE_DYN_INST_HH__ diff --git a/cpu/ozone/dyn_inst_impl.hh b/cpu/ozone/dyn_inst_impl.hh new file mode 100644 index 000000000..f891ec515 --- /dev/null +++ b/cpu/ozone/dyn_inst_impl.hh @@ -0,0 +1,315 @@ +/* + * Copyright (c) 2005-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "arch/faults.hh"
+#include "arch/isa_traits.hh"
+#include "config/full_system.hh"
+#include "cpu/ozone/dyn_inst.hh"
+#include "kern/kernel_stats.hh"
+
+using namespace TheISA;
+
+template <class Impl>
+OzoneDynInst<Impl>::OzoneDynInst(FullCPU *cpu)
+    : BaseDynInst<Impl>(0, 0, 0, 0, cpu)
+{
+    this->setResultReady();
+
+    initInstPtrs();
+}
+
+template <class Impl>
+OzoneDynInst<Impl>::OzoneDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC,
+                                 InstSeqNum seq_num, FullCPU *cpu)
+    : BaseDynInst<Impl>(inst, PC, Pred_PC, seq_num, cpu)
+{
+    initInstPtrs();
+}
+
+template <class Impl>
+OzoneDynInst<Impl>::OzoneDynInst(StaticInstPtr _staticInst)
+    : BaseDynInst<Impl>(_staticInst)
+{
+    initInstPtrs();
+}
+
+template <class Impl>
+OzoneDynInst<Impl>::~OzoneDynInst()
+{
+    DPRINTF(BE, "[sn:%lli] destructor called\n", this->seqNum);
+    for (int i = 0; i < this->numSrcRegs(); ++i) {
+        srcInsts[i] = NULL;
+    }
+
+    for (int i = 0; i < this->numDestRegs(); ++i) {
+        prevDestInst[i] = NULL;
+    }
+
+    dependents.clear();
+}
+
+template <class Impl>
+Fault
+OzoneDynInst<Impl>::execute()
+{
+    // @todo: Pretty convoluted way to avoid squashing from happening when using
+    // the XC during an instruction's execution (specifically for instructions
+    // that have side effects that use the XC). Fix this.
+    bool in_syscall = this->thread->inSyscall;
+    this->thread->inSyscall = true;
+
+    this->fault = this->staticInst->execute(this, this->traceData);
+
+    this->thread->inSyscall = in_syscall;
+
+    return this->fault;
+}
+
+template <class Impl>
+Fault
+OzoneDynInst<Impl>::initiateAcc()
+{
+    // @todo: Pretty convoluted way to avoid squashing from happening when using
+    // the XC during an instruction's execution (specifically for instructions
+    // that have side effects that use the XC). Fix this.
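+    // (Note, not in the original source: this is the same inSyscall
+    // save/restore pattern as in execute() above.)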
+    bool in_syscall = this->thread->inSyscall;
+    this->thread->inSyscall = true;
+
+    this->fault = this->staticInst->initiateAcc(this, this->traceData);
+
+    this->thread->inSyscall = in_syscall;
+
+    return this->fault;
+}
+
+template <class Impl>
+Fault
+OzoneDynInst<Impl>::completeAcc()
+{
+    if (this->isLoad()) {
+        this->fault = this->staticInst->completeAcc(this->req->data,
+                                                    this,
+                                                    this->traceData);
+    } else if (this->isStore()) {
+        this->fault = this->staticInst->completeAcc((uint8_t*)&this->req->result,
+                                                    this,
+                                                    this->traceData);
+    } else {
+        panic("Unknown type!");
+    }
+
+    return this->fault;
+}
+
+template <class Impl>
+bool
+OzoneDynInst<Impl>::srcInstReady(int regIdx)
+{
+    return srcInsts[regIdx]->isResultReady();
+}
+
+template <class Impl>
+void
+OzoneDynInst<Impl>::addDependent(DynInstPtr &dependent_inst)
+{
+    dependents.push_back(dependent_inst);
+}
+
+template <class Impl>
+void
+OzoneDynInst<Impl>::wakeDependents()
+{
+    for (int i = 0; i < dependents.size(); ++i) {
+        dependents[i]->markSrcRegReady();
+    }
+}
+
+template <class Impl>
+void
+OzoneDynInst<Impl>::wakeMemDependents()
+{
+    for (int i = 0; i < memDependents.size(); ++i) {
+        memDependents[i]->markMemInstReady(this);
+    }
+}
+
+template <class Impl>
+void
+OzoneDynInst<Impl>::markMemInstReady(OzoneDynInst<Impl> *inst)
+{
+    ListIt mem_it = srcMemInsts.begin();
+    while (mem_it != srcMemInsts.end() && (*mem_it) != inst) {
+        mem_it++;
+    }
+    assert(mem_it != srcMemInsts.end());
+
+    srcMemInsts.erase(mem_it);
+}
+
+template <class Impl>
+void
+OzoneDynInst<Impl>::initInstPtrs()
+{
+    for (int i = 0; i < MaxInstSrcRegs; ++i) {
+        srcInsts[i] = NULL;
+    }
+    iqItValid = false;
+}
+
+template <class Impl>
+bool
+OzoneDynInst<Impl>::srcsReady()
+{
+    for (int i = 0; i < this->numSrcRegs(); ++i) {
+        if (!srcInsts[i]->isResultReady())
+            return false;
+    }
+
+    return true;
+}
+
+template <class Impl>
+bool
+OzoneDynInst<Impl>::eaSrcsReady()
+{
+    for (int i = 1; i < this->numSrcRegs(); ++i) {
+        if (!srcInsts[i]->isResultReady())
+            return false;
+    }
+
+    return true;
+}
+
+template <class Impl>
+void
+OzoneDynInst<Impl>::clearDependents()
+{
+    dependents.clear();
+    for (int i = 0; i < this->numSrcRegs(); ++i) {
+        srcInsts[i] = NULL;
+    }
+    for (int i = 0; i < this->numDestRegs(); ++i) {
+        prevDestInst[i] = NULL;
+    }
+}
+
+template <class Impl>
+void
+OzoneDynInst<Impl>::clearMemDependents()
+{
+    memDependents.clear();
+}
+
+template <class Impl>
+MiscReg
+OzoneDynInst<Impl>::readMiscReg(int misc_reg)
+{
+    return this->thread->readMiscReg(misc_reg);
+}
+
+template <class Impl>
+MiscReg
+OzoneDynInst<Impl>::readMiscRegWithEffect(int misc_reg, Fault &fault)
+{
+    return this->thread->readMiscRegWithEffect(misc_reg, fault);
+}
+
+template <class Impl>
+Fault
+OzoneDynInst<Impl>::setMiscReg(int misc_reg, const MiscReg &val)
+{
+    this->setIntResult(val);
+    return this->thread->setMiscReg(misc_reg, val);
+}
+
+template <class Impl>
+Fault
+OzoneDynInst<Impl>::setMiscRegWithEffect(int misc_reg, const MiscReg &val)
+{
+    return this->thread->setMiscRegWithEffect(misc_reg, val);
+}
+
+#if FULL_SYSTEM
+
+template <class Impl>
+Fault
+OzoneDynInst<Impl>::hwrei()
+{
+    if (!this->cpu->inPalMode(this->readPC()))
+        return new AlphaISA::UnimplementedOpcodeFault;
+
+    this->setNextPC(this->thread->readMiscReg(AlphaISA::IPR_EXC_ADDR));
+
+    this->cpu->hwrei();
+
+    // FIXME: check for interrupts?
+    return NoFault;
+}
+
+template <class Impl>
+int
+OzoneDynInst<Impl>::readIntrFlag()
+{
+    return this->cpu->readIntrFlag();
+}
+
+template <class Impl>
+void
+OzoneDynInst<Impl>::setIntrFlag(int val)
+{
+    this->cpu->setIntrFlag(val);
+}
+
+template <class Impl>
+bool
+OzoneDynInst<Impl>::inPalMode()
+{
+    return this->cpu->inPalMode();
+}
+
+template <class Impl>
+void
+OzoneDynInst<Impl>::trap(Fault fault)
+{
+    fault->invoke(this->thread->getXCProxy());
+}
+
+template <class Impl>
+bool
+OzoneDynInst<Impl>::simPalCheck(int palFunc)
+{
+    return this->cpu->simPalCheck(palFunc);
+}
+#else
+template <class Impl>
+void
+OzoneDynInst<Impl>::syscall()
+{
+    this->cpu->syscall();
+}
+#endif
diff --git a/cpu/ozone/front_end.cc b/cpu/ozone/front_end.cc
new file mode 100644
index 000000000..a974d43cb
--- /dev/null
+++ b/cpu/ozone/front_end.cc
@@ -0,0 +1,7 @@
+
+#include "cpu/ozone/front_end_impl.hh"
+#include "cpu/ozone/ozone_impl.hh"
+#include "cpu/ozone/simple_impl.hh"
+
+template class FrontEnd<OzoneImpl>;
+template class FrontEnd<SimpleImpl>;
diff --git a/cpu/ozone/front_end.hh b/cpu/ozone/front_end.hh
new file mode 100644
index 000000000..dd382491f
--- /dev/null
+++ b/cpu/ozone/front_end.hh
@@ -0,0 +1,284 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */ + +#ifndef __CPU_OZONE_FRONT_END_HH__ +#define __CPU_OZONE_FRONT_END_HH__ + +#include <deque> + +#include "cpu/inst_seq.hh" +#include "cpu/o3/bpred_unit.hh" +#include "cpu/ozone/rename_table.hh" +#include "mem/mem_req.hh" +#include "sim/eventq.hh" +#include "sim/stats.hh" + +class ExecContext; +class MemInterface; +template <class> +class OzoneThreadState; +class PageTable; +template <class> +class TimeBuffer; + +template <class Impl> +class FrontEnd +{ + public: + typedef typename Impl::Params Params; + typedef typename Impl::DynInst DynInst; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::BackEnd BackEnd; + + typedef typename Impl::FullCPU::OzoneXC OzoneXC; + typedef typename Impl::FullCPU::CommStruct CommStruct; + + FrontEnd(Params *params); + + std::string name() const; + + void setCPU(FullCPU *cpu_ptr) + { cpu = cpu_ptr; } + + void setBackEnd(BackEnd *back_end_ptr) + { backEnd = back_end_ptr; } + + void setCommBuffer(TimeBuffer<CommStruct> *_comm); + + void setXC(ExecContext *xc_ptr); + + void setThreadState(OzoneThreadState<Impl> *thread_ptr) + { thread = thread_ptr; } + + void regStats(); + + void tick(); + Fault fetchCacheLine(); + void processInst(DynInstPtr &inst); + void squash(const InstSeqNum &squash_num, const Addr &next_PC, + const bool is_branch = false, const bool branch_taken = false); + DynInstPtr getInst(); + + void processCacheCompletion(MemReqPtr &req); + + void addFreeRegs(int num_freed); + + bool isEmpty() { return instBuffer.empty(); } + + void switchOut(); + + void doSwitchOut(); + + void takeOverFrom(ExecContext *old_xc = NULL); + + bool isSwitchedOut() { return switchedOut; } + + bool switchedOut; + + private: + bool updateStatus(); + + void checkBE(); + DynInstPtr getInstFromCacheline(); + void renameInst(DynInstPtr &inst); + // Returns true if we need to stop the front end this cycle + bool processBarriers(DynInstPtr &inst); + + void handleFault(Fault &fault); + public: + Fault getFault() { return fetchFault; } + private: + Fault fetchFault; + + // Align an address (typically a PC) to the start of an I-cache block. + // We fold in the PISA 64- to 32-bit conversion here as well. + Addr icacheBlockAlignPC(Addr addr) + { + addr = TheISA::realPCToFetchPC(addr); + return (addr & ~(cacheBlkMask)); + } + + InstSeqNum getAndIncrementInstSeq() + { return cpu->globalSeqNum++; } + + public: + FullCPU *cpu; + + BackEnd *backEnd; + + ExecContext *xc; + + OzoneThreadState<Impl> *thread; + + enum Status { + Running, + Idle, + IcacheMissStall, + IcacheMissComplete, + SerializeBlocked, + SerializeComplete, + RenameBlocked, + QuiescePending, + TrapPending, + BEBlocked + }; + + Status status; + + private: + TimeBuffer<CommStruct> *comm; + typename TimeBuffer<CommStruct>::wire fromCommit; + + typedef typename Impl::BranchPred BranchPred; + + BranchPred branchPred; + + class ICacheCompletionEvent : public Event + { + private: + MemReqPtr req; + FrontEnd *frontEnd; + + public: + ICacheCompletionEvent(MemReqPtr &_req, FrontEnd *_fe); + + virtual void process(); + virtual const char *description(); + }; + + MemInterface *icacheInterface; + +#if !FULL_SYSTEM + PageTable *pTable; +#endif + + MemReqPtr memReq; + + /** Mask to get a cache block's address. */ + Addr cacheBlkMask; + + unsigned cacheBlkSize; + + Addr cacheBlkPC; + + /** The cache line being fetched. 
*/ + uint8_t *cacheData; + + bool fetchCacheLineNextCycle; + + bool cacheBlkValid; + + public: + RenameTable<Impl> renameTable; + + private: + Addr PC; + Addr nextPC; + + public: + void setPC(Addr val) { PC = val; } + void setNextPC(Addr val) { nextPC = val; } + + void wakeFromQuiesce(); + + void dumpInsts(); + + private: + typedef typename std::deque<DynInstPtr> InstBuff; + typedef typename InstBuff::iterator InstBuffIt; + + InstBuff instBuffer; + + int instBufferSize; + + int maxInstBufferSize; + + int width; + + int freeRegs; + + int numPhysRegs; + + bool serializeNext; + + DynInstPtr barrierInst; + + public: + bool interruptPending; + private: + // number of idle cycles +/* + Stats::Average<> notIdleFraction; + Stats::Formula idleFraction; +*/ + // @todo: Consider making these vectors and tracking on a per thread basis. + /** Stat for total number of cycles stalled due to an icache miss. */ + Stats::Scalar<> icacheStallCycles; + /** Stat for total number of fetched instructions. */ + Stats::Scalar<> fetchedInsts; + Stats::Scalar<> fetchedBranches; + /** Stat for total number of predicted branches. */ + Stats::Scalar<> predictedBranches; + /** Stat for total number of cycles spent fetching. */ + Stats::Scalar<> fetchCycles; + + Stats::Scalar<> fetchIdleCycles; + /** Stat for total number of cycles spent squashing. */ + Stats::Scalar<> fetchSquashCycles; + /** Stat for total number of cycles spent blocked due to other stages in + * the pipeline. + */ + Stats::Scalar<> fetchBlockedCycles; + /** Stat for total number of fetched cache lines. */ + Stats::Scalar<> fetchedCacheLines; + + Stats::Scalar<> fetchIcacheSquashes; + /** Distribution of number of instructions fetched each cycle. */ + Stats::Distribution<> fetchNisnDist; +// Stats::Vector<> qfull_iq_occupancy; +// Stats::VectorDistribution<> qfull_iq_occ_dist_; + Stats::Formula idleRate; + Stats::Formula branchRate; + Stats::Formula fetchRate; + Stats::Scalar<> IFQCount; // cumulative IFQ occupancy + Stats::Formula IFQOccupancy; + Stats::Formula IFQLatency; + Stats::Scalar<> IFQFcount; // cumulative IFQ full count + Stats::Formula IFQFullRate; + + Stats::Scalar<> dispatchCountStat; + Stats::Scalar<> dispatchedSerializing; + Stats::Scalar<> dispatchedTempSerializing; + Stats::Scalar<> dispatchSerializeStallCycles; + Stats::Formula dispatchRate; + Stats::Formula regIntFull; + Stats::Formula regFpFull; +}; + +#endif // __CPU_OZONE_FRONT_END_HH__ diff --git a/cpu/ozone/front_end_impl.hh b/cpu/ozone/front_end_impl.hh new file mode 100644 index 000000000..ffbcf3340 --- /dev/null +++ b/cpu/ozone/front_end_impl.hh @@ -0,0 +1,920 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arch/faults.hh" +#include "arch/isa_traits.hh" +#include "base/statistics.hh" +#include "cpu/exec_context.hh" +#include "cpu/exetrace.hh" +#include "cpu/ozone/front_end.hh" +#include "mem/mem_interface.hh" +#include "sim/byte_swap.hh" + +using namespace TheISA; + +template <class Impl> +FrontEnd<Impl>::FrontEnd(Params *params) + : branchPred(params), + icacheInterface(params->icacheInterface), + instBufferSize(0), + maxInstBufferSize(params->maxInstBufferSize), + width(params->frontEndWidth), + freeRegs(params->numPhysicalRegs), + numPhysRegs(params->numPhysicalRegs), + serializeNext(false), + interruptPending(false) +{ + switchedOut = false; + + status = Idle; + + memReq = NULL; + // Size of cache block. + cacheBlkSize = icacheInterface ? icacheInterface->getBlockSize() : 64; + + assert(isPowerOf2(cacheBlkSize)); + + // Create mask to get rid of offset bits. + cacheBlkMask = (cacheBlkSize - 1); + + // Create space to store a cache line. + cacheData = new uint8_t[cacheBlkSize]; + + fetchCacheLineNextCycle = true; + + cacheBlkValid = false; + +#if !FULL_SYSTEM +// pTable = params->pTable; +#endif + fetchFault = NoFault; +} + +template <class Impl> +std::string +FrontEnd<Impl>::name() const +{ + return cpu->name() + ".frontend"; +} + +template <class Impl> +void +FrontEnd<Impl>::setCommBuffer(TimeBuffer<CommStruct> *_comm) +{ + comm = _comm; + // @todo: Hardcoded for now. Allow this to be set by a latency. 
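+    // (Note, not in the original source: a wire at offset -1 reads the
+    // time buffer entry written one cycle earlier, so this gives a fixed
+    // one-cycle commit-to-front-end feedback latency.)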
+ fromCommit = comm->getWire(-1); +} + +template <class Impl> +void +FrontEnd<Impl>::setXC(ExecContext *xc_ptr) +{ + xc = xc_ptr; +} + +template <class Impl> +void +FrontEnd<Impl>::regStats() +{ + icacheStallCycles + .name(name() + ".icacheStallCycles") + .desc("Number of cycles fetch is stalled on an Icache miss") + .prereq(icacheStallCycles); + + fetchedInsts + .name(name() + ".fetchedInsts") + .desc("Number of instructions fetch has processed") + .prereq(fetchedInsts); + + fetchedBranches + .name(name() + ".fetchedBranches") + .desc("Number of fetched branches") + .prereq(fetchedBranches); + + predictedBranches + .name(name() + ".predictedBranches") + .desc("Number of branches that fetch has predicted taken") + .prereq(predictedBranches); + + fetchCycles + .name(name() + ".fetchCycles") + .desc("Number of cycles fetch has run and was not squashing or" + " blocked") + .prereq(fetchCycles); + + fetchIdleCycles + .name(name() + ".fetchIdleCycles") + .desc("Number of cycles fetch was idle") + .prereq(fetchIdleCycles); + + fetchSquashCycles + .name(name() + ".fetchSquashCycles") + .desc("Number of cycles fetch has spent squashing") + .prereq(fetchSquashCycles); + + fetchBlockedCycles + .name(name() + ".fetchBlockedCycles") + .desc("Number of cycles fetch has spent blocked") + .prereq(fetchBlockedCycles); + + fetchedCacheLines + .name(name() + ".fetchedCacheLines") + .desc("Number of cache lines fetched") + .prereq(fetchedCacheLines); + + fetchIcacheSquashes + .name(name() + ".fetchIcacheSquashes") + .desc("Number of outstanding Icache misses that were squashed") + .prereq(fetchIcacheSquashes); + + fetchNisnDist + .init(/* base value */ 0, + /* last value */ width, + /* bucket size */ 1) + .name(name() + ".rateDist") + .desc("Number of instructions fetched each cycle (Total)") + .flags(Stats::pdf); + + idleRate + .name(name() + ".idleRate") + .desc("Percent of cycles fetch was idle") + .prereq(idleRate); + idleRate = fetchIdleCycles * 100 / cpu->numCycles; + + branchRate + .name(name() + ".branchRate") + .desc("Number of branch fetches per cycle") + .flags(Stats::total); + branchRate = fetchedBranches / cpu->numCycles; + + fetchRate + .name(name() + ".rate") + .desc("Number of inst fetches per cycle") + .flags(Stats::total); + fetchRate = fetchedInsts / cpu->numCycles; + + IFQCount + .name(name() + ".IFQ:count") + .desc("cumulative IFQ occupancy") + ; + + IFQFcount + .name(name() + ".IFQ:fullCount") + .desc("cumulative IFQ full count") + .flags(Stats::total) + ; + + IFQOccupancy + .name(name() + ".IFQ:occupancy") + .desc("avg IFQ occupancy (inst's)") + ; + IFQOccupancy = IFQCount / cpu->numCycles; + + IFQLatency + .name(name() + ".IFQ:latency") + .desc("avg IFQ occupant latency (cycle's)") + .flags(Stats::total) + ; + + IFQFullRate + .name(name() + ".IFQ:fullRate") + .desc("fraction of time (cycles) IFQ was full") + .flags(Stats::total); + ; + IFQFullRate = IFQFcount * Stats::constant(100) / cpu->numCycles; + + dispatchCountStat + .name(name() + ".DIS:count") + .desc("cumulative count of dispatched insts") + .flags(Stats::total) + ; + + dispatchedSerializing + .name(name() + ".DIS:serializingInsts") + .desc("count of serializing insts dispatched") + .flags(Stats::total) + ; + + dispatchedTempSerializing + .name(name() + ".DIS:tempSerializingInsts") + .desc("count of temporary serializing insts dispatched") + .flags(Stats::total) + ; + + dispatchSerializeStallCycles + .name(name() + ".DIS:serializeStallCycles") + .desc("count of cycles dispatch stalled for serializing inst") + 
.flags(Stats::total) + ; + + dispatchRate + .name(name() + ".DIS:rate") + .desc("dispatched insts per cycle") + .flags(Stats::total) + ; + dispatchRate = dispatchCountStat / cpu->numCycles; + + regIntFull + .name(name() + ".REG:int:full") + .desc("number of cycles where there were no INT registers") + ; + + regFpFull + .name(name() + ".REG:fp:full") + .desc("number of cycles where there were no FP registers") + ; + IFQLatency = IFQOccupancy / dispatchRate; + + branchPred.regStats(); +} + +template <class Impl> +void +FrontEnd<Impl>::tick() +{ + if (switchedOut) + return; + + // @todo: Maybe I want to just have direct communication... + if (fromCommit->doneSeqNum) { + branchPred.update(fromCommit->doneSeqNum, 0); + } + + IFQCount += instBufferSize; + IFQFcount += instBufferSize == maxInstBufferSize; + + // Fetch cache line + if (status == IcacheMissComplete) { + cacheBlkValid = true; + + status = Running; + if (barrierInst) + status = SerializeBlocked; + if (freeRegs <= 0) + status = RenameBlocked; + checkBE(); + } else if (status == IcacheMissStall) { + DPRINTF(FE, "Still in Icache miss stall.\n"); + icacheStallCycles++; + return; + } + + if (status == RenameBlocked || status == SerializeBlocked || + status == TrapPending || status == BEBlocked) { + // Will cause a one cycle bubble between changing state and + // restarting. + DPRINTF(FE, "In blocked status.\n"); + + fetchBlockedCycles++; + + if (status == SerializeBlocked) { + dispatchSerializeStallCycles++; + } + updateStatus(); + return; + } else if (status == QuiescePending) { + DPRINTF(FE, "Waiting for quiesce to execute or get squashed.\n"); + return; + } else if (status != IcacheMissComplete) { + if (fetchCacheLineNextCycle) { + Fault fault = fetchCacheLine(); + if (fault != NoFault) { + handleFault(fault); + fetchFault = fault; + return; + } + fetchCacheLineNextCycle = false; + } + // If miss, stall until it returns. + if (status == IcacheMissStall) { + // Tell CPU to not tick me for now. + return; + } + } + + fetchCycles++; + + int num_inst = 0; + + // Otherwise loop and process instructions. + // One way to hack infinite width is to set width and maxInstBufferSize + // both really high. Inelegant, but probably will work. + while (num_inst < width && + instBufferSize < maxInstBufferSize) { + // Get instruction from cache line. + DynInstPtr inst = getInstFromCacheline(); + + if (!inst) { + // PC is no longer in the cache line, end fetch. + // Might want to check this at the end of the cycle so that + // there's no cycle lost to checking for a new cache line. + DPRINTF(FE, "Need to get new cache line\n"); + fetchCacheLineNextCycle = true; + break; + } + + processInst(inst); + + if (status == SerializeBlocked) { + break; + } + + // Possibly push into a time buffer that estimates the front end + // latency + instBuffer.push_back(inst); + ++instBufferSize; + ++num_inst; + +#if FULL_SYSTEM + if (inst->isQuiesce()) { + warn("%lli: Quiesce instruction encountered, halting fetch!", curTick); + status = QuiescePending; + break; + } +#endif + + if (inst->predTaken()) { + // Start over with tick? 
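+ // A predicted-taken branch redirects fetch down the new path, so
+ // stop grouping further instructions from this cache line this cycle.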
+ break;
+ } else if (freeRegs <= 0) {
+ DPRINTF(FE, "Ran out of free registers to rename to!\n");
+ status = RenameBlocked;
+ break;
+ } else if (serializeNext) {
+ break;
+ }
+ }
+
+ fetchNisnDist.sample(num_inst);
+ checkBE();
+
+ DPRINTF(FE, "Num insts processed: %i, Inst Buffer size: %i, Free "
+ "Regs %i\n", num_inst, instBufferSize, freeRegs);
+}
+
+template <class Impl>
+Fault
+FrontEnd<Impl>::fetchCacheLine()
+{
+ // Read a cache line, based on the current PC.
+#if FULL_SYSTEM
+ // Flag to say whether or not address is physical addr.
+ unsigned flags = cpu->inPalMode(PC) ? PHYSICAL : 0;
+#else
+ unsigned flags = 0;
+#endif // FULL_SYSTEM
+ Fault fault = NoFault;
+
+ if (interruptPending && flags == 0) {
+ return fault;
+ }
+
+ // Align the fetch PC so it's at the start of a cache block.
+ Addr fetch_PC = icacheBlockAlignPC(PC);
+
+ DPRINTF(FE, "Fetching cache line starting at %#x.\n", fetch_PC);
+
+ // Set up the memReq to do a read of the first instruction's address.
+ // Set the appropriate read size and flags as well.
+ memReq = new MemReq();
+
+ memReq->asid = 0;
+ memReq->thread_num = 0;
+ memReq->data = new uint8_t[cacheBlkSize]; // sized to the block, not a hardcoded 64
+ memReq->xc = xc;
+ memReq->cmd = Read;
+ memReq->reset(fetch_PC, cacheBlkSize, flags);
+
+ // Translate the instruction request.
+ fault = cpu->translateInstReq(memReq);
+
+ // Now do the timing access to see whether or not the instruction
+ // exists within the cache.
+ if (icacheInterface && fault == NoFault) {
+#if FULL_SYSTEM
+ if (cpu->system->memctrl->badaddr(memReq->paddr) ||
+ memReq->flags & UNCACHEABLE) {
+ DPRINTF(FE, "Fetch: Bad address %#x (hopefully on a "
+ "misspeculating path!)\n",
+ memReq->paddr);
+ return TheISA::genMachineCheckFault();
+ }
+#endif
+
+ memReq->completionEvent = NULL;
+
+ memReq->time = curTick;
+ fault = cpu->mem->read(memReq, cacheData);
+
+ MemAccessResult res = icacheInterface->access(memReq);
+
+ // If the cache missed then schedule an event to wake
+ // up this stage once the cache miss completes.
+ if (icacheInterface->doEvents() && res != MA_HIT) {
+ memReq->completionEvent = new ICacheCompletionEvent(memReq, this);
+
+ status = IcacheMissStall;
+
+ cacheBlkValid = false;
+
+ DPRINTF(FE, "Cache miss.\n");
+ } else {
+ DPRINTF(FE, "Cache hit.\n");
+
+ cacheBlkValid = true;
+
+// memcpy(cacheData, memReq->data, memReq->size);
+ }
+ }
+
+ // Note that this will set the cache block PC a bit earlier than it should
+ // be set.
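+ // For example, with a 64-byte block, fetch_PC == PC & ~0x3f, so a PC
+ // of 0x1234 records a block PC of 0x1200 even if the fill is pending;
+ // cacheBlkValid stays false until the completion event fires.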
+ cacheBlkPC = fetch_PC; + + ++fetchedCacheLines; + + DPRINTF(FE, "Done fetching cache line.\n"); + + return fault; +} + +template <class Impl> +void +FrontEnd<Impl>::processInst(DynInstPtr &inst) +{ + if (processBarriers(inst)) { + return; + } + + Addr inst_PC = inst->readPC(); + + if (!inst->isControl()) { + inst->setPredTarg(inst->readNextPC()); + } else { + fetchedBranches++; + if (branchPred.predict(inst, inst_PC, inst->threadNumber)) { + predictedBranches++; + } + } + + Addr next_PC = inst->readPredTarg(); + + DPRINTF(FE, "[sn:%lli] Predicted and processed inst PC %#x, next PC " + "%#x\n", inst->seqNum, inst_PC, next_PC); + +// inst->setNextPC(next_PC); + + // Not sure where I should set this + PC = next_PC; + + renameInst(inst); +} + +template <class Impl> +bool +FrontEnd<Impl>::processBarriers(DynInstPtr &inst) +{ + if (serializeNext) { + inst->setSerializeBefore(); + serializeNext = false; + } else if (!inst->isSerializing() && + !inst->isIprAccess() && + !inst->isStoreConditional()) { + return false; + } + + if ((inst->isIprAccess() || inst->isSerializeBefore()) && + !inst->isSerializeHandled()) { + DPRINTF(FE, "Serialize before instruction encountered.\n"); + + if (!inst->isTempSerializeBefore()) { + dispatchedSerializing++; + inst->setSerializeHandled(); + } else { + dispatchedTempSerializing++; + } + + // Change status over to SerializeBlocked so that other stages know + // what this is blocked on. + status = SerializeBlocked; + + barrierInst = inst; + return true; + } else if ((inst->isStoreConditional() || inst->isSerializeAfter()) + && !inst->isSerializeHandled()) { + DPRINTF(FE, "Serialize after instruction encountered.\n"); + + inst->setSerializeHandled(); + + dispatchedSerializing++; + + serializeNext = true; + return false; + } + return false; +} + +template <class Impl> +void +FrontEnd<Impl>::handleFault(Fault &fault) +{ + DPRINTF(FE, "Fault at fetch, telling commit\n"); + + // We're blocked on the back end until it handles this fault. + status = TrapPending; + + // Get a sequence number. + InstSeqNum inst_seq = getAndIncrementInstSeq(); + // We will use a nop in order to carry the fault. + ExtMachInst ext_inst = TheISA::NoopMachInst; + + // Create a new DynInst from the dummy nop. + DynInstPtr instruction = new DynInst(ext_inst, PC, + PC+sizeof(MachInst), + inst_seq, cpu); + instruction->setPredTarg(instruction->readNextPC()); +// instruction->setThread(tid); + +// instruction->setASID(tid); + + instruction->setState(thread); + + instruction->traceData = NULL; + + instruction->fault = fault; + instruction->setCanIssue(); + instBuffer.push_back(instruction); + ++instBufferSize; +} + +template <class Impl> +void +FrontEnd<Impl>::squash(const InstSeqNum &squash_num, const Addr &next_PC, + const bool is_branch, const bool branch_taken) +{ + DPRINTF(FE, "Squashing from [sn:%lli], setting PC to %#x\n", + squash_num, next_PC); + + if (fetchFault != NoFault) + fetchFault = NoFault; + + while (!instBuffer.empty() && + instBuffer.back()->seqNum > squash_num) { + DynInstPtr inst = instBuffer.back(); + + DPRINTF(FE, "Squashing instruction [sn:%lli] PC %#x\n", + inst->seqNum, inst->readPC()); + + inst->clearDependents(); + + instBuffer.pop_back(); + --instBufferSize; + + freeRegs+= inst->numDestRegs(); + } + + // Copy over rename table from the back end. + renameTable.copyFrom(backEnd->renameTable); + + PC = next_PC; + + // Update BP with proper information. 
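+ // Branch-caused squashes correct the predictor's history with the
+ // actual outcome and target; all other squashes only roll it back.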
+ if (is_branch) { + branchPred.squash(squash_num, next_PC, branch_taken, 0); + } else { + branchPred.squash(squash_num, 0); + } + + // Clear the icache miss if it's outstanding. + if (status == IcacheMissStall && icacheInterface) { + DPRINTF(FE, "Squashing outstanding Icache miss.\n"); + memReq = NULL; + } + + if (status == SerializeBlocked) { + assert(barrierInst->seqNum > squash_num); + barrierInst = NULL; + } + + // Unless this squash originated from the front end, we're probably + // in running mode now. + // Actually might want to make this latency dependent. + status = Running; + fetchCacheLineNextCycle = true; +} + +template <class Impl> +typename Impl::DynInstPtr +FrontEnd<Impl>::getInst() +{ + if (instBufferSize == 0) { + return NULL; + } + + DynInstPtr inst = instBuffer.front(); + + instBuffer.pop_front(); + + --instBufferSize; + + dispatchCountStat++; + + return inst; +} + +template <class Impl> +void +FrontEnd<Impl>::processCacheCompletion(MemReqPtr &req) +{ + DPRINTF(FE, "Processing cache completion\n"); + + // Do something here. + if (status != IcacheMissStall || + req != memReq || + switchedOut) { + DPRINTF(FE, "Previous fetch was squashed.\n"); + fetchIcacheSquashes++; + return; + } + + status = IcacheMissComplete; + +/* if (checkStall(tid)) { + fetchStatus[tid] = Blocked; + } else { + fetchStatus[tid] = IcacheMissComplete; + } +*/ +// memcpy(cacheData, memReq->data, memReq->size); + + // Reset the completion event to NULL. +// memReq->completionEvent = NULL; + memReq = NULL; +} + +template <class Impl> +void +FrontEnd<Impl>::addFreeRegs(int num_freed) +{ + if (status == RenameBlocked && freeRegs + num_freed > 0) { + status = Running; + } + + DPRINTF(FE, "Adding %i freed registers\n", num_freed); + + freeRegs+= num_freed; + +// assert(freeRegs <= numPhysRegs); + if (freeRegs > numPhysRegs) + freeRegs = numPhysRegs; +} + +template <class Impl> +bool +FrontEnd<Impl>::updateStatus() +{ + bool serialize_block = !backEnd->robEmpty() || instBufferSize; + bool be_block = cpu->decoupledFrontEnd ? false : backEnd->isBlocked(); + bool ret_val = false; + + if (status == SerializeBlocked && !serialize_block) { + status = SerializeComplete; + ret_val = true; + } + + if (status == BEBlocked && !be_block) { + if (barrierInst) { + status = SerializeBlocked; + } else { + status = Running; + } + ret_val = true; + } + return ret_val; +} + +template <class Impl> +void +FrontEnd<Impl>::checkBE() +{ + bool be_block = cpu->decoupledFrontEnd ? false : backEnd->isBlocked(); + if (be_block) { + if (status == Running || status == Idle) { + status = BEBlocked; + } + } +} + +template <class Impl> +typename Impl::DynInstPtr +FrontEnd<Impl>::getInstFromCacheline() +{ + if (status == SerializeComplete) { + DynInstPtr inst = barrierInst; + status = Running; + barrierInst = NULL; + inst->clearSerializeBefore(); + return inst; + } + + InstSeqNum inst_seq; + MachInst inst; + // @todo: Fix this magic number used here to handle word offset (and + // getting rid of PAL bit) + unsigned offset = (PC & cacheBlkMask) & ~3; + + // PC of inst is not in this cache block + if (PC >= (cacheBlkPC + cacheBlkSize) || PC < cacheBlkPC || !cacheBlkValid) { + return NULL; + } + + ////////////////////////// + // Fetch one instruction + ////////////////////////// + + // Get a sequence number. + inst_seq = getAndIncrementInstSeq(); + + // Make sure this is a valid index. + assert(offset <= cacheBlkSize - sizeof(MachInst)); + + // Get the instruction from the array of the cache line. 
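+ // htog() swaps the raw fetch bits between host and guest byte order
+ // (a no-op when host and guest endianness match) before decoding.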
+ inst = htog(*reinterpret_cast<MachInst *>(&cacheData[offset])); + + ExtMachInst decode_inst = TheISA::makeExtMI(inst, PC); + + // Create a new DynInst from the instruction fetched. + DynInstPtr instruction = new DynInst(decode_inst, PC, PC+sizeof(MachInst), + inst_seq, cpu); + + instruction->setState(thread); + + DPRINTF(FE, "Instruction [sn:%lli] created, with PC %#x\n%s\n", + inst_seq, instruction->readPC(), + instruction->staticInst->disassemble(PC)); + + instruction->traceData = + Trace::getInstRecord(curTick, xc, cpu, + instruction->staticInst, + instruction->readPC(), 0); + + // Increment stat of fetched instructions. + ++fetchedInsts; + + return instruction; +} + +template <class Impl> +void +FrontEnd<Impl>::renameInst(DynInstPtr &inst) +{ + DynInstPtr src_inst = NULL; + int num_src_regs = inst->numSrcRegs(); + if (num_src_regs == 0) { + inst->setCanIssue(); + } else { + for (int i = 0; i < num_src_regs; ++i) { + src_inst = renameTable[inst->srcRegIdx(i)]; + + inst->setSrcInst(src_inst, i); + + DPRINTF(FE, "[sn:%lli]: Src reg %i is inst [sn:%lli]\n", + inst->seqNum, (int)inst->srcRegIdx(i), src_inst->seqNum); + + if (src_inst->isResultReady()) { + DPRINTF(FE, "Reg ready.\n"); + inst->markSrcRegReady(i); + } else { + DPRINTF(FE, "Adding to dependent list.\n"); + src_inst->addDependent(inst); + } + } + } + + for (int i = 0; i < inst->numDestRegs(); ++i) { + RegIndex idx = inst->destRegIdx(i); + + DPRINTF(FE, "Dest reg %i is now inst [sn:%lli], was previously " + "[sn:%lli]\n", + (int)inst->destRegIdx(i), inst->seqNum, + renameTable[idx]->seqNum); + + inst->setPrevDestInst(renameTable[idx], i); + + renameTable[idx] = inst; + --freeRegs; + } +} + +template <class Impl> +void +FrontEnd<Impl>::wakeFromQuiesce() +{ + DPRINTF(FE, "Waking up from quiesce\n"); + // Hopefully this is safe + status = Running; +} + +template <class Impl> +void +FrontEnd<Impl>::switchOut() +{ + switchedOut = true; + cpu->signalSwitched(); +} + +template <class Impl> +void +FrontEnd<Impl>::doSwitchOut() +{ + memReq = NULL; + squash(0, 0); + instBuffer.clear(); + instBufferSize = 0; + status = Idle; +} + +template <class Impl> +void +FrontEnd<Impl>::takeOverFrom(ExecContext *old_xc) +{ + assert(freeRegs == numPhysRegs); + fetchCacheLineNextCycle = true; + + cacheBlkValid = false; + +#if !FULL_SYSTEM +// pTable = params->pTable; +#endif + fetchFault = NoFault; + serializeNext = false; + barrierInst = NULL; + status = Running; + switchedOut = false; + interruptPending = false; +} + +template <class Impl> +void +FrontEnd<Impl>::dumpInsts() +{ + cprintf("instBuffer size: %i\n", instBuffer.size()); + + InstBuffIt buff_it = instBuffer.begin(); + + for (int num = 0; buff_it != instBuffer.end(); num++) { + cprintf("Instruction:%i\nPC:%#x\n[tid:%i]\n[sn:%lli]\nIssued:%i\n" + "Squashed:%i\n\n", + num, (*buff_it)->readPC(), (*buff_it)->threadNumber, + (*buff_it)->seqNum, (*buff_it)->isIssued(), + (*buff_it)->isSquashed()); + buff_it++; + } +} + +template <class Impl> +FrontEnd<Impl>::ICacheCompletionEvent::ICacheCompletionEvent(MemReqPtr &_req, FrontEnd *fe) + : Event(&mainEventQueue, Delayed_Writeback_Pri), req(_req), frontEnd(fe) +{ + this->setFlags(Event::AutoDelete); +} + +template <class Impl> +void +FrontEnd<Impl>::ICacheCompletionEvent::process() +{ + frontEnd->processCacheCompletion(req); +} + +template <class Impl> +const char * +FrontEnd<Impl>::ICacheCompletionEvent::description() +{ + return "ICache completion event"; +} diff --git a/cpu/ozone/inorder_back_end.cc b/cpu/ozone/inorder_back_end.cc new file mode 
100644 index 000000000..14db610d2 --- /dev/null +++ b/cpu/ozone/inorder_back_end.cc @@ -0,0 +1,5 @@ + +#include "cpu/ozone/inorder_back_end_impl.hh" +#include "cpu/ozone/simple_impl.hh" + +template class InorderBackEnd<SimpleImpl>; diff --git a/cpu/ozone/inorder_back_end.hh b/cpu/ozone/inorder_back_end.hh new file mode 100644 index 000000000..4039d8384 --- /dev/null +++ b/cpu/ozone/inorder_back_end.hh @@ -0,0 +1,450 @@ + +#ifndef __CPU_OZONE_INORDER_BACK_END_HH__ +#define __CPU_OZONE_INORDER_BACK_END_HH__ + +#include <list> + +#include "arch/faults.hh" +#include "base/timebuf.hh" +#include "cpu/exec_context.hh" +#include "cpu/inst_seq.hh" +#include "cpu/ozone/rename_table.hh" +#include "cpu/ozone/thread_state.hh" +#include "mem/mem_interface.hh" +#include "mem/mem_req.hh" +#include "sim/eventq.hh" + +template <class Impl> +class InorderBackEnd +{ + public: + typedef typename Impl::Params Params; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::FrontEnd FrontEnd; + + typedef typename FullCPU::OzoneXC OzoneXC; + typedef typename Impl::FullCPU::CommStruct CommStruct; + + InorderBackEnd(Params *params); + + std::string name() const; + + void setCPU(FullCPU *cpu_ptr) + { cpu = cpu_ptr; } + + void setFrontEnd(FrontEnd *front_end_ptr) + { frontEnd = front_end_ptr; } + + void setCommBuffer(TimeBuffer<CommStruct> *_comm) + { comm = _comm; } + + void setXC(ExecContext *xc_ptr); + + void setThreadState(OzoneThreadState<Impl> *thread_ptr); + + void regStats() { } + +#if FULL_SYSTEM + void checkInterrupts(); +#endif + + void tick(); + void executeInsts(); + void squash(const InstSeqNum &squash_num, const Addr &next_PC); + + void squashFromXC(); + void generateXCEvent() { } + + bool robEmpty() { return instList.empty(); } + + bool isFull() { return false; } + bool isBlocked() { return status == DcacheMissStoreStall || + status == DcacheMissLoadStall || + interruptBlocked; } + + void fetchFault(Fault &fault); + + void dumpInsts(); + + private: + void handleFault(); + + void setSquashInfoFromXC(); + + bool squashPending; + InstSeqNum squashSeqNum; + Addr squashNextPC; + + Fault faultFromFetch; + + bool interruptBlocked; + + public: + template <class T> + Fault read(Addr addr, T &data, unsigned flags); + + template <class T> + Fault read(MemReqPtr &req, T &data, int load_idx); + + template <class T> + Fault write(T data, Addr addr, unsigned flags, uint64_t *res); + + template <class T> + Fault write(MemReqPtr &req, T &data, int store_idx); + + Addr readCommitPC() { return commitPC; } + + Addr commitPC; + + void switchOut() { panic("Not implemented!"); } + void doSwitchOut() { panic("Not implemented!"); } + void takeOverFrom(ExecContext *old_xc = NULL) { panic("Not implemented!"); } + + public: + FullCPU *cpu; + + FrontEnd *frontEnd; + + ExecContext *xc; + + OzoneThreadState<Impl> *thread; + + RenameTable<Impl> renameTable; + + protected: + enum Status { + Running, + Idle, + DcacheMissLoadStall, + DcacheMissStoreStall, + DcacheMissComplete, + Blocked + }; + + Status status; + + class DCacheCompletionEvent : public Event + { + private: + InorderBackEnd *be; + + public: + DCacheCompletionEvent(InorderBackEnd *_be); + + virtual void process(); + virtual const char *description(); + + DynInstPtr inst; + }; + + friend class DCacheCompletionEvent; + + DCacheCompletionEvent cacheCompletionEvent; + + MemInterface *dcacheInterface; + + MemReqPtr memReq; + + private: + typedef typename std::list<DynInstPtr>::iterator InstListIt; + + 
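+ /** In-order list of all instructions in flight in this back end; it
+ * effectively serves as the ROB (see robEmpty() above).
+ */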
std::list<DynInstPtr> instList; + + // General back end width. Used if the more specific isn't given. + int width; + + int latency; + + int squashLatency; + + TimeBuffer<int> numInstsToWB; + TimeBuffer<int>::wire instsAdded; + TimeBuffer<int>::wire instsToExecute; + + TimeBuffer<CommStruct> *comm; + // number of cycles stalled for D-cache misses + Stats::Scalar<> dcacheStallCycles; + Counter lastDcacheStall; +}; + +template <class Impl> +template <class T> +Fault +InorderBackEnd<Impl>::read(Addr addr, T &data, unsigned flags) +{ + memReq->reset(addr, sizeof(T), flags); + + // translate to physical address + Fault fault = cpu->translateDataReadReq(memReq); + + // if we have a cache, do cache access too + if (fault == NoFault && dcacheInterface) { + memReq->cmd = Read; + memReq->completionEvent = NULL; + memReq->time = curTick; + memReq->flags &= ~INST_READ; + MemAccessResult result = dcacheInterface->access(memReq); + + // Ugly hack to get an event scheduled *only* if the access is + // a miss. We really should add first-class support for this + // at some point. + if (result != MA_HIT) { + // Fix this hack for keeping funcExeInst correct with loads that + // are executed twice. + memReq->completionEvent = &cacheCompletionEvent; + lastDcacheStall = curTick; +// unscheduleTickEvent(); + status = DcacheMissLoadStall; + DPRINTF(IBE, "Dcache miss stall!\n"); + } else { + // do functional access + DPRINTF(IBE, "Dcache hit!\n"); + } + } +/* + if (!dcacheInterface && (memReq->flags & UNCACHEABLE)) + recordEvent("Uncached Read"); +*/ + return fault; +} +#if 0 +template <class Impl> +template <class T> +Fault +InorderBackEnd<Impl>::read(MemReqPtr &req, T &data) +{ +#if FULL_SYSTEM && defined(TARGET_ALPHA) + if (req->flags & LOCKED) { + req->xc->setMiscReg(TheISA::Lock_Addr_DepTag, req->paddr); + req->xc->setMiscReg(TheISA::Lock_Flag_DepTag, true); + } +#endif + + Fault error; + error = thread->mem->read(req, data); + data = LittleEndianGuest::gtoh(data); + return error; +} +#endif + +template <class Impl> +template <class T> +Fault +InorderBackEnd<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res) +{ + memReq->reset(addr, sizeof(T), flags); + + // translate to physical address + Fault fault = cpu->translateDataWriteReq(memReq); + + if (fault == NoFault && dcacheInterface) { + memReq->cmd = Write; +// memcpy(memReq->data,(uint8_t *)&data,memReq->size); + memReq->completionEvent = NULL; + memReq->time = curTick; + memReq->flags &= ~INST_READ; + MemAccessResult result = dcacheInterface->access(memReq); + + // Ugly hack to get an event scheduled *only* if the access is + // a miss. We really should add first-class support for this + // at some point. 
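+ // As with the load path above, a miss schedules cacheCompletionEvent
+ // and stalls the back end until the store access completes.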
+ if (result != MA_HIT) {
+ memReq->completionEvent = &cacheCompletionEvent;
+ lastDcacheStall = curTick;
+// unscheduleTickEvent();
+ status = DcacheMissStoreStall;
+ DPRINTF(IBE, "Dcache miss stall!\n");
+ } else {
+ DPRINTF(IBE, "Dcache hit!\n");
+ }
+ }
+
+ if (res && (fault == NoFault))
+ *res = memReq->result;
+/*
+ if (!dcacheInterface && (memReq->flags & UNCACHEABLE))
+ recordEvent("Uncached Write");
+*/
+ return fault;
+}
+#if 0
+template <class Impl>
+template <class T>
+Fault
+InorderBackEnd<Impl>::write(MemReqPtr &req, T &data)
+{
+#if FULL_SYSTEM && defined(TARGET_ALPHA)
+ ExecContext *xc;
+
+ // If this is a store conditional, act appropriately
+ if (req->flags & LOCKED) {
+ xc = req->xc;
+
+ if (req->flags & UNCACHEABLE) {
+ // Don't update result register (see stq_c in isa_desc)
+ req->result = 2;
+ xc->setStCondFailures(0);//Needed? [RGD]
+ } else {
+ bool lock_flag = xc->readMiscReg(TheISA::Lock_Flag_DepTag);
+ Addr lock_addr = xc->readMiscReg(TheISA::Lock_Addr_DepTag);
+ req->result = lock_flag;
+ if (!lock_flag ||
+ ((lock_addr & ~0xf) != (req->paddr & ~0xf))) {
+ xc->setMiscReg(TheISA::Lock_Flag_DepTag, false);
+ xc->setStCondFailures(xc->readStCondFailures() + 1);
+ if (((xc->readStCondFailures()) % 100000) == 0) {
+ std::cerr << "Warning: "
+ << xc->readStCondFailures()
+ << " consecutive store conditional failures "
+ << "on cpu " << req->xc->readCpuId()
+ << std::endl;
+ }
+ return NoFault;
+ }
+ else xc->setStCondFailures(0);
+ }
+ }
+
+ // Need to clear any locked flags on other processors for
+ // this address. Only do this for successful Store Conditionals
+ // and all other stores (WH64?). Unsuccessful Store
+ // Conditionals would have returned above, and wouldn't fall
+ // through.
+ for (int i = 0; i < cpu->system->execContexts.size(); i++) {
+ xc = cpu->system->execContexts[i];
+ if ((xc->readMiscReg(TheISA::Lock_Addr_DepTag) & ~0xf) ==
+ (req->paddr & ~0xf)) {
+ xc->setMiscReg(TheISA::Lock_Flag_DepTag, false);
+ }
+ }
+
+#endif
+ return thread->mem->write(req, (T)LittleEndianGuest::htog(data));
+}
+#endif
+
+template <class Impl>
+template <class T>
+Fault
+InorderBackEnd<Impl>::read(MemReqPtr &req, T &data, int load_idx)
+{
+// panic("Unimplemented!");
+// memReq->reset(addr, sizeof(T), flags);
+
+ // translate to physical address
+// Fault fault = cpu->translateDataReadReq(req);
+ req->cmd = Read;
+ req->completionEvent = NULL;
+ req->time = curTick;
+ assert(!req->data);
+ req->data = new uint8_t[64];
+ req->flags &= ~INST_READ;
+ Fault fault = cpu->read(req, data);
+ memcpy(req->data, &data, sizeof(T));
+
+ // if we have a cache, do cache access too
+ if (dcacheInterface) {
+ MemAccessResult result = dcacheInterface->access(req);
+
+ // Ugly hack to get an event scheduled *only* if the access is
+ // a miss. We really should add first-class support for this
+ // at some point.
+ if (result != MA_HIT) {
+ req->completionEvent = &cacheCompletionEvent;
+ lastDcacheStall = curTick;
+// unscheduleTickEvent();
+ status = DcacheMissLoadStall;
+ DPRINTF(IBE, "Dcache miss load stall!\n");
+ } else {
+ DPRINTF(IBE, "Dcache hit!\n");
+
+ }
+ }
+
+/*
+ if (!dcacheInterface && (req->flags & UNCACHEABLE))
+ recordEvent("Uncached Read");
+*/
+ return NoFault;
+}
+
+template <class Impl>
+template <class T>
+Fault
+InorderBackEnd<Impl>::write(MemReqPtr &req, T &data, int store_idx)
+{
+// req->reset(addr, sizeof(T), flags);
+
+ // translate to physical address
+// Fault fault = cpu->translateDataWriteReq(req);
+
+ req->cmd = Write;
+ req->completionEvent = NULL;
+ req->time = curTick;
+ assert(!req->data);
+ req->data = new uint8_t[64];
+ memcpy(req->data, (uint8_t *)&data, req->size);
+
+ switch (req->size) {
+ case 1:
+ cpu->write(req, (uint8_t &)data);
+ break;
+ case 2:
+ cpu->write(req, (uint16_t &)data);
+ break;
+ case 4:
+ cpu->write(req, (uint32_t &)data);
+ break;
+ case 8:
+ cpu->write(req, (uint64_t &)data);
+ break;
+ default:
+ panic("Unexpected store size!\n");
+ }
+
+ if (dcacheInterface) {
+ req->cmd = Write;
+ // req->data was already allocated and filled in above; reallocating
+ // and copying it again here would leak the first buffer.
+ req->completionEvent = NULL;
+ req->time = curTick;
+ req->flags &= ~INST_READ;
+ MemAccessResult result = dcacheInterface->access(req);
+
+ // Ugly hack to get an event scheduled *only* if the access is
+ // a miss. We really should add first-class support for this
+ // at some point.
+ if (result != MA_HIT) {
+ req->completionEvent = &cacheCompletionEvent;
+ lastDcacheStall = curTick;
+// unscheduleTickEvent();
+ status = DcacheMissStoreStall;
+ DPRINTF(IBE, "Dcache miss store stall!\n");
+ } else {
+ DPRINTF(IBE, "Dcache hit!\n");
+
+ }
+ }
+/*
+ if (req->flags & LOCKED) {
+ if (req->flags & UNCACHEABLE) {
+ // Don't update result register (see stq_c in isa_desc)
+ req->result = 2;
+ } else {
+ req->result = 1;
+ }
+ }
+*/
+/*
+ if (res && (fault == NoFault))
+ *res = req->result;
+ */
+/*
+ if (!dcacheInterface && (req->flags & UNCACHEABLE))
+ recordEvent("Uncached Write");
+*/
+ return NoFault;
+}
+
+#endif // __CPU_OZONE_INORDER_BACK_END_HH__
diff --git a/cpu/ozone/inorder_back_end_impl.hh b/cpu/ozone/inorder_back_end_impl.hh
new file mode 100644
index 000000000..5a378ec76
--- /dev/null
+++ b/cpu/ozone/inorder_back_end_impl.hh
@@ -0,0 +1,519 @@
+
+#include "arch/faults.hh"
+#include "arch/isa_traits.hh"
+#include "cpu/ozone/inorder_back_end.hh"
+#include "cpu/ozone/thread_state.hh"
+
+using namespace TheISA;
+
+template <class Impl>
+InorderBackEnd<Impl>::InorderBackEnd(Params *params)
+ : squashPending(false),
+ squashSeqNum(0),
+ squashNextPC(0),
+ faultFromFetch(NoFault),
+ interruptBlocked(false),
+ cacheCompletionEvent(this),
+ dcacheInterface(params->dcacheInterface),
+ width(params->backEndWidth),
+ latency(params->backEndLatency),
+ squashLatency(params->backEndSquashLatency),
+ numInstsToWB(0, latency + 1)
+{
+ instsAdded = numInstsToWB.getWire(latency);
+ instsToExecute = numInstsToWB.getWire(0);
+
+ memReq = new MemReq;
+ memReq->data = new uint8_t[64];
+ status = Running;
+}
+
+template <class Impl>
+std::string
+InorderBackEnd<Impl>::name() const
+{
+ return cpu->name() + ".inorderbackend";
+}
+
+template <class Impl>
+void
+InorderBackEnd<Impl>::setXC(ExecContext *xc_ptr)
+{
+ xc = xc_ptr;
+ memReq->xc = xc;
+}
+
+template <class Impl>
+void
+InorderBackEnd<Impl>::setThreadState(OzoneThreadState<Impl> *thread_ptr)
+{
+ thread = thread_ptr;
+
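+ // Start the new thread context with a functional execution count of zero.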
thread->setFuncExeInst(0); +} + +#if FULL_SYSTEM +template <class Impl> +void +InorderBackEnd<Impl>::checkInterrupts() +{ + //Check if there are any outstanding interrupts + //Handle the interrupts + int ipl = 0; + int summary = 0; + + cpu->checkInterrupts = false; + + if (thread->readMiscReg(IPR_ASTRR)) + panic("asynchronous traps not implemented\n"); + + if (thread->readMiscReg(IPR_SIRR)) { + for (int i = INTLEVEL_SOFTWARE_MIN; + i < INTLEVEL_SOFTWARE_MAX; i++) { + if (thread->readMiscReg(IPR_SIRR) & (ULL(1) << i)) { + // See table 4-19 of the 21164 hardware reference + ipl = (i - INTLEVEL_SOFTWARE_MIN) + 1; + summary |= (ULL(1) << i); + } + } + } + + uint64_t interrupts = cpu->intr_status(); + + if (interrupts) { + for (int i = INTLEVEL_EXTERNAL_MIN; + i < INTLEVEL_EXTERNAL_MAX; i++) { + if (interrupts & (ULL(1) << i)) { + // See table 4-19 of the 21164 hardware reference + ipl = i; + summary |= (ULL(1) << i); + } + } + } + + if (ipl && ipl > thread->readMiscReg(IPR_IPLR)) { + thread->inSyscall = true; + + thread->setMiscReg(IPR_ISR, summary); + thread->setMiscReg(IPR_INTID, ipl); + Fault(new InterruptFault)->invoke(xc); + DPRINTF(Flow, "Interrupt! IPLR=%d ipl=%d summary=%x\n", + thread->readMiscReg(IPR_IPLR), ipl, summary); + + // May need to go 1 inst prior + squashPending = true; + + thread->inSyscall = false; + + setSquashInfoFromXC(); + } +} +#endif + +template <class Impl> +void +InorderBackEnd<Impl>::tick() +{ + // Squash due to an external source + // Not sure if this or an interrupt has higher priority + if (squashPending) { + squash(squashSeqNum, squashNextPC); + return; + } + + // if (interrupt) then set thread PC, stall front end, record that + // I'm waiting for it to drain. (for now just squash) +#if FULL_SYSTEM + if (interruptBlocked || + (cpu->checkInterrupts && + cpu->check_interrupts() && + !cpu->inPalMode())) { + if (!robEmpty()) { + interruptBlocked = true; + } else if (robEmpty() && cpu->inPalMode()) { + // Will need to let the front end continue a bit until + // we're out of pal mode. Hopefully we never get into an + // infinite loop... 
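+ // Still in PAL mode: clear the block and let execution continue
+ // until we leave PAL mode, rather than taking the interrupt now.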
+ interruptBlocked = false; + } else { + interruptBlocked = false; + checkInterrupts(); + return; + } + } +#endif + + if (status != DcacheMissLoadStall && + status != DcacheMissStoreStall) { + for (int i = 0; i < width && (*instsAdded) < width; ++i) { + DynInstPtr inst = frontEnd->getInst(); + + if (!inst) + break; + + instList.push_back(inst); + + (*instsAdded)++; + } + +#if FULL_SYSTEM + if (faultFromFetch && robEmpty() && frontEnd->isEmpty()) { + handleFault(); + } else { + executeInsts(); + } +#else + executeInsts(); +#endif + } +} + +template <class Impl> +void +InorderBackEnd<Impl>::executeInsts() +{ + bool completed_last_inst = true; + int insts_to_execute = *instsToExecute; + int freed_regs = 0; + + while (insts_to_execute > 0) { + assert(!instList.empty()); + DynInstPtr inst = instList.front(); + + commitPC = inst->readPC(); + + thread->setPC(commitPC); + thread->setNextPC(inst->readNextPC()); + +#if FULL_SYSTEM + int count = 0; + Addr oldpc; + do { + if (count == 0) + assert(!thread->inSyscall && !thread->trapPending); + oldpc = thread->readPC(); + cpu->system->pcEventQueue.service( + thread->getXCProxy()); + count++; + } while (oldpc != thread->readPC()); + if (count > 1) { + DPRINTF(IBE, "PC skip function event, stopping commit\n"); + completed_last_inst = false; + squashPending = true; + break; + } +#endif + + Fault inst_fault = NoFault; + + if (status == DcacheMissComplete) { + DPRINTF(IBE, "Completing inst [sn:%lli]\n", inst->seqNum); + status = Running; + } else if (inst->isMemRef() && status != DcacheMissComplete && + (!inst->isDataPrefetch() && !inst->isInstPrefetch())) { + DPRINTF(IBE, "Initiating mem op inst [sn:%lli] PC: %#x\n", + inst->seqNum, inst->readPC()); + + cacheCompletionEvent.inst = inst; + inst_fault = inst->initiateAcc(); + if (inst_fault == NoFault && + status != DcacheMissLoadStall && + status != DcacheMissStoreStall) { + inst_fault = inst->completeAcc(); + } + ++thread->funcExeInst; + } else { + DPRINTF(IBE, "Executing inst [sn:%lli] PC: %#x\n", + inst->seqNum, inst->readPC()); + inst_fault = inst->execute(); + ++thread->funcExeInst; + } + + // Will need to be able to break this loop in case the load + // misses. Split access/complete ops would be useful here + // with writeback events. + if (status == DcacheMissLoadStall) { + *instsToExecute = insts_to_execute; + + completed_last_inst = false; + break; + } else if (status == DcacheMissStoreStall) { + // Figure out how to fix this hack. Probably have DcacheMissLoad + // vs DcacheMissStore. + *instsToExecute = insts_to_execute; + completed_last_inst = false; +/* + instList.pop_front(); + --insts_to_execute; + if (inst->traceData) { + inst->traceData->finalize(); + } +*/ + + // Don't really need to stop for a store stall as long as + // the memory system is able to handle store forwarding + // and such. Breaking out might help avoid the cache + // interface becoming blocked. + break; + } + + inst->setExecuted(); + inst->setCompleted(); + inst->setCanCommit(); + + instList.pop_front(); + + --insts_to_execute; + --(*instsToExecute); + + if (inst->traceData) { + inst->traceData->finalize(); + inst->traceData = NULL; + } + + if (inst_fault != NoFault) { +#if FULL_SYSTEM + DPRINTF(IBE, "Inst [sn:%lli] PC %#x has a fault\n", + inst->seqNum, inst->readPC()); + + assert(!thread->inSyscall); + + thread->inSyscall = true; + + // Hack for now; DTB will sometimes need the machine instruction + // for when faults happen. So we will set it here, prior to the + // DTB possibly needing it for this translation. 
+ thread->setInst( + static_cast<TheISA::MachInst>(inst->staticInst->machInst)); + + // Consider holding onto the trap and waiting until the trap event + // happens for this to be executed. + inst_fault->invoke(xc); + + // Exit state update mode to avoid accidental updating. + thread->inSyscall = false; + + squashPending = true; + + // Generate trap squash event. +// generateTrapEvent(tid); + completed_last_inst = false; + break; +#else // !FULL_SYSTEM + panic("fault (%d) detected @ PC %08p", inst_fault, + inst->PC); +#endif // FULL_SYSTEM + } + + for (int i = 0; i < inst->numDestRegs(); ++i) { + renameTable[inst->destRegIdx(i)] = inst; + thread->renameTable[inst->destRegIdx(i)] = inst; + ++freed_regs; + } + + inst->clearDependents(); + + comm->access(0)->doneSeqNum = inst->seqNum; + + if (inst->mispredicted()) { + squash(inst->seqNum, inst->readNextPC()); + + thread->setNextPC(inst->readNextPC()); + + break; + } else if (squashPending) { + // Something external happened that caused the CPU to squash. + // Break out of commit and handle the squash next cycle. + break; + } + // If it didn't mispredict, then it executed fine. Send back its + // registers and BP info? What about insts that may still have + // latency, like loads? Probably can send back the information after + // it is completed. + + // keep an instruction count + cpu->numInst++; + thread->numInsts++; + } + + frontEnd->addFreeRegs(freed_regs); + + assert(insts_to_execute >= 0); + + // Should only advance this if I have executed all instructions. + if (insts_to_execute == 0) { + numInstsToWB.advance(); + } + + // Should I set the PC to the next PC here? What do I set next PC to? + if (completed_last_inst) { + thread->setPC(thread->readNextPC()); + thread->setNextPC(thread->readPC() + sizeof(MachInst)); + } + + if (squashPending) { + setSquashInfoFromXC(); + } +} + +template <class Impl> +void +InorderBackEnd<Impl>::handleFault() +{ + DPRINTF(Commit, "Handling fault from fetch\n"); + + assert(!thread->inSyscall); + + thread->inSyscall = true; + + // Consider holding onto the trap and waiting until the trap event + // happens for this to be executed. + faultFromFetch->invoke(xc); + + // Exit state update mode to avoid accidental updating. + thread->inSyscall = false; + + squashPending = true; + + setSquashInfoFromXC(); +} + +template <class Impl> +void +InorderBackEnd<Impl>::squash(const InstSeqNum &squash_num, const Addr &next_PC) +{ + DPRINTF(IBE, "Squashing from [sn:%lli], setting PC to %#x\n", + squash_num, next_PC); + + InstListIt squash_it = --(instList.end()); + + int freed_regs = 0; + + while (!instList.empty() && (*squash_it)->seqNum > squash_num) { + DynInstPtr inst = *squash_it; + + DPRINTF(IBE, "Squashing instruction PC %#x, [sn:%lli].\n", + inst->readPC(), + inst->seqNum); + + // May cause problems with misc regs + freed_regs+= inst->numDestRegs(); + inst->clearDependents(); + squash_it--; + instList.pop_back(); + } + + frontEnd->addFreeRegs(freed_regs); + + for (int i = 0; i < latency+1; ++i) { + numInstsToWB.advance(); + } + + squashPending = false; + + // Probably want to make sure that this squash is the one that set the + // thread into inSyscall mode. + thread->inSyscall = false; + + // Tell front end to squash, reset PC to new one. 
+ frontEnd->squash(squash_num, next_PC); + + faultFromFetch = NULL; +} + +template <class Impl> +void +InorderBackEnd<Impl>::squashFromXC() +{ + // Record that I need to squash + squashPending = true; + + thread->inSyscall = true; +} + +template <class Impl> +void +InorderBackEnd<Impl>::setSquashInfoFromXC() +{ + // Need to handle the case of the instList being empty. In that case + // probably any number works, except maybe with stores in the store buffer. + squashSeqNum = instList.empty() ? 0 : instList.front()->seqNum - 1; + + squashNextPC = thread->PC; +} + +template <class Impl> +void +InorderBackEnd<Impl>::fetchFault(Fault &fault) +{ + faultFromFetch = fault; +} + +template <class Impl> +void +InorderBackEnd<Impl>::dumpInsts() +{ + int num = 0; + int valid_num = 0; + + InstListIt inst_list_it = instList.begin(); + + cprintf("Inst list size: %i\n", instList.size()); + + while (inst_list_it != instList.end()) + { + cprintf("Instruction:%i\n", + num); + if (!(*inst_list_it)->isSquashed()) { + if (!(*inst_list_it)->isIssued()) { + ++valid_num; + cprintf("Count:%i\n", valid_num); + } else if ((*inst_list_it)->isMemRef() && + !(*inst_list_it)->memOpDone) { + // Loads that have not been marked as executed still count + // towards the total instructions. + ++valid_num; + cprintf("Count:%i\n", valid_num); + } + } + + cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" + "Issued:%i\nSquashed:%i\n", + (*inst_list_it)->readPC(), + (*inst_list_it)->seqNum, + (*inst_list_it)->threadNumber, + (*inst_list_it)->isIssued(), + (*inst_list_it)->isSquashed()); + + if ((*inst_list_it)->isMemRef()) { + cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); + } + + cprintf("\n"); + + inst_list_it++; + ++num; + } +} + +template <class Impl> +InorderBackEnd<Impl>::DCacheCompletionEvent::DCacheCompletionEvent( + InorderBackEnd *_be) + : Event(&mainEventQueue, CPU_Tick_Pri), be(_be) +{ +// this->setFlags(Event::AutoDelete); +} + +template <class Impl> +void +InorderBackEnd<Impl>::DCacheCompletionEvent::process() +{ + inst->completeAcc(); + be->status = DcacheMissComplete; +} + +template <class Impl> +const char * +InorderBackEnd<Impl>::DCacheCompletionEvent::description() +{ + return "DCache completion event"; +} diff --git a/cpu/ozone/inst_queue.cc b/cpu/ozone/inst_queue.cc new file mode 100644 index 000000000..9c61602d9 --- /dev/null +++ b/cpu/ozone/inst_queue.cc @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cpu/ozone/dyn_inst.hh"
+#include "cpu/ozone/ozone_impl.hh"
+#include "cpu/ozone/simple_impl.hh"
+#include "cpu/ozone/inst_queue_impl.hh"
+
+// Force instantiation of InstQueue.
+template class InstQueue<SimpleImpl>;
+template class InstQueue<OzoneImpl>;
diff --git a/cpu/ozone/inst_queue.hh b/cpu/ozone/inst_queue.hh
new file mode 100644
index 000000000..2cbbb7987
--- /dev/null
+++ b/cpu/ozone/inst_queue.hh
@@ -0,0 +1,506 @@
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_OZONE_INST_QUEUE_HH__
+#define __CPU_OZONE_INST_QUEUE_HH__
+
+#include <list>
+#include <map>
+#include <queue>
+#include <vector>
+
+#include "base/statistics.hh"
+#include "base/timebuf.hh"
+#include "cpu/inst_seq.hh"
+#include "sim/host.hh"
+
+class FUPool;
+class MemInterface;
+
+/**
+ * A standard instruction queue class. It holds ready instructions, in
+ * order, in separate priority queues to facilitate the scheduling of
+ * instructions. The IQ uses a separate linked list to track dependencies.
+ * Similar to the rename map and the free list, it expects that
+ * floating point registers have their indices start after the integer
+ * registers (i.e. with 96 int and 96 fp registers, regs 0-95 are integer
+ * and 96-191 are fp). This remains true for both logical and
+ * physical register indices.
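+ * For example, under this convention logical fp register 3 maps to
+ * register index 96 + 3 = 99.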
The IQ depends on the memory dependence unit to + * track when memory operations are ready in terms of ordering; register + * dependencies are tracked normally. Right now the IQ also handles the + * execution timing; this is mainly to allow back-to-back scheduling without + * requiring IEW to be able to peek into the IQ. At the end of the execution + * latency, the instruction is put into the queue to execute, where it will + * have the execute() function called on it. + * @todo: Make IQ able to handle multiple FU pools. + */ +template <class Impl> +class InstQueue +{ + public: + //Typedefs from the Impl. + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::Params Params; + typedef typename Impl::IssueStruct IssueStruct; +/* + typedef typename Impl::CPUPol::IEW IEW; + typedef typename Impl::CPUPol::MemDepUnit MemDepUnit; + typedef typename Impl::CPUPol::IssueStruct IssueStruct; + typedef typename Impl::CPUPol::TimeStruct TimeStruct; +*/ + // Typedef of iterator through the list of instructions. + typedef typename std::list<DynInstPtr>::iterator ListIt; + + friend class Impl::FullCPU; +#if 0 + /** FU completion event class. */ + class FUCompletion : public Event { + private: + /** Executing instruction. */ + DynInstPtr inst; + + /** Index of the FU used for executing. */ + int fuIdx; + + /** Pointer back to the instruction queue. */ + InstQueue<Impl> *iqPtr; + + public: + /** Construct a FU completion event. */ + FUCompletion(DynInstPtr &_inst, int fu_idx, + InstQueue<Impl> *iq_ptr); + + virtual void process(); + virtual const char *description(); + }; +#endif + /** Constructs an IQ. */ + InstQueue(Params *params); + + /** Destructs the IQ. */ + ~InstQueue(); + + /** Returns the name of the IQ. */ + std::string name() const; + + /** Registers statistics. */ + void regStats(); + + /** Sets CPU pointer. */ + void setCPU(FullCPU *_cpu) { cpu = _cpu; } +#if 0 + /** Sets active threads list. */ + void setActiveThreads(list<unsigned> *at_ptr); + + /** Sets the IEW pointer. */ + void setIEW(IEW *iew_ptr) { iewStage = iew_ptr; } +#endif + /** Sets the timer buffer between issue and execute. */ + void setIssueToExecuteQueue(TimeBuffer<IssueStruct> *i2eQueue); +#if 0 + /** Sets the global time buffer. */ + void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr); + + /** Number of entries needed for given amount of threads. */ + int entryAmount(int num_threads); + + /** Resets max entries for all threads. */ + void resetEntries(); +#endif + /** Returns total number of free entries. */ + unsigned numFreeEntries(); + + /** Returns number of free entries for a thread. */ + unsigned numFreeEntries(unsigned tid); + + /** Returns whether or not the IQ is full. */ + bool isFull(); + + /** Returns whether or not the IQ is full for a specific thread. */ + bool isFull(unsigned tid); + + /** Returns if there are any ready instructions in the IQ. */ + bool hasReadyInsts(); + + /** Inserts a new instruction into the IQ. */ + void insert(DynInstPtr &new_inst); + + /** Inserts a new, non-speculative instruction into the IQ. */ + void insertNonSpec(DynInstPtr &new_inst); +#if 0 + /** + * Advances the tail of the IQ, used if an instruction is not added to the + * IQ for scheduling. + * @todo: Rename this function. + */ + void advanceTail(DynInstPtr &inst); + + /** Process FU completion event. 
*/ + void processFUCompletion(DynInstPtr &inst, int fu_idx); +#endif + /** + * Schedules ready instructions, adding the ready ones (oldest first) to + * the queue to execute. + */ + void scheduleReadyInsts(); + + /** Schedules a single specific non-speculative instruction. */ + void scheduleNonSpec(const InstSeqNum &inst); + + /** + * Commits all instructions up to and including the given sequence number, + * for a specific thread. + */ + void commit(const InstSeqNum &inst, unsigned tid = 0); + + /** Wakes all dependents of a completed instruction. */ + void wakeDependents(DynInstPtr &completed_inst); + + /** Adds a ready memory instruction to the ready list. */ + void addReadyMemInst(DynInstPtr &ready_inst); +#if 0 + /** + * Reschedules a memory instruction. It will be ready to issue once + * replayMemInst() is called. + */ + void rescheduleMemInst(DynInstPtr &resched_inst); + + /** Replays a memory instruction. It must be rescheduled first. */ + void replayMemInst(DynInstPtr &replay_inst); +#endif + /** Completes a memory operation. */ + void completeMemInst(DynInstPtr &completed_inst); +#if 0 + /** Indicates an ordering violation between a store and a load. */ + void violation(DynInstPtr &store, DynInstPtr &faulting_load); +#endif + /** + * Squashes instructions for a thread. Squashing information is obtained + * from the time buffer. + */ + void squash(unsigned tid); // Probably want the ISN + + /** Returns the number of used entries for a thread. */ + unsigned getCount(unsigned tid) { return count[tid]; }; + + /** Updates the number of free entries. */ + void updateFreeEntries(int num) { freeEntries += num; } + + /** Debug function to print all instructions. */ + void printInsts(); + + private: + /** Does the actual squashing. */ + void doSquash(unsigned tid); + + ///////////////////////// + // Various pointers + ///////////////////////// + + /** Pointer to the CPU. */ + FullCPU *cpu; + + /** Cache interface. */ + MemInterface *dcacheInterface; +#if 0 + /** Pointer to IEW stage. */ + IEW *iewStage; + + /** The memory dependence unit, which tracks/predicts memory dependences + * between instructions. + */ + MemDepUnit memDepUnit[Impl::MaxThreads]; +#endif + /** The queue to the execute stage. Issued instructions will be written + * into it. + */ + TimeBuffer<IssueStruct> *issueToExecuteQueue; +#if 0 + /** The backwards time buffer. */ + TimeBuffer<TimeStruct> *timeBuffer; + + /** Wire to read information from timebuffer. */ + typename TimeBuffer<TimeStruct>::wire fromCommit; + + /** Function unit pool. */ + FUPool *fuPool; +#endif + ////////////////////////////////////// + // Instruction lists, ready queues, and ordering + ////////////////////////////////////// + + /** List of all the instructions in the IQ (some of which may be issued). */ + std::list<DynInstPtr> instList[Impl::MaxThreads]; + + /** + * Struct for comparing entries to be added to the priority queue. This + * gives reverse ordering to the instructions in terms of sequence + * numbers: the instructions with smaller sequence numbers (and hence + * are older) will be at the top of the priority queue. + */ + struct pqCompare { + bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const + { + return lhs->seqNum > rhs->seqNum; + } + }; + + /** + * Struct for an IQ entry. It includes the instruction and an iterator + * to the instruction's spot in the IQ. 
+ */ + struct IQEntry { + DynInstPtr inst; + ListIt iqIt; + }; + + typedef std::priority_queue<DynInstPtr, std::vector<DynInstPtr>, pqCompare> + ReadyInstQueue; + + typedef std::map<DynInstPtr, pqCompare> ReadyInstMap; + typedef typename std::map<DynInstPtr, pqCompare>::iterator ReadyMapIt; + + /** List of ready instructions. + */ + ReadyInstQueue readyInsts; + + /** List of non-speculative instructions that will be scheduled + * once the IQ gets a signal from commit. While it's redundant to + * have the key be a part of the value (the sequence number is stored + * inside of DynInst), when these instructions are woken up only + * the sequence number will be available. Thus it is most efficient to be + * able to search by the sequence number alone. + */ + std::map<InstSeqNum, DynInstPtr> nonSpecInsts; + + typedef typename std::map<InstSeqNum, DynInstPtr>::iterator NonSpecMapIt; +#if 0 + /** Entry for the list age ordering by op class. */ + struct ListOrderEntry { + OpClass queueType; + InstSeqNum oldestInst; + }; + + /** List that contains the age order of the oldest instruction of each + * ready queue. Used to select the oldest instruction available + * among op classes. + */ + std::list<ListOrderEntry> listOrder; + + typedef typename std::list<ListOrderEntry>::iterator ListOrderIt; + + /** Tracks if each ready queue is on the age order list. */ + bool queueOnList[Num_OpClasses]; + + /** Iterators of each ready queue. Points to their spot in the age order + * list. + */ + ListOrderIt readyIt[Num_OpClasses]; + + /** Add an op class to the age order list. */ + void addToOrderList(OpClass op_class); + + /** + * Called when the oldest instruction has been removed from a ready queue; + * this places that ready queue into the proper spot in the age order list. + */ + void moveToYoungerInst(ListOrderIt age_order_it); +#endif + ////////////////////////////////////// + // Various parameters + ////////////////////////////////////// +#if 0 + /** IQ Resource Sharing Policy */ + enum IQPolicy { + Dynamic, + Partitioned, + Threshold + }; + + /** IQ sharing policy for SMT. */ + IQPolicy iqPolicy; +#endif + /** Number of Total Threads*/ + unsigned numThreads; +#if 0 + /** Pointer to list of active threads. */ + list<unsigned> *activeThreads; +#endif + /** Per Thread IQ count */ + unsigned count[Impl::MaxThreads]; + + /** Max IQ Entries Per Thread */ + unsigned maxEntries[Impl::MaxThreads]; + + /** Number of free IQ entries left. */ + unsigned freeEntries; + + /** The number of entries in the instruction queue. */ + unsigned numEntries; + + /** The total number of instructions that can be issued in one cycle. */ + unsigned totalWidth; +#if 0 + /** The number of physical registers in the CPU. */ + unsigned numPhysRegs; + + /** The number of physical integer registers in the CPU. */ + unsigned numPhysIntRegs; + + /** The number of floating point registers in the CPU. */ + unsigned numPhysFloatRegs; +#endif + /** Delay between commit stage and the IQ. + * @todo: Make there be a distinction between the delays within IEW. + */ + unsigned commitToIEWDelay; + + ////////////////////////////////// + // Variables needed for squashing + ////////////////////////////////// + + /** The sequence number of the squashed instruction. */ + InstSeqNum squashedSeqNum[Impl::MaxThreads]; + + /** Iterator that points to the last instruction that has been squashed. + * This will not be valid unless the IQ is in the process of squashing. 
+ */
+ ListIt squashIt[Impl::MaxThreads];
+#if 0
+ ///////////////////////////////////
+ // Dependency graph stuff
+ ///////////////////////////////////
+
+ class DependencyEntry
+ {
+ public:
+ DependencyEntry()
+ : inst(NULL), next(NULL)
+ { }
+
+ DynInstPtr inst;
+ //Might want to include data about what arch. register the
+ //dependence is waiting on.
+ DependencyEntry *next;
+
+ //This function, and perhaps this whole class, stand out a little
+ //bit as they don't fit a classification well. I want access
+ //to the underlying structure of the linked list, yet at
+ //the same time it feels like this should be something abstracted
+ //away. So for now it will sit here, within the IQ, until
+ //a better implementation is decided upon.
+ // This function probably shouldn't be within the entry...
+ void insert(DynInstPtr &new_inst);
+
+ void remove(DynInstPtr &inst_to_remove);
+
+ // Debug variable, remove when done testing.
+ static unsigned mem_alloc_counter;
+ };
+
+ /** Array of linked lists. Each linked list is a list of all the
+ * instructions that depend upon a given register. The actual
+ * register's index is used to index into the graph; i.e. all
+ * instructions in flight that are dependent upon r34 will be
+ * in the linked list of dependGraph[34].
+ */
+ DependencyEntry *dependGraph;
+
+ /** A cache of the recently woken registers. It is 1 if the register
+ * has been woken up recently, and 0 if the register has been added
+ * to the dependency graph and has not yet received its value. It
+ * is basically a secondary scoreboard, and should pretty much mirror
+ * the scoreboard that exists in the rename map.
+ */
+ vector<bool> regScoreboard;
+
+ /** Adds an instruction to the dependency graph, as a producer. */
+ bool addToDependents(DynInstPtr &new_inst);
+
+ /** Adds an instruction to the dependency graph, as a consumer. */
+ void createDependency(DynInstPtr &new_inst);
+#endif
+ /** Moves an instruction to the ready queue if it is ready. */
+ void addIfReady(DynInstPtr &inst);
+
+ /** Debugging function to count how many entries are in the IQ. It does
+ * a linear walk through the instructions, so do not call this function
+ * during normal execution.
+ */
+ int countInsts();
+#if 0
+ /** Debugging function to dump out the dependency graph.
+ */
+ void dumpDependGraph();
+#endif
+ /** Debugging function to dump all the list sizes, as well as print
+ * out the list of nonspeculative instructions. Should not be used
+ * in any other capacity, but it has no harmful side effects.
+ */
+ void dumpLists();
+
+ /** Debugging function to dump out all instructions that are in the
+ * IQ.
+ */
+ void dumpInsts();
+
+ /** Stat for number of instructions added. */
+ Stats::Scalar<> iqInstsAdded;
+ /** Stat for number of non-speculative instructions added. */
+ Stats::Scalar<> iqNonSpecInstsAdded;
+// Stats::Scalar<> iqIntInstsAdded;
+ /** Stat for number of integer instructions issued. */
+ Stats::Scalar<> iqIntInstsIssued;
+// Stats::Scalar<> iqFloatInstsAdded;
+ /** Stat for number of floating point instructions issued. */
+ Stats::Scalar<> iqFloatInstsIssued;
+// Stats::Scalar<> iqBranchInstsAdded;
+ /** Stat for number of branch instructions issued. */
+ Stats::Scalar<> iqBranchInstsIssued;
+// Stats::Scalar<> iqMemInstsAdded;
+ /** Stat for number of memory instructions issued. */
+ Stats::Scalar<> iqMemInstsIssued;
+// Stats::Scalar<> iqMiscInstsAdded;
+ /** Stat for number of miscellaneous instructions issued.
+     */
+    Stats::Scalar<> iqMiscInstsIssued;
+    /** Stat for number of squashed instructions that were ready to issue. */
+    Stats::Scalar<> iqSquashedInstsIssued;
+    /** Stat for number of squashed instructions examined when squashing. */
+    Stats::Scalar<> iqSquashedInstsExamined;
+    /** Stat for number of squashed instruction operands examined when
+     * squashing.
+     */
+    Stats::Scalar<> iqSquashedOperandsExamined;
+    /** Stat for number of non-speculative instructions removed due to a
+     * squash.
+     */
+    Stats::Scalar<> iqSquashedNonSpecRemoved;
+
+};
+
+#endif //__CPU_OZONE_INST_QUEUE_HH__
diff --git a/cpu/ozone/inst_queue_impl.hh b/cpu/ozone/inst_queue_impl.hh
new file mode 100644
index 000000000..0523c68d6
--- /dev/null
+++ b/cpu/ozone/inst_queue_impl.hh
@@ -0,0 +1,1341 @@
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+// Todo:
+// The current ordering allows for a 0-cycle added-to-scheduled path. This
+// could be faked: either do the scheduling in reverse order, or put newly
+// added instructions into a different ready queue that, in
+// scheduleReadyInsts(), gets merged onto the normal ready queue. That would
+// give only a one-cycle delay, however; adding an actual delay parameter is
+// probably more flexible than running it backwards.
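+//
+// A minimal sketch of that staging-queue idea (hypothetical names, not part
+// of this file's interface): newly woken instructions go into a separate
+// queue and are only promoted to readyInsts on the next scheduling pass,
+// giving the one-cycle delay described above:
+//
+//     ReadyInstQueue addedInsts;
+//
+//     void promoteAddedInsts()
+//     {
+//         while (!addedInsts.empty()) {
+//             readyInsts.push(addedInsts.top());
+//             addedInsts.pop();
+//         }
+//     }
+//
+// scheduleReadyInsts() would call promoteAddedInsts() after issuing from
+// readyInsts, so nothing added this cycle can issue until the next cycle.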
+ +#include <vector> + +#include "sim/root.hh" + +#include "cpu/ozone/inst_queue.hh" +#if 0 +template <class Impl> +InstQueue<Impl>::FUCompletion::FUCompletion(DynInstPtr &_inst, + int fu_idx, + InstQueue<Impl> *iq_ptr) + : Event(&mainEventQueue, Stat_Event_Pri), + inst(_inst), fuIdx(fu_idx), iqPtr(iq_ptr) +{ + this->setFlags(Event::AutoDelete); +} + +template <class Impl> +void +InstQueue<Impl>::FUCompletion::process() +{ + iqPtr->processFUCompletion(inst, fuIdx); +} + + +template <class Impl> +const char * +InstQueue<Impl>::FUCompletion::description() +{ + return "Functional unit completion event"; +} +#endif +template <class Impl> +InstQueue<Impl>::InstQueue(Params *params) + : dcacheInterface(params->dcacheInterface), +// fuPool(params->fuPool), + numEntries(params->numIQEntries), + totalWidth(params->issueWidth), +// numPhysIntRegs(params->numPhysIntRegs), +// numPhysFloatRegs(params->numPhysFloatRegs), + commitToIEWDelay(params->commitToIEWDelay) +{ +// assert(fuPool); + +// numThreads = params->numberOfThreads; + numThreads = 1; + + //Initialize thread IQ counts + for (int i = 0; i <numThreads; i++) { + count[i] = 0; + } + + // Initialize the number of free IQ entries. + freeEntries = numEntries; + + // Set the number of physical registers as the number of int + float +// numPhysRegs = numPhysIntRegs + numPhysFloatRegs; + +// DPRINTF(IQ, "There are %i physical registers.\n", numPhysRegs); + + //Create an entry for each physical register within the + //dependency graph. +// dependGraph = new DependencyEntry[numPhysRegs]; + + // Resize the register scoreboard. +// regScoreboard.resize(numPhysRegs); +/* + //Initialize Mem Dependence Units + for (int i = 0; i < numThreads; i++) { + memDepUnit[i].init(params,i); + memDepUnit[i].setIQ(this); + } + + // Initialize all the head pointers to point to NULL, and all the + // entries as unready. + // Note that in actuality, the registers corresponding to the logical + // registers start off as ready. However this doesn't matter for the + // IQ as the instruction should have been correctly told if those + // registers are ready in rename. Thus it can all be initialized as + // unready. + for (int i = 0; i < numPhysRegs; ++i) { + dependGraph[i].next = NULL; + dependGraph[i].inst = NULL; + regScoreboard[i] = false; + } +*/ + for (int i = 0; i < numThreads; ++i) { + squashedSeqNum[i] = 0; + } +/* + for (int i = 0; i < Num_OpClasses; ++i) { + queueOnList[i] = false; + readyIt[i] = listOrder.end(); + } + + string policy = params->smtIQPolicy; + + //Convert string to lowercase + std::transform(policy.begin(), policy.end(), policy.begin(), + (int(*)(int)) tolower); + + //Figure out resource sharing policy + if (policy == "dynamic") { + iqPolicy = Dynamic; + + //Set Max Entries to Total ROB Capacity + for (int i = 0; i < numThreads; i++) { + maxEntries[i] = numEntries; + } + + } else if (policy == "partitioned") { + iqPolicy = Partitioned; + + //@todo:make work if part_amt doesnt divide evenly. 
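+        // One possible fix for the @todo above (untested sketch): give the
+        // first (numEntries % numThreads) threads one extra entry so no IQ
+        // entries are wasted when the division has a remainder:
+        //
+        //     int base  = numEntries / numThreads;
+        //     int extra = numEntries % numThreads;
+        //     for (int i = 0; i < numThreads; i++)
+        //         maxEntries[i] = base + (i < extra ? 1 : 0);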
+ int part_amt = numEntries / numThreads; + + //Divide ROB up evenly + for (int i = 0; i < numThreads; i++) { + maxEntries[i] = part_amt; + } + + DPRINTF(Fetch, "IQ sharing policy set to Partitioned:" + "%i entries per thread.\n",part_amt); + + } else if (policy == "threshold") { + iqPolicy = Threshold; + + double threshold = (double)params->smtIQThreshold / 100; + + int thresholdIQ = (int)((double)threshold * numEntries); + + //Divide up by threshold amount + for (int i = 0; i < numThreads; i++) { + maxEntries[i] = thresholdIQ; + } + + DPRINTF(Fetch, "IQ sharing policy set to Threshold:" + "%i entries per thread.\n",thresholdIQ); + } else { + assert(0 && "Invalid IQ Sharing Policy.Options Are:{Dynamic," + "Partitioned, Threshold}"); + } +*/ +} + +template <class Impl> +InstQueue<Impl>::~InstQueue() +{ + // Clear the dependency graph +/* + DependencyEntry *curr; + DependencyEntry *prev; + + for (int i = 0; i < numPhysRegs; ++i) { + curr = dependGraph[i].next; + + while (curr) { + DependencyEntry::mem_alloc_counter--; + + prev = curr; + curr = prev->next; + prev->inst = NULL; + + delete prev; + } + + if (dependGraph[i].inst) { + dependGraph[i].inst = NULL; + } + + dependGraph[i].next = NULL; + } + + assert(DependencyEntry::mem_alloc_counter == 0); + + delete [] dependGraph; +*/ +} + +template <class Impl> +std::string +InstQueue<Impl>::name() const +{ + return cpu->name() + ".iq"; +} + +template <class Impl> +void +InstQueue<Impl>::regStats() +{ + iqInstsAdded + .name(name() + ".iqInstsAdded") + .desc("Number of instructions added to the IQ (excludes non-spec)") + .prereq(iqInstsAdded); + + iqNonSpecInstsAdded + .name(name() + ".iqNonSpecInstsAdded") + .desc("Number of non-speculative instructions added to the IQ") + .prereq(iqNonSpecInstsAdded); + +// iqIntInstsAdded; + + iqIntInstsIssued + .name(name() + ".iqIntInstsIssued") + .desc("Number of integer instructions issued") + .prereq(iqIntInstsIssued); + +// iqFloatInstsAdded; + + iqFloatInstsIssued + .name(name() + ".iqFloatInstsIssued") + .desc("Number of float instructions issued") + .prereq(iqFloatInstsIssued); + +// iqBranchInstsAdded; + + iqBranchInstsIssued + .name(name() + ".iqBranchInstsIssued") + .desc("Number of branch instructions issued") + .prereq(iqBranchInstsIssued); + +// iqMemInstsAdded; + + iqMemInstsIssued + .name(name() + ".iqMemInstsIssued") + .desc("Number of memory instructions issued") + .prereq(iqMemInstsIssued); + +// iqMiscInstsAdded; + + iqMiscInstsIssued + .name(name() + ".iqMiscInstsIssued") + .desc("Number of miscellaneous instructions issued") + .prereq(iqMiscInstsIssued); + + iqSquashedInstsIssued + .name(name() + ".iqSquashedInstsIssued") + .desc("Number of squashed instructions issued") + .prereq(iqSquashedInstsIssued); + + iqSquashedInstsExamined + .name(name() + ".iqSquashedInstsExamined") + .desc("Number of squashed instructions iterated over during squash;" + " mainly for profiling") + .prereq(iqSquashedInstsExamined); + + iqSquashedOperandsExamined + .name(name() + ".iqSquashedOperandsExamined") + .desc("Number of squashed operands that are examined and possibly " + "removed from graph") + .prereq(iqSquashedOperandsExamined); + + iqSquashedNonSpecRemoved + .name(name() + ".iqSquashedNonSpecRemoved") + .desc("Number of squashed non-spec instructions that were removed") + .prereq(iqSquashedNonSpecRemoved); +/* + for ( int i=0; i < numThreads; i++) { + // Tell mem dependence unit to reg stats as well. 
+        memDepUnit[i].regStats();
+    }
+*/
+}
+/*
+template <class Impl>
+void
+InstQueue<Impl>::setActiveThreads(list<unsigned> *at_ptr)
+{
+    DPRINTF(IQ, "Setting active threads list pointer.\n");
+    activeThreads = at_ptr;
+}
+*/
+template <class Impl>
+void
+InstQueue<Impl>::setIssueToExecuteQueue(TimeBuffer<IssueStruct> *i2e_ptr)
+{
+    DPRINTF(IQ, "Set the issue to execute queue.\n");
+    issueToExecuteQueue = i2e_ptr;
+}
+/*
+template <class Impl>
+void
+InstQueue<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
+{
+    DPRINTF(IQ, "Set the time buffer.\n");
+    timeBuffer = tb_ptr;
+
+    fromCommit = timeBuffer->getWire(-commitToIEWDelay);
+}
+
+template <class Impl>
+int
+InstQueue<Impl>::entryAmount(int num_threads)
+{
+    if (iqPolicy == Partitioned) {
+        return numEntries / num_threads;
+    } else {
+        return 0;
+    }
+}
+
+
+template <class Impl>
+void
+InstQueue<Impl>::resetEntries()
+{
+    if (iqPolicy != Dynamic || numThreads > 1) {
+        int active_threads = (*activeThreads).size();
+
+        list<unsigned>::iterator threads = (*activeThreads).begin();
+        list<unsigned>::iterator list_end = (*activeThreads).end();
+
+        while (threads != list_end) {
+            if (iqPolicy == Partitioned) {
+                maxEntries[*threads++] = numEntries / active_threads;
+            } else if (iqPolicy == Threshold && active_threads == 1) {
+                maxEntries[*threads++] = numEntries;
+            }
+        }
+    }
+}
+*/
+template <class Impl>
+unsigned
+InstQueue<Impl>::numFreeEntries()
+{
+    return freeEntries;
+}
+
+template <class Impl>
+unsigned
+InstQueue<Impl>::numFreeEntries(unsigned tid)
+{
+    return maxEntries[tid] - count[tid];
+}
+
+// Might want to do something more complex if it knows how many instructions
+// will be issued this cycle.
+template <class Impl>
+bool
+InstQueue<Impl>::isFull()
+{
+    if (freeEntries == 0) {
+        return(true);
+    } else {
+        return(false);
+    }
+}
+
+template <class Impl>
+bool
+InstQueue<Impl>::isFull(unsigned tid)
+{
+    if (numFreeEntries(tid) == 0) {
+        return(true);
+    } else {
+        return(false);
+    }
+}
+
+template <class Impl>
+bool
+InstQueue<Impl>::hasReadyInsts()
+{
+/*
+    if (!listOrder.empty()) {
+        return true;
+    }
+
+    for (int i = 0; i < Num_OpClasses; ++i) {
+        if (!readyInsts[i].empty()) {
+            return true;
+        }
+    }
+
+    return false;
+*/
+    return !readyInsts.empty();
+}
+
+template <class Impl>
+void
+InstQueue<Impl>::insert(DynInstPtr &new_inst)
+{
+    // Make sure the instruction is valid
+    assert(new_inst);
+
+    DPRINTF(IQ, "Adding instruction PC %#x to the IQ.\n",
+            new_inst->readPC());
+
+    // Check if there are any free entries. Panic if there are none.
+    // Might want to have this return a fault in the future instead of
+    // panicking.
+    assert(freeEntries != 0);
+
+    instList[new_inst->threadNumber].push_back(new_inst);
+
+    // Decrease the number of free entries.
+    --freeEntries;
+
+    //Mark Instruction as in IQ
+//    new_inst->setInIQ();
+/*
+    // Look through its source registers (physical regs), and mark any
+    // dependencies.
+    addToDependents(new_inst);
+
+    // Have this instruction set itself as the producer of its destination
+    // register(s).
+    createDependency(new_inst);
+*/
+    // If it's a memory instruction, add it to the memory dependency
+    // unit.
+//    if (new_inst->isMemRef()) {
+//        memDepUnit[new_inst->threadNumber].insert(new_inst);
+//    } else {
+    // If the instruction is ready then add it to the ready list.
+ addIfReady(new_inst); +// } + + ++iqInstsAdded; + + + //Update Thread IQ Count + count[new_inst->threadNumber]++; + + assert(freeEntries == (numEntries - countInsts())); +} + +template <class Impl> +void +InstQueue<Impl>::insertNonSpec(DynInstPtr &new_inst) +{ + nonSpecInsts[new_inst->seqNum] = new_inst; + + // @todo: Clean up this code; can do it by setting inst as unable + // to issue, then calling normal insert on the inst. + + // Make sure the instruction is valid + assert(new_inst); + + DPRINTF(IQ, "Adding instruction PC %#x to the IQ.\n", + new_inst->readPC()); + + // Check if there are any free entries. Panic if there are none. + // Might want to have this return a fault in the future instead of + // panicing. + assert(freeEntries != 0); + + instList[new_inst->threadNumber].push_back(new_inst); + + // Decrease the number of free entries. + --freeEntries; + + //Mark Instruction as in IQ +// new_inst->setInIQ(); +/* + // Have this instruction set itself as the producer of its destination + // register(s). + createDependency(new_inst); + + // If it's a memory instruction, add it to the memory dependency + // unit. + if (new_inst->isMemRef()) { + memDepUnit[new_inst->threadNumber].insertNonSpec(new_inst); + } +*/ + ++iqNonSpecInstsAdded; + + //Update Thread IQ Count + count[new_inst->threadNumber]++; + + assert(freeEntries == (numEntries - countInsts())); +} +/* +template <class Impl> +void +InstQueue<Impl>::advanceTail(DynInstPtr &inst) +{ + // Have this instruction set itself as the producer of its destination + // register(s). + createDependency(inst); +} + +template <class Impl> +void +InstQueue<Impl>::addToOrderList(OpClass op_class) +{ + assert(!readyInsts[op_class].empty()); + + ListOrderEntry queue_entry; + + queue_entry.queueType = op_class; + + queue_entry.oldestInst = readyInsts[op_class].top()->seqNum; + + ListOrderIt list_it = listOrder.begin(); + ListOrderIt list_end_it = listOrder.end(); + + while (list_it != list_end_it) { + if ((*list_it).oldestInst > queue_entry.oldestInst) { + break; + } + + list_it++; + } + + readyIt[op_class] = listOrder.insert(list_it, queue_entry); + queueOnList[op_class] = true; +} + +template <class Impl> +void +InstQueue<Impl>::moveToYoungerInst(ListOrderIt list_order_it) +{ + // Get iterator of next item on the list + // Delete the original iterator + // Determine if the next item is either the end of the list or younger + // than the new instruction. If so, then add in a new iterator right here. + // If not, then move along. + ListOrderEntry queue_entry; + OpClass op_class = (*list_order_it).queueType; + ListOrderIt next_it = list_order_it; + + ++next_it; + + queue_entry.queueType = op_class; + queue_entry.oldestInst = readyInsts[op_class].top()->seqNum; + + while (next_it != listOrder.end() && + (*next_it).oldestInst < queue_entry.oldestInst) { + ++next_it; + } + + readyIt[op_class] = listOrder.insert(next_it, queue_entry); +} + +template <class Impl> +void +InstQueue<Impl>::processFUCompletion(DynInstPtr &inst, int fu_idx) +{ + // The CPU could have been sleeping until this op completed (*extremely* + // long latency op). Wake it if it was. This may be overkill. + iewStage->wakeCPU(); + + fuPool->freeUnit(fu_idx); + + int &size = issueToExecuteQueue->access(0)->size; + + issueToExecuteQueue->access(0)->insts[size++] = inst; +} +*/ +// @todo: Figure out a better way to remove the squashed items from the +// lists. Checking the top item of each list to see if it's squashed +// wastes time and forces jumps. 
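+//
+// One common alternative (sketch only, using the single readyInsts priority
+// queue in this file; getOldestReady is a hypothetical name): leave squashed
+// entries in place and discard them lazily as they reach the top at issue
+// time, so squash() never has to search the queue:
+//
+//     DynInstPtr getOldestReady()
+//     {
+//         while (!readyInsts.empty()) {
+//             DynInstPtr inst = readyInsts.top();
+//             readyInsts.pop();
+//             if (!inst->isSquashed())
+//                 return inst;
+//             ++iqSquashedInstsIssued;
+//         }
+//         return NULL;
+//     }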
+template <class Impl> +void +InstQueue<Impl>::scheduleReadyInsts() +{ + DPRINTF(IQ, "Attempting to schedule ready instructions from " + "the IQ.\n"); + +// IssueStruct *i2e_info = issueToExecuteQueue->access(0); +/* + // Will need to reorder the list if either a queue is not on the list, + // or it has an older instruction than last time. + for (int i = 0; i < Num_OpClasses; ++i) { + if (!readyInsts[i].empty()) { + if (!queueOnList[i]) { + addToOrderList(OpClass(i)); + } else if (readyInsts[i].top()->seqNum < + (*readyIt[i]).oldestInst) { + listOrder.erase(readyIt[i]); + addToOrderList(OpClass(i)); + } + } + } + + // Have iterator to head of the list + // While I haven't exceeded bandwidth or reached the end of the list, + // Try to get a FU that can do what this op needs. + // If successful, change the oldestInst to the new top of the list, put + // the queue in the proper place in the list. + // Increment the iterator. + // This will avoid trying to schedule a certain op class if there are no + // FUs that handle it. + ListOrderIt order_it = listOrder.begin(); + ListOrderIt order_end_it = listOrder.end(); + int total_issued = 0; + int exec_queue_slot = i2e_info->size; + + while (exec_queue_slot < totalWidth && order_it != order_end_it) { + OpClass op_class = (*order_it).queueType; + + assert(!readyInsts[op_class].empty()); + + DynInstPtr issuing_inst = readyInsts[op_class].top(); + + assert(issuing_inst->seqNum == (*order_it).oldestInst); + + if (issuing_inst->isSquashed()) { + readyInsts[op_class].pop(); + + if (!readyInsts[op_class].empty()) { + moveToYoungerInst(order_it); + } else { + readyIt[op_class] = listOrder.end(); + queueOnList[op_class] = false; + } + + listOrder.erase(order_it++); + + ++iqSquashedInstsIssued; + + continue; + } + + int idx = fuPool->getUnit(op_class); + + if (idx != -1) { + int op_latency = fuPool->getOpLatency(op_class); + + if (op_latency == 1) { + i2e_info->insts[exec_queue_slot++] = issuing_inst; + i2e_info->size++; + + // Add the FU onto the list of FU's to be freed next cycle. + fuPool->freeUnit(idx); + } else { + int issue_latency = fuPool->getIssueLatency(op_class); + + if (issue_latency > 1) { + // Generate completion event for the FU + FUCompletion *execution = new FUCompletion(issuing_inst, + idx, this); + + execution->schedule(curTick + issue_latency - 1); + } else { + i2e_info->insts[exec_queue_slot++] = issuing_inst; + i2e_info->size++; + + // Add the FU onto the list of FU's to be freed next cycle. + fuPool->freeUnit(idx); + } + } + + DPRINTF(IQ, "Thread %i: Issuing instruction PC %#x " + "[sn:%lli]\n", + issuing_inst->threadNumber, issuing_inst->readPC(), + issuing_inst->seqNum); + + readyInsts[op_class].pop(); + + if (!readyInsts[op_class].empty()) { + moveToYoungerInst(order_it); + } else { + readyIt[op_class] = listOrder.end(); + queueOnList[op_class] = false; + } + + issuing_inst->setIssued(); + ++total_issued; + + if (!issuing_inst->isMemRef()) { + // Memory instructions can not be freed from the IQ until they + // complete. 
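+                // (their IQ entry is reclaimed later, in completeMemInst(),
+                // once the access is known to have succeeded).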
+ ++freeEntries; + count[issuing_inst->threadNumber]--; + issuing_inst->removeInIQ(); + } else { + memDepUnit[issuing_inst->threadNumber].issue(issuing_inst); + } + + listOrder.erase(order_it++); + } else { + ++order_it; + } + } + + if (total_issued) { + cpu->activityThisCycle(); + } else { + DPRINTF(IQ, "Not able to schedule any instructions.\n"); + } +*/ +} + +template <class Impl> +void +InstQueue<Impl>::scheduleNonSpec(const InstSeqNum &inst) +{ + DPRINTF(IQ, "Marking nonspeculative instruction with sequence " + "number %i as ready to execute.\n", inst); + + NonSpecMapIt inst_it = nonSpecInsts.find(inst); + + assert(inst_it != nonSpecInsts.end()); + +// unsigned tid = (*inst_it).second->threadNumber; + + // Mark this instruction as ready to issue. + (*inst_it).second->setCanIssue(); + + // Now schedule the instruction. +// if (!(*inst_it).second->isMemRef()) { + addIfReady((*inst_it).second); +// } else { +// memDepUnit[tid].nonSpecInstReady((*inst_it).second); +// } + + nonSpecInsts.erase(inst_it); +} + +template <class Impl> +void +InstQueue<Impl>::commit(const InstSeqNum &inst, unsigned tid) +{ + /*Need to go through each thread??*/ + DPRINTF(IQ, "[tid:%i]: Committing instructions older than [sn:%i]\n", + tid,inst); + + ListIt iq_it = instList[tid].begin(); + + while (iq_it != instList[tid].end() && + (*iq_it)->seqNum <= inst) { + ++iq_it; + instList[tid].pop_front(); + } + + assert(freeEntries == (numEntries - countInsts())); +} + +template <class Impl> +void +InstQueue<Impl>::wakeDependents(DynInstPtr &completed_inst) +{ + DPRINTF(IQ, "Waking dependents of completed instruction.\n"); + // Look at the physical destination register of the DynInst + // and look it up on the dependency graph. Then mark as ready + // any instructions within the instruction queue. +/* + DependencyEntry *curr; + DependencyEntry *prev; +*/ + // Tell the memory dependence unit to wake any dependents on this + // instruction if it is a memory instruction. Also complete the memory + // instruction at this point since we know it executed fine. + // @todo: Might want to rename "completeMemInst" to + // something that indicates that it won't need to be replayed, and call + // this earlier. Might not be a big deal. + if (completed_inst->isMemRef()) { +// memDepUnit[completed_inst->threadNumber].wakeDependents(completed_inst); + completeMemInst(completed_inst); + } + completed_inst->wakeDependents(); +/* + for (int dest_reg_idx = 0; + dest_reg_idx < completed_inst->numDestRegs(); + dest_reg_idx++) + { + PhysRegIndex dest_reg = + completed_inst->renamedDestRegIdx(dest_reg_idx); + + // Special case of uniq or control registers. They are not + // handled by the IQ and thus have no dependency graph entry. + // @todo Figure out a cleaner way to handle this. + if (dest_reg >= numPhysRegs) { + continue; + } + + DPRINTF(IQ, "Waking any dependents on register %i.\n", + (int) dest_reg); + + //Maybe abstract this part into a function. + //Go through the dependency chain, marking the registers as ready + //within the waiting instructions. + + curr = dependGraph[dest_reg].next; + + while (curr) { + DPRINTF(IQ, "Waking up a dependent instruction, PC%#x.\n", + curr->inst->readPC()); + + // Might want to give more information to the instruction + // so that it knows which of its source registers is ready. + // However that would mean that the dependency graph entries + // would need to hold the src_reg_idx. 
+ curr->inst->markSrcRegReady(); + + addIfReady(curr->inst); + + DependencyEntry::mem_alloc_counter--; + + prev = curr; + curr = prev->next; + prev->inst = NULL; + + delete prev; + } + + // Reset the head node now that all of its dependents have been woken + // up. + dependGraph[dest_reg].next = NULL; + dependGraph[dest_reg].inst = NULL; + + // Mark the scoreboard as having that register ready. + regScoreboard[dest_reg] = true; + } +*/ +} + +template <class Impl> +void +InstQueue<Impl>::addReadyMemInst(DynInstPtr &ready_inst) +{ + OpClass op_class = ready_inst->opClass(); + + readyInsts.push(ready_inst); + + DPRINTF(IQ, "Instruction is ready to issue, putting it onto " + "the ready list, PC %#x opclass:%i [sn:%lli].\n", + ready_inst->readPC(), op_class, ready_inst->seqNum); +} +/* +template <class Impl> +void +InstQueue<Impl>::rescheduleMemInst(DynInstPtr &resched_inst) +{ + memDepUnit[resched_inst->threadNumber].reschedule(resched_inst); +} + +template <class Impl> +void +InstQueue<Impl>::replayMemInst(DynInstPtr &replay_inst) +{ + memDepUnit[replay_inst->threadNumber].replay(replay_inst); +} +*/ +template <class Impl> +void +InstQueue<Impl>::completeMemInst(DynInstPtr &completed_inst) +{ + int tid = completed_inst->threadNumber; + + DPRINTF(IQ, "Completing mem instruction PC:%#x [sn:%lli]\n", + completed_inst->readPC(), completed_inst->seqNum); + + ++freeEntries; + +// completed_inst->memOpDone = true; + +// memDepUnit[tid].completed(completed_inst); + + count[tid]--; +} +/* +template <class Impl> +void +InstQueue<Impl>::violation(DynInstPtr &store, + DynInstPtr &faulting_load) +{ + memDepUnit[store->threadNumber].violation(store, faulting_load); +} +*/ +template <class Impl> +void +InstQueue<Impl>::squash(unsigned tid) +{ + DPRINTF(IQ, "[tid:%i]: Starting to squash instructions in " + "the IQ.\n", tid); + + // Read instruction sequence number of last instruction out of the + // time buffer. +// squashedSeqNum[tid] = fromCommit->commitInfo[tid].doneSeqNum; + + // Setup the squash iterator to point to the tail. + squashIt[tid] = instList[tid].end(); + --squashIt[tid]; + + // Call doSquash if there are insts in the IQ + if (count[tid] > 0) { + doSquash(tid); + } + + // Also tell the memory dependence unit to squash. +// memDepUnit[tid].squash(squashedSeqNum[tid], tid); +} + +template <class Impl> +void +InstQueue<Impl>::doSquash(unsigned tid) +{ + // Make sure the squashed sequence number is valid. + assert(squashedSeqNum[tid] != 0); + + DPRINTF(IQ, "[tid:%i]: Squashing until sequence number %i!\n", + tid, squashedSeqNum[tid]); + + // Squash any instructions younger than the squashed sequence number + // given. + while (squashIt[tid] != instList[tid].end() && + (*squashIt[tid])->seqNum > squashedSeqNum[tid]) { + + DynInstPtr squashed_inst = (*squashIt[tid]); + + // Only handle the instruction if it actually is in the IQ and + // hasn't already been squashed in the IQ. + if (squashed_inst->threadNumber != tid || + squashed_inst->isSquashedInIQ()) { + --squashIt[tid]; + continue; + } + + if (!squashed_inst->isIssued() || + (squashed_inst->isMemRef()/* && + !squashed_inst->memOpDone*/)) { + + // Remove the instruction from the dependency list. + if (!squashed_inst->isNonSpeculative()) { +/* + for (int src_reg_idx = 0; + src_reg_idx < squashed_inst->numSrcRegs(); + src_reg_idx++) + { + PhysRegIndex src_reg = + squashed_inst->renamedSrcRegIdx(src_reg_idx); + + // Only remove it from the dependency graph if it was + // placed there in the first place. 
+ // HACK: This assumes that instructions woken up from the + // dependency chain aren't informed that a specific src + // register has become ready. This may not always be true + // in the future. + // Instead of doing a linked list traversal, we can just + // remove these squashed instructions either at issue time, + // or when the register is overwritten. The only downside + // to this is it leaves more room for error. + + if (!squashed_inst->isReadySrcRegIdx(src_reg_idx) && + src_reg < numPhysRegs) { + dependGraph[src_reg].remove(squashed_inst); + } + + + ++iqSquashedOperandsExamined; + } +*/ + // Might want to remove producers as well. + } else { + nonSpecInsts[squashed_inst->seqNum] = NULL; + + nonSpecInsts.erase(squashed_inst->seqNum); + + ++iqSquashedNonSpecRemoved; + } + + // Might want to also clear out the head of the dependency graph. + + // Mark it as squashed within the IQ. + squashed_inst->setSquashedInIQ(); + + // @todo: Remove this hack where several statuses are set so the + // inst will flow through the rest of the pipeline. + squashed_inst->setIssued(); + squashed_inst->setCanCommit(); +// squashed_inst->removeInIQ(); + + //Update Thread IQ Count + count[squashed_inst->threadNumber]--; + + ++freeEntries; + + if (numThreads > 1) { + DPRINTF(IQ, "[tid:%i]: Instruction PC %#x squashed.\n", + tid, squashed_inst->readPC()); + } else { + DPRINTF(IQ, "Instruction PC %#x squashed.\n", + squashed_inst->readPC()); + } + } + + --squashIt[tid]; + ++iqSquashedInstsExamined; + } +} +/* +template <class Impl> +void +InstQueue<Impl>::DependencyEntry::insert(DynInstPtr &new_inst) +{ + //Add this new, dependent instruction at the head of the dependency + //chain. + + // First create the entry that will be added to the head of the + // dependency chain. + DependencyEntry *new_entry = new DependencyEntry; + new_entry->next = this->next; + new_entry->inst = new_inst; + + // Then actually add it to the chain. + this->next = new_entry; + + ++mem_alloc_counter; +} + +template <class Impl> +void +InstQueue<Impl>::DependencyEntry::remove(DynInstPtr &inst_to_remove) +{ + DependencyEntry *prev = this; + DependencyEntry *curr = this->next; + + // Make sure curr isn't NULL. Because this instruction is being + // removed from a dependency list, it must have been placed there at + // an earlier time. The dependency chain should not be empty, + // unless the instruction dependent upon it is already ready. + if (curr == NULL) { + return; + } + + // Find the instruction to remove within the dependency linked list. + while (curr->inst != inst_to_remove) { + prev = curr; + curr = curr->next; + + assert(curr != NULL); + } + + // Now remove this instruction from the list. + prev->next = curr->next; + + --mem_alloc_counter; + + // Could push this off to the destructor of DependencyEntry + curr->inst = NULL; + + delete curr; +} + +template <class Impl> +bool +InstQueue<Impl>::addToDependents(DynInstPtr &new_inst) +{ + // Loop through the instruction's source registers, adding + // them to the dependency list if they are not ready. + int8_t total_src_regs = new_inst->numSrcRegs(); + bool return_val = false; + + for (int src_reg_idx = 0; + src_reg_idx < total_src_regs; + src_reg_idx++) + { + // Only add it to the dependency graph if it's not ready. + if (!new_inst->isReadySrcRegIdx(src_reg_idx)) { + PhysRegIndex src_reg = new_inst->renamedSrcRegIdx(src_reg_idx); + + // Check the IQ's scoreboard to make sure the register + // hasn't become ready while the instruction was in flight + // between stages. 
Only if it really isn't ready should + // it be added to the dependency graph. + if (src_reg >= numPhysRegs) { + continue; + } else if (regScoreboard[src_reg] == false) { + DPRINTF(IQ, "Instruction PC %#x has src reg %i that " + "is being added to the dependency chain.\n", + new_inst->readPC(), src_reg); + + dependGraph[src_reg].insert(new_inst); + + // Change the return value to indicate that something + // was added to the dependency graph. + return_val = true; + } else { + DPRINTF(IQ, "Instruction PC %#x has src reg %i that " + "became ready before it reached the IQ.\n", + new_inst->readPC(), src_reg); + // Mark a register ready within the instruction. + new_inst->markSrcRegReady(); + } + } + } + + return return_val; +} + +template <class Impl> +void +InstQueue<Impl>::createDependency(DynInstPtr &new_inst) +{ + //Actually nothing really needs to be marked when an + //instruction becomes the producer of a register's value, + //but for convenience a ptr to the producing instruction will + //be placed in the head node of the dependency links. + int8_t total_dest_regs = new_inst->numDestRegs(); + + for (int dest_reg_idx = 0; + dest_reg_idx < total_dest_regs; + dest_reg_idx++) + { + PhysRegIndex dest_reg = new_inst->renamedDestRegIdx(dest_reg_idx); + + // Instructions that use the misc regs will have a reg number + // higher than the normal physical registers. In this case these + // registers are not renamed, and there is no need to track + // dependencies as these instructions must be executed at commit. + if (dest_reg >= numPhysRegs) { + continue; + } + + if (dependGraph[dest_reg].next) { + dumpDependGraph(); + panic("Dependency graph %i not empty!", dest_reg); + } + + dependGraph[dest_reg].inst = new_inst; + + // Mark the scoreboard to say it's not yet ready. + regScoreboard[dest_reg] = false; + } +} +*/ +template <class Impl> +void +InstQueue<Impl>::addIfReady(DynInstPtr &inst) +{ + //If the instruction now has all of its source registers + // available, then add it to the list of ready instructions. + if (inst->readyToIssue()) { + + //Add the instruction to the proper ready list. + if (inst->isMemRef()) { + + DPRINTF(IQ, "Checking if memory instruction can issue.\n"); + + // Message to the mem dependence unit that this instruction has + // its registers ready. + +// memDepUnit[inst->threadNumber].regsReady(inst); + + return; + } + + OpClass op_class = inst->opClass(); + + DPRINTF(IQ, "Instruction is ready to issue, putting it onto " + "the ready list, PC %#x opclass:%i [sn:%lli].\n", + inst->readPC(), op_class, inst->seqNum); + + readyInsts.push(inst); + } +} + +template <class Impl> +int +InstQueue<Impl>::countInsts() +{ + //ksewell:This works but definitely could use a cleaner write + //with a more intuitive way of counting. Right now it's + //just brute force .... + +#if 0 + int total_insts = 0; + + for (int i = 0; i < numThreads; ++i) { + ListIt count_it = instList[i].begin(); + + while (count_it != instList[i].end()) { + if (!(*count_it)->isSquashed() && !(*count_it)->isSquashedInIQ()) { + if (!(*count_it)->isIssued()) { + ++total_insts; + } else if ((*count_it)->isMemRef() && + !(*count_it)->memOpDone) { + // Loads that have not been marked as executed still count + // towards the total instructions. 
+ ++total_insts; + } + } + + ++count_it; + } + } + + return total_insts; +#else + return numEntries - freeEntries; +#endif +} +/* +template <class Impl> +void +InstQueue<Impl>::dumpDependGraph() +{ + DependencyEntry *curr; + + for (int i = 0; i < numPhysRegs; ++i) + { + curr = &dependGraph[i]; + + if (curr->inst) { + cprintf("dependGraph[%i]: producer: %#x [sn:%lli] consumer: ", + i, curr->inst->readPC(), curr->inst->seqNum); + } else { + cprintf("dependGraph[%i]: No producer. consumer: ", i); + } + + while (curr->next != NULL) { + curr = curr->next; + + cprintf("%#x [sn:%lli] ", + curr->inst->readPC(), curr->inst->seqNum); + } + + cprintf("\n"); + } +} +*/ +template <class Impl> +void +InstQueue<Impl>::dumpLists() +{ + for (int i = 0; i < Num_OpClasses; ++i) { + cprintf("Ready list %i size: %i\n", i, readyInsts.size()); + + cprintf("\n"); + } + + cprintf("Non speculative list size: %i\n", nonSpecInsts.size()); + + NonSpecMapIt non_spec_it = nonSpecInsts.begin(); + NonSpecMapIt non_spec_end_it = nonSpecInsts.end(); + + cprintf("Non speculative list: "); + + while (non_spec_it != non_spec_end_it) { + cprintf("%#x [sn:%lli]", (*non_spec_it).second->readPC(), + (*non_spec_it).second->seqNum); + ++non_spec_it; + } + + cprintf("\n"); +/* + ListOrderIt list_order_it = listOrder.begin(); + ListOrderIt list_order_end_it = listOrder.end(); + int i = 1; + + cprintf("List order: "); + + while (list_order_it != list_order_end_it) { + cprintf("%i OpClass:%i [sn:%lli] ", i, (*list_order_it).queueType, + (*list_order_it).oldestInst); + + ++list_order_it; + ++i; + } +*/ + cprintf("\n"); +} + + +template <class Impl> +void +InstQueue<Impl>::dumpInsts() +{ + for (int i = 0; i < numThreads; ++i) { +// int num = 0; +// int valid_num = 0; +/* + ListIt inst_list_it = instList[i].begin(); + + while (inst_list_it != instList[i].end()) + { + cprintf("Instruction:%i\n", + num); + if (!(*inst_list_it)->isSquashed()) { + if (!(*inst_list_it)->isIssued()) { + ++valid_num; + cprintf("Count:%i\n", valid_num); + } else if ((*inst_list_it)->isMemRef() && + !(*inst_list_it)->memOpDone) { + // Loads that have not been marked as executed still count + // towards the total instructions. + ++valid_num; + cprintf("Count:%i\n", valid_num); + } + } + + cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" + "Issued:%i\nSquashed:%i\n", + (*inst_list_it)->readPC(), + (*inst_list_it)->seqNum, + (*inst_list_it)->threadNumber, + (*inst_list_it)->isIssued(), + (*inst_list_it)->isSquashed()); + + if ((*inst_list_it)->isMemRef()) { + cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); + } + + cprintf("\n"); + + inst_list_it++; + ++num; + } +*/ + } +} diff --git a/cpu/ozone/lsq_unit.cc b/cpu/ozone/lsq_unit.cc new file mode 100644 index 000000000..3ac51b87d --- /dev/null +++ b/cpu/ozone/lsq_unit.cc @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/ozone/ozone_impl.hh" +#include "cpu/ozone/lsq_unit_impl.hh" + +// Force the instantiation of LDSTQ for all the implementations we care about. +template class OzoneLSQ<OzoneImpl>; + diff --git a/cpu/ozone/lsq_unit.hh b/cpu/ozone/lsq_unit.hh new file mode 100644 index 000000000..4b600af67 --- /dev/null +++ b/cpu/ozone/lsq_unit.hh @@ -0,0 +1,637 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __CPU_OZONE_LSQ_UNIT_HH__ +#define __CPU_OZONE_LSQ_UNIT_HH__ + +#include <map> +#include <queue> +#include <algorithm> + +#include "arch/faults.hh" +#include "arch/isa_traits.hh" +#include "config/full_system.hh" +#include "base/hashmap.hh" +#include "cpu/inst_seq.hh" +#include "mem/mem_interface.hh" +//#include "mem/page_table.hh" +#include "sim/sim_object.hh" + +class PageTable; + +/** + * Class that implements the actual LQ and SQ for each specific thread. + * Both are circular queues; load entries are freed upon committing, while + * store entries are freed once they writeback. The LSQUnit tracks if there + * are memory ordering violations, and also detects partial load to store + * forwarding cases (a store only has part of a load's data) that requires + * the load to wait until the store writes back. In the former case it + * holds onto the instruction until the dependence unit looks at it, and + * in the latter it stalls the LSQ until the store writes back. At that + * point the load is replayed. + */ +template <class Impl> +class OzoneLSQ { + public: + typedef typename Impl::Params Params; + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::BackEnd BackEnd; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::IssueStruct IssueStruct; + + typedef TheISA::IntReg IntReg; + + typedef typename std::map<InstSeqNum, DynInstPtr>::iterator LdMapIt; + + private: + class StoreCompletionEvent : public Event { + public: + /** Constructs a store completion event. */ + StoreCompletionEvent(int store_idx, Event *wb_event, OzoneLSQ *lsq_ptr); + + /** Processes the store completion event. */ + void process(); + + /** Returns the description of this event. */ + const char *description(); + + private: + /** The store index of the store being written back. */ + int storeIdx; + /** The writeback event for the store. Needed for store + * conditionals. + */ + Event *wbEvent; + /** The pointer to the LSQ unit that issued the store. */ + OzoneLSQ<Impl> *lsqPtr; + }; + + friend class StoreCompletionEvent; + + public: + /** Constructs an LSQ unit. init() must be called prior to use. */ + OzoneLSQ(); + + /** Initializes the LSQ unit with the specified number of entries. */ + void init(Params *params, unsigned maxLQEntries, + unsigned maxSQEntries, unsigned id); + + /** Returns the name of the LSQ unit. */ + std::string name() const; + + /** Sets the CPU pointer. */ + void setCPU(FullCPU *cpu_ptr) + { cpu = cpu_ptr; } + + /** Sets the back-end stage pointer. */ + void setBE(BackEnd *be_ptr) + { be = be_ptr; } + + /** Sets the page table pointer. */ + void setPageTable(PageTable *pt_ptr); + + /** Ticks the LSQ unit, which in this case only resets the number of + * used cache ports. + * @todo: Move the number of used ports up to the LSQ level so it can + * be shared by all LSQ units. + */ + void tick() { usedPorts = 0; } + + /** Inserts an instruction. */ + void insert(DynInstPtr &inst); + /** Inserts a load instruction. */ + void insertLoad(DynInstPtr &load_inst); + /** Inserts a store instruction. */ + void insertStore(DynInstPtr &store_inst); + + /** Executes a load instruction. */ + Fault executeLoad(DynInstPtr &inst); + + Fault executeLoad(int lq_idx); + /** Executes a store instruction. */ + Fault executeStore(DynInstPtr &inst); + + /** Commits the head load. */ + void commitLoad(); + /** Commits a specific load, given by the sequence number. */ + void commitLoad(InstSeqNum &inst); + /** Commits loads older than a specific sequence number. 
*/ + void commitLoads(InstSeqNum &youngest_inst); + + /** Commits stores older than a specific sequence number. */ + void commitStores(InstSeqNum &youngest_inst); + + /** Writes back stores. */ + void writebackStores(); + + // @todo: Include stats in the LSQ unit. + //void regStats(); + + /** Clears all the entries in the LQ. */ + void clearLQ(); + + /** Clears all the entries in the SQ. */ + void clearSQ(); + + /** Resizes the LQ to a given size. */ + void resizeLQ(unsigned size); + + /** Resizes the SQ to a given size. */ + void resizeSQ(unsigned size); + + /** Squashes all instructions younger than a specific sequence number. */ + void squash(const InstSeqNum &squashed_num); + + /** Returns if there is a memory ordering violation. Value is reset upon + * call to getMemDepViolator(). + */ + bool violation() { return memDepViolator; } + + /** Returns the memory ordering violator. */ + DynInstPtr getMemDepViolator(); + + /** Returns if a load became blocked due to the memory system. It clears + * the bool's value upon this being called. + */ + inline bool loadBlocked(); + + /** Returns the number of free entries (min of free LQ and SQ entries). */ + unsigned numFreeEntries(); + + /** Returns the number of loads ready to execute. */ + int numLoadsReady(); + + /** Returns the number of loads in the LQ. */ + int numLoads() { return loads; } + + /** Returns the number of stores in the SQ. */ + int numStores() { return stores; } + + /** Returns if either the LQ or SQ is full. */ + bool isFull() { return lqFull() || sqFull(); } + + /** Returns if the LQ is full. */ + bool lqFull() { return loads >= (LQEntries - 1); } + + /** Returns if the SQ is full. */ + bool sqFull() { return stores >= (SQEntries - 1); } + + /** Debugging function to dump instructions in the LSQ. */ + void dumpInsts(); + + /** Returns the number of instructions in the LSQ. */ + unsigned getCount() { return loads + stores; } + + /** Returns if there are any stores to writeback. */ + bool hasStoresToWB() { return storesToWB; } + + /** Returns the number of stores to writeback. */ + int numStoresToWB() { return storesToWB; } + + /** Returns if the LSQ unit will writeback on this cycle. */ + bool willWB() { return storeQueue[storeWBIdx].canWB && + !storeQueue[storeWBIdx].completed && + !dcacheInterface->isBlocked(); } + + private: + /** Completes the store at the specified index. */ + void completeStore(int store_idx); + + /** Increments the given store index (circular queue). */ + inline void incrStIdx(int &store_idx); + /** Decrements the given store index (circular queue). */ + inline void decrStIdx(int &store_idx); + /** Increments the given load index (circular queue). */ + inline void incrLdIdx(int &load_idx); + /** Decrements the given load index (circular queue). */ + inline void decrLdIdx(int &load_idx); + + private: + /** Pointer to the CPU. */ + FullCPU *cpu; + + /** Pointer to the back-end stage. */ + BackEnd *be; + + /** Pointer to the D-cache. */ + MemInterface *dcacheInterface; + + /** Pointer to the page table. */ + PageTable *pTable; + + public: + struct SQEntry { + /** Constructs an empty store queue entry. */ + SQEntry() + : inst(NULL), req(NULL), size(0), data(0), + canWB(0), committed(0), completed(0) + { } + + /** Constructs a store queue entry for a given instruction. */ + SQEntry(DynInstPtr &_inst) + : inst(_inst), req(NULL), size(0), data(0), + canWB(0), committed(0), completed(0) + { } + + /** The store instruction. */ + DynInstPtr inst; + /** The memory request for the store. 
*/ + MemReqPtr req; + /** The size of the store. */ + int size; + /** The store data. */ + IntReg data; + /** Whether or not the store can writeback. */ + bool canWB; + /** Whether or not the store is committed. */ + bool committed; + /** Whether or not the store is completed. */ + bool completed; + }; + + enum Status { + Running, + Idle, + DcacheMissStall, + DcacheMissSwitch + }; + + private: + /** The OzoneLSQ thread id. */ + unsigned lsqID; + + /** The status of the LSQ unit. */ + Status _status; + + /** The store queue. */ + std::vector<SQEntry> storeQueue; + + /** The load queue. */ + std::vector<DynInstPtr> loadQueue; + + // Consider making these 16 bits + /** The number of LQ entries. */ + unsigned LQEntries; + /** The number of SQ entries. */ + unsigned SQEntries; + + /** The number of load instructions in the LQ. */ + int loads; + /** The number of store instructions in the SQ (excludes those waiting to + * writeback). + */ + int stores; + /** The number of store instructions in the SQ waiting to writeback. */ + int storesToWB; + + /** The index of the head instruction in the LQ. */ + int loadHead; + /** The index of the tail instruction in the LQ. */ + int loadTail; + + /** The index of the head instruction in the SQ. */ + int storeHead; + /** The index of the first instruction that is ready to be written back, + * and has not yet been written back. + */ + int storeWBIdx; + /** The index of the tail instruction in the SQ. */ + int storeTail; + + /// @todo Consider moving to a more advanced model with write vs read ports + /** The number of cache ports available each cycle. */ + int cachePorts; + + /** The number of used cache ports in this cycle. */ + int usedPorts; + + //list<InstSeqNum> mshrSeqNums; + + //Stats::Scalar<> dcacheStallCycles; + Counter lastDcacheStall; + + /** Wire to read information from the issue stage time queue. */ + typename TimeBuffer<IssueStruct>::wire fromIssue; + + // Make these per thread? + /** Whether or not the LSQ is stalled. */ + bool stalled; + /** The store that causes the stall due to partial store to load + * forwarding. + */ + InstSeqNum stallingStoreIsn; + /** The index of the above store. */ + int stallingLoadIdx; + + /** Whether or not a load is blocked due to the memory system. It is + * cleared when this value is checked via loadBlocked(). + */ + bool isLoadBlocked; + + /** The oldest faulting load instruction. */ + DynInstPtr loadFaultInst; + /** The oldest faulting store instruction. */ + DynInstPtr storeFaultInst; + + /** The oldest load that caused a memory ordering violation. */ + DynInstPtr memDepViolator; + + // Will also need how many read/write ports the Dcache has. Or keep track + // of that in stage that is one level up, and only call executeLoad/Store + // the appropriate number of times. + + public: + /** Executes the load at the given index. */ + template <class T> + Fault read(MemReqPtr &req, T &data, int load_idx); + + /** Executes the store at the given index. */ + template <class T> + Fault write(MemReqPtr &req, T &data, int store_idx); + + /** Returns the index of the head load instruction. */ + int getLoadHead() { return loadHead; } + /** Returns the sequence number of the head load instruction. */ + InstSeqNum getLoadHeadSeqNum() + { + if (loadQueue[loadHead]) { + return loadQueue[loadHead]->seqNum; + } else { + return 0; + } + + } + + /** Returns the index of the head store instruction. */ + int getStoreHead() { return storeHead; } + /** Returns the sequence number of the head store instruction. 
*/ + InstSeqNum getStoreHeadSeqNum() + { + if (storeQueue[storeHead].inst) { + return storeQueue[storeHead].inst->seqNum; + } else { + return 0; + } + + } + + /** Returns whether or not the LSQ unit is stalled. */ + bool isStalled() { return stalled; } +}; + +template <class Impl> +template <class T> +Fault +OzoneLSQ<Impl>::read(MemReqPtr &req, T &data, int load_idx) +{ + //Depending on issue2execute delay a squashed load could + //execute if it is found to be squashed in the same + //cycle it is scheduled to execute + assert(loadQueue[load_idx]); + + if (loadQueue[load_idx]->isExecuted()) { + panic("Should not reach this point with split ops!"); + + memcpy(&data,req->data,req->size); + + return NoFault; + } + + // Make sure this isn't an uncacheable access + // A bit of a hackish way to get uncached accesses to work only if they're + // at the head of the LSQ and are ready to commit (at the head of the ROB + // too). + // @todo: Fix uncached accesses. + if (req->flags & UNCACHEABLE && + (load_idx != loadHead || !loadQueue[load_idx]->readyToCommit())) { + + return TheISA::genMachineCheckFault(); + } + + // Check the SQ for any previous stores that might lead to forwarding + int store_idx = loadQueue[load_idx]->sqIdx; + + int store_size = 0; + + DPRINTF(OzoneLSQ, "Read called, load idx: %i, store idx: %i, " + "storeHead: %i addr: %#x\n", + load_idx, store_idx, storeHead, req->paddr); + + while (store_idx != -1) { + // End once we've reached the top of the LSQ + if (store_idx == storeWBIdx) { + break; + } + + // Move the index to one younger + if (--store_idx < 0) + store_idx += SQEntries; + + assert(storeQueue[store_idx].inst); + + store_size = storeQueue[store_idx].size; + + if (store_size == 0) + continue; + + // Check if the store data is within the lower and upper bounds of + // addresses that the request needs. + bool store_has_lower_limit = + req->vaddr >= storeQueue[store_idx].inst->effAddr; + bool store_has_upper_limit = + (req->vaddr + req->size) <= (storeQueue[store_idx].inst->effAddr + + store_size); + bool lower_load_has_store_part = + req->vaddr < (storeQueue[store_idx].inst->effAddr + + store_size); + bool upper_load_has_store_part = + (req->vaddr + req->size) > storeQueue[store_idx].inst->effAddr; + + // If the store's data has all of the data needed, we can forward. + if (store_has_lower_limit && store_has_upper_limit) { + + int shift_amt = req->vaddr & (store_size - 1); + // Assumes byte addressing + shift_amt = shift_amt << 3; + + // Cast this to type T? + data = storeQueue[store_idx].data >> shift_amt; + + req->cmd = Read; + assert(!req->completionEvent); + req->completionEvent = NULL; + req->time = curTick; + assert(!req->data); + req->data = new uint8_t[64]; + + memcpy(req->data, &data, req->size); + + DPRINTF(OzoneLSQ, "Forwarding from store idx %i to load to " + "addr %#x, data %#x\n", + store_idx, req->vaddr, *(req->data)); + + typename BackEnd::LdWritebackEvent *wb = + new typename BackEnd::LdWritebackEvent(loadQueue[load_idx], + be); + + // We'll say this has a 1 cycle load-store forwarding latency + // for now. + // FIXME - Need to make this a parameter. + wb->schedule(curTick); + + // Should keep track of stat for forwarded data + return NoFault; + } else if ((store_has_lower_limit && lower_load_has_store_part) || + (store_has_upper_limit && upper_load_has_store_part) || + (lower_load_has_store_part && upper_load_has_store_part)) { + // This is the partial store-load forwarding case where a store + // has only part of the load's data. 
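+            // Worked example of the four bounds above (illustrative numbers
+            // only): an 8-byte store to effAddr 0x100 covers [0x100,0x108),
+            // so a 4-byte load at vaddr 0x104 satisfies both the lower and
+            // upper limits and is fully forwarded.  A 4-byte load at 0x106
+            // against a 4-byte store at 0x104 overlaps only bytes
+            // 0x106-0x107, so just the partial predicates hold and the load
+            // ends up here.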
+ + // If it's already been written back, then don't worry about + // stalling on it. + if (storeQueue[store_idx].completed) { + continue; + } + + // Must stall load and force it to retry, so long as it's the oldest + // load that needs to do so. + if (!stalled || + (stalled && + loadQueue[load_idx]->seqNum < + loadQueue[stallingLoadIdx]->seqNum)) { + stalled = true; + stallingStoreIsn = storeQueue[store_idx].inst->seqNum; + stallingLoadIdx = load_idx; + } + + // Tell IQ/mem dep unit that this instruction will need to be + // rescheduled eventually + be->rescheduleMemInst(loadQueue[load_idx]); + + DPRINTF(OzoneLSQ, "Load-store forwarding mis-match. " + "Store idx %i to load addr %#x\n", + store_idx, req->vaddr); + + return NoFault; + } + } + + + // If there's no forwarding case, then go access memory + DynInstPtr inst = loadQueue[load_idx]; + + ++usedPorts; + + // if we have a cache, do cache access too + if (dcacheInterface) { + if (dcacheInterface->isBlocked()) { + isLoadBlocked = true; + // No fault occurred, even though the interface is blocked. + return NoFault; + } + + DPRINTF(OzoneLSQ, "D-cache: PC:%#x reading from paddr:%#x " + "vaddr:%#x flags:%i\n", + inst->readPC(), req->paddr, req->vaddr, req->flags); + + // Setup MemReq pointer + req->cmd = Read; + req->completionEvent = NULL; + req->time = curTick; + assert(!req->data); + req->data = new uint8_t[64]; + + assert(!req->completionEvent); + typedef typename BackEnd::LdWritebackEvent LdWritebackEvent; + + LdWritebackEvent *wb = new LdWritebackEvent(loadQueue[load_idx], be); + + req->completionEvent = wb; + + // Do Cache Access + MemAccessResult result = dcacheInterface->access(req); + + // Ugly hack to get an event scheduled *only* if the access is + // a miss. We really should add first-class support for this + // at some point. + // @todo: Probably should support having no events + if (result != MA_HIT) { + DPRINTF(OzoneLSQ, "D-cache miss!\n"); + DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n", + inst->seqNum); + + lastDcacheStall = curTick; + + _status = DcacheMissStall; + + wb->setDcacheMiss(); + + } else { +// DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n", +// inst->seqNum); + + DPRINTF(OzoneLSQ, "D-cache hit!\n"); + } + } else { + fatal("Must use D-cache with new memory system"); + } + + return NoFault; +} + +template <class Impl> +template <class T> +Fault +OzoneLSQ<Impl>::write(MemReqPtr &req, T &data, int store_idx) +{ + assert(storeQueue[store_idx].inst); + + DPRINTF(OzoneLSQ, "Doing write to store idx %i, addr %#x data %#x" + " | storeHead:%i [sn:%i]\n", + store_idx, req->paddr, data, storeHead, + storeQueue[store_idx].inst->seqNum); + + storeQueue[store_idx].req = req; + storeQueue[store_idx].size = sizeof(T); + storeQueue[store_idx].data = data; + + // This function only writes the data to the store queue, so no fault + // can happen here. + return NoFault; +} + +template <class Impl> +inline bool +OzoneLSQ<Impl>::loadBlocked() +{ + bool ret_val = isLoadBlocked; + isLoadBlocked = false; + return ret_val; +} + +#endif // __CPU_OZONE_LSQ_UNIT_HH__ diff --git a/cpu/ozone/lsq_unit_impl.hh b/cpu/ozone/lsq_unit_impl.hh new file mode 100644 index 000000000..726348d76 --- /dev/null +++ b/cpu/ozone/lsq_unit_impl.hh @@ -0,0 +1,846 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arch/isa_traits.hh" +#include "base/str.hh" +#include "cpu/ozone/lsq_unit.hh" + +template <class Impl> +OzoneLSQ<Impl>::StoreCompletionEvent::StoreCompletionEvent(int store_idx, + Event *wb_event, + OzoneLSQ<Impl> *lsq_ptr) + : Event(&mainEventQueue), + storeIdx(store_idx), + wbEvent(wb_event), + lsqPtr(lsq_ptr) +{ + this->setFlags(Event::AutoDelete); +} + +template <class Impl> +void +OzoneLSQ<Impl>::StoreCompletionEvent::process() +{ + DPRINTF(OzoneLSQ, "Cache miss complete for store idx:%i\n", storeIdx); + + //lsqPtr->removeMSHR(lsqPtr->storeQueue[storeIdx].inst->seqNum); + +// lsqPtr->cpu->wakeCPU(); + if (wbEvent) + wbEvent->process(); + lsqPtr->completeStore(storeIdx); +} + +template <class Impl> +const char * +OzoneLSQ<Impl>::StoreCompletionEvent::description() +{ + return "LSQ store completion event"; +} + +template <class Impl> +OzoneLSQ<Impl>::OzoneLSQ() + : loads(0), stores(0), storesToWB(0), stalled(false), isLoadBlocked(false) +{ +} + +template<class Impl> +void +OzoneLSQ<Impl>::init(Params *params, unsigned maxLQEntries, + unsigned maxSQEntries, unsigned id) + +{ + DPRINTF(OzoneLSQ, "Creating OzoneLSQ%i object.\n",id); + + lsqID = id; + + LQEntries = maxLQEntries; + SQEntries = maxSQEntries; + + loadQueue.resize(LQEntries); + storeQueue.resize(SQEntries); + + + // May want to initialize these entries to NULL + + loadHead = loadTail = 0; + + storeHead = storeWBIdx = storeTail = 0; + + usedPorts = 0; + cachePorts = params->cachePorts; + + dcacheInterface = params->dcacheInterface; + + loadFaultInst = storeFaultInst = memDepViolator = NULL; +} + +template<class Impl> +std::string +OzoneLSQ<Impl>::name() const +{ + return "lsqunit"; +} + +template<class Impl> +void +OzoneLSQ<Impl>::clearLQ() +{ + loadQueue.clear(); +} + +template<class Impl> +void +OzoneLSQ<Impl>::clearSQ() +{ + storeQueue.clear(); +} + +template<class Impl> +void +OzoneLSQ<Impl>::setPageTable(PageTable *pt_ptr) +{ + DPRINTF(OzoneLSQ, "Setting the page table pointer.\n"); + pTable = pt_ptr; +} + +template<class Impl> 
+void
+OzoneLSQ<Impl>::resizeLQ(unsigned size)
+{
+    assert(size >= LQEntries);
+
+    if (size > LQEntries) {
+        while (size > loadQueue.size()) {
+            DynInstPtr dummy;
+            loadQueue.push_back(dummy);
+            LQEntries++;
+        }
+    } else {
+        LQEntries = size;
+    }
+}
+
+template<class Impl>
+void
+OzoneLSQ<Impl>::resizeSQ(unsigned size)
+{
+    if (size > SQEntries) {
+        while (size > storeQueue.size()) {
+            SQEntry dummy;
+            storeQueue.push_back(dummy);
+            SQEntries++;
+        }
+    } else {
+        SQEntries = size;
+    }
+}
+
+template <class Impl>
+void
+OzoneLSQ<Impl>::insert(DynInstPtr &inst)
+{
+    // Make sure we really have a memory reference.
+    assert(inst->isMemRef());
+
+    // Make sure it's one of the two classes of memory references.
+    assert(inst->isLoad() || inst->isStore());
+
+    if (inst->isLoad()) {
+        insertLoad(inst);
+    } else {
+        insertStore(inst);
+    }
+
+//    inst->setInLSQ();
+}
+
+template <class Impl>
+void
+OzoneLSQ<Impl>::insertLoad(DynInstPtr &load_inst)
+{
+    assert((loadTail + 1) % LQEntries != loadHead && loads < LQEntries);
+
+    DPRINTF(OzoneLSQ, "Inserting load PC %#x, idx:%i [sn:%lli]\n",
+            load_inst->readPC(), loadTail, load_inst->seqNum);
+
+    load_inst->lqIdx = loadTail;
+
+    if (stores == 0) {
+        load_inst->sqIdx = -1;
+    } else {
+        load_inst->sqIdx = storeTail;
+    }
+
+    loadQueue[loadTail] = load_inst;
+
+    incrLdIdx(loadTail);
+
+    ++loads;
+}
+
+template <class Impl>
+void
+OzoneLSQ<Impl>::insertStore(DynInstPtr &store_inst)
+{
+    // Make sure it is not full before inserting an instruction.
+    assert((storeTail + 1) % SQEntries != storeHead);
+    assert(stores < SQEntries);
+
+    DPRINTF(OzoneLSQ, "Inserting store PC %#x, idx:%i [sn:%lli]\n",
+            store_inst->readPC(), storeTail, store_inst->seqNum);
+
+    store_inst->sqIdx = storeTail;
+    store_inst->lqIdx = loadTail;
+
+    storeQueue[storeTail] = SQEntry(store_inst);
+
+    incrStIdx(storeTail);
+
+    ++stores;
+}
+
+template <class Impl>
+typename Impl::DynInstPtr
+OzoneLSQ<Impl>::getMemDepViolator()
+{
+    DynInstPtr temp = memDepViolator;
+
+    memDepViolator = NULL;
+
+    return temp;
+}
+
+template <class Impl>
+unsigned
+OzoneLSQ<Impl>::numFreeEntries()
+{
+    unsigned free_lq_entries = LQEntries - loads;
+    unsigned free_sq_entries = SQEntries - stores;
+
+    // Both the LQ and SQ keep an extra dummy entry to differentiate
+    // empty/full conditions, so subtract 1 from the free count.
+    if (free_lq_entries < free_sq_entries) {
+        return free_lq_entries - 1;
+    } else {
+        return free_sq_entries - 1;
+    }
+}
+
+template <class Impl>
+int
+OzoneLSQ<Impl>::numLoadsReady()
+{
+    int load_idx = loadHead;
+    int retval = 0;
+
+    while (load_idx != loadTail) {
+        assert(loadQueue[load_idx]);
+
+        if (loadQueue[load_idx]->readyToIssue()) {
+            ++retval;
+        }
+
+        incrLdIdx(load_idx);
+    }
+
+    return retval;
+}
+
+#if 0
+template <class Impl>
+Fault
+OzoneLSQ<Impl>::executeLoad()
+{
+    Fault load_fault = NoFault;
+    DynInstPtr load_inst;
+
+    assert(readyLoads.size() != 0);
+
+    // Execute a ready load.
+    LdMapIt ready_it = readyLoads.begin();
+
+    load_inst = (*ready_it).second;
+
+    // Execute the instruction, which is held in the data portion of the
+    // iterator.
+    load_fault = load_inst->execute();
+
+    // If it executed successfully, then switch it over to the executed
+    // loads list.
+    if (load_fault == NoFault) {
+        executedLoads[load_inst->seqNum] = load_inst;
+
+        readyLoads.erase(ready_it);
+    } else {
+        loadFaultInst = load_inst;
+    }
+
+    return load_fault;
+}
+#endif
+
+template <class Impl>
+Fault
+OzoneLSQ<Impl>::executeLoad(DynInstPtr &inst)
+{
+    // Execute a specific load.
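+    // The access is split in two halves rather than performed atomically:
+    // initiateAcc() computes the effective address and issues the request,
+    // while completeAcc() (run later, from the writeback event) copies the
+    // returned data into the destination register.  Roughly, the
+    // ISA-generated code being invoked looks like the following sketch
+    // (illustrative only; the names and signatures are not the actual
+    // generated output):
+    //
+    //     Fault LoadInst::initiateAcc()
+    //     {
+    //         Addr EA = base + disp;      // effective address computation
+    //         return read(req, data);     // ends up in the LSQ's read()
+    //     }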
+    Fault load_fault = NoFault;
+
+    DPRINTF(OzoneLSQ, "Executing load PC %#x, [sn:%lli]\n",
+            inst->readPC(), inst->seqNum);
+
+    // Make sure it's really in the list.
+    // Normally it should always be in the list; however, due to a syscall
+    // it may not be, so the debug-only check below is disabled.
+    /*
+#ifdef DEBUG
+    int i = loadHead;
+    while (1) {
+        if (i == loadTail && !find(inst)) {
+            assert(0 && "Load not in the queue!");
+        } else if (loadQueue[i] == inst) {
+            break;
+        }
+
+        i = i + 1;
+        if (i >= LQEntries) {
+            i = 0;
+        }
+    }
+#endif // DEBUG
+    */
+
+    load_fault = inst->initiateAcc();
+
+    // Might want to make sure that I'm not overwriting a previously faulting
+    // instruction that hasn't been checked yet.
+    // Actually probably want the oldest faulting load.
+    if (load_fault != NoFault) {
+        // Maybe just set it as can commit here, although that might cause
+        // some other problems with sending traps to the ROB too quickly.
+//        iewStage->instToCommit(inst);
+//        iewStage->activityThisCycle();
+    }
+
+    return load_fault;
+}
+
+template <class Impl>
+Fault
+OzoneLSQ<Impl>::executeLoad(int lq_idx)
+{
+    // Very hackish.  Not sure the best way to check that this
+    // instruction is at the head of the ROB.  I should have some sort
+    // of extra information here so that I'm not overloading the
+    // canCommit signal for 15 different things.
+    loadQueue[lq_idx]->setCanCommit();
+    Fault ret_fault = executeLoad(loadQueue[lq_idx]);
+    loadQueue[lq_idx]->clearCanCommit();
+    return ret_fault;
+}
+
+template <class Impl>
+Fault
+OzoneLSQ<Impl>::executeStore(DynInstPtr &store_inst)
+{
+    // Make sure that a store exists.
+    assert(stores != 0);
+
+    int store_idx = store_inst->sqIdx;
+
+    DPRINTF(OzoneLSQ, "Executing store PC %#x [sn:%lli]\n",
+            store_inst->readPC(), store_inst->seqNum);
+
+    // Check the recently completed loads to see if any match this store's
+    // address.  If so, then we have a memory ordering violation.
+    int load_idx = store_inst->lqIdx;
+
+    Fault store_fault = store_inst->initiateAcc();
+
+    // Store size should now be available.  Use it to get proper offset for
+    // addr comparisons.
+    int size = storeQueue[store_idx].size;
+
+    if (size == 0) {
+        DPRINTF(OzoneLSQ, "Fault on Store PC %#x, [sn:%lli], Size = 0\n",
+                store_inst->readPC(), store_inst->seqNum);
+
+        return store_fault;
+    }
+
+    assert(store_fault == NoFault);
+
+    if (!storeFaultInst) {
+        if (store_fault != NoFault) {
+            storeFaultInst = store_inst;
+            panic("Fault in a store instruction!");
+        } else if (store_inst->isNonSpeculative()) {
+            // Nonspeculative accesses (namely store conditionals)
+            // need to set themselves as able to writeback if we
+            // haven't had a fault by here.
+            storeQueue[store_idx].canWB = true;
+
+            ++storesToWB;
+        }
+    }
+
+    if (!memDepViolator) {
+        while (load_idx != loadTail) {
+            // Should really only check loads that have actually executed.
+            // This might be safe because effAddr is set to InvalAddr when
+            // the dyn inst is created.
+
+            // Should also check all addrs in the proper size range, which
+            // is stricter than needed.  For now just assume all loads are
+            // quad-word loads, and compare addresses based on that.
+            // @todo: Fix this, magic number being used here
+            if ((loadQueue[load_idx]->effAddr >> 8) ==
+                (store_inst->effAddr >> 8)) {
+                // A load incorrectly passed this store.  Squash and refetch.
+                // For now return a fault to show that it was unsuccessful.
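+                // (Comparing effAddr >> 8 treats addresses as 256-byte
+                // blocks: 0x1000 and 0x10ff are considered to conflict,
+                // while 0x1000 and 0x1100 are not.  False positives within
+                // a block are possible, but a true overlap is never
+                // missed.)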
+ memDepViolator = loadQueue[load_idx]; + + return TheISA::genMachineCheckFault(); + } + + incrLdIdx(load_idx); + } + + // If we've reached this point, there was no violation. + memDepViolator = NULL; + } + + return store_fault; +} + +template <class Impl> +void +OzoneLSQ<Impl>::commitLoad() +{ + assert(loadQueue[loadHead]); + + DPRINTF(OzoneLSQ, "[sn:%lli] Committing head load instruction, PC %#x\n", + loadQueue[loadHead]->seqNum, loadQueue[loadHead]->readPC()); + + + loadQueue[loadHead] = NULL; + + incrLdIdx(loadHead); + + --loads; +} + +template <class Impl> +void +OzoneLSQ<Impl>::commitLoad(InstSeqNum &inst) +{ + // Hopefully I don't use this function too much + panic("Don't use this function!"); + + int i = loadHead; + while (1) { + if (i == loadTail) { + assert(0 && "Load not in the queue!"); + } else if (loadQueue[i]->seqNum == inst) { + break; + } + + ++i; + if (i >= LQEntries) { + i = 0; + } + } + +// loadQueue[i]->removeInLSQ(); + loadQueue[i] = NULL; + --loads; +} + +template <class Impl> +void +OzoneLSQ<Impl>::commitLoads(InstSeqNum &youngest_inst) +{ + assert(loads == 0 || loadQueue[loadHead]); + + while (loads != 0 && loadQueue[loadHead]->seqNum <= youngest_inst) { + commitLoad(); + } +} + +template <class Impl> +void +OzoneLSQ<Impl>::commitStores(InstSeqNum &youngest_inst) +{ + assert(stores == 0 || storeQueue[storeHead].inst); + + int store_idx = storeHead; + + while (store_idx != storeTail) { + assert(storeQueue[store_idx].inst); + if (!storeQueue[store_idx].canWB) { + if (storeQueue[store_idx].inst->seqNum > youngest_inst) { + break; + } + DPRINTF(OzoneLSQ, "Marking store as able to write back, PC " + "%#x [sn:%lli]\n", + storeQueue[store_idx].inst->readPC(), + storeQueue[store_idx].inst->seqNum); + + storeQueue[store_idx].canWB = true; + +// --stores; + ++storesToWB; + } + + incrStIdx(store_idx); + } +} + +template <class Impl> +void +OzoneLSQ<Impl>::writebackStores() +{ + while (storesToWB > 0 && + storeWBIdx != storeTail && + storeQueue[storeWBIdx].inst && + storeQueue[storeWBIdx].canWB && + usedPorts < cachePorts) { + + if (storeQueue[storeWBIdx].size == 0) { + completeStore(storeWBIdx); + + incrStIdx(storeWBIdx); + + continue; + } + + if (dcacheInterface && dcacheInterface->isBlocked()) { + DPRINTF(OzoneLSQ, "Unable to write back any more stores, cache" + " is blocked!\n"); + break; + } + + ++usedPorts; + + if (storeQueue[storeWBIdx].inst->isDataPrefetch()) { + incrStIdx(storeWBIdx); + + continue; + } + + assert(storeQueue[storeWBIdx].req); + assert(!storeQueue[storeWBIdx].committed); + + MemReqPtr req = storeQueue[storeWBIdx].req; + storeQueue[storeWBIdx].committed = true; + +// Fault fault = cpu->translateDataReadReq(req); + req->cmd = Write; + req->completionEvent = NULL; + req->time = curTick; + assert(!req->data); + req->data = new uint8_t[64]; + memcpy(req->data, (uint8_t *)&storeQueue[storeWBIdx].data, req->size); + + DPRINTF(OzoneLSQ, "D-Cache: Writing back store idx:%i PC:%#x " + "to Addr:%#x, data:%#x [sn:%lli]\n", + storeWBIdx,storeQueue[storeWBIdx].inst->readPC(), + req->paddr, *(req->data), + storeQueue[storeWBIdx].inst->seqNum); + +// if (fault != NoFault) { + //What should we do if there is a fault??? + //for now panic +// panic("Page Table Fault!!!!!\n"); +// } + + if (dcacheInterface) { + MemAccessResult result = dcacheInterface->access(req); + + //@todo temp fix for LL/SC (works fine for 1 CPU) + if (req->flags & LOCKED) { + req->result=1; + panic("LL/SC! 
oh no no support!!!"); + } + + if (isStalled() && + storeQueue[storeWBIdx].inst->seqNum == stallingStoreIsn) { + DPRINTF(OzoneLSQ, "Unstalling, stalling store [sn:%lli] " + "load idx:%i\n", + stallingStoreIsn, stallingLoadIdx); + stalled = false; + stallingStoreIsn = 0; + be->replayMemInst(loadQueue[stallingLoadIdx]); + } + + if (result != MA_HIT && dcacheInterface->doEvents()) { + Event *wb = NULL; +/* + typename IEW::LdWritebackEvent *wb = NULL; + if (req->flags & LOCKED) { + // Stx_C does not generate a system port transaction. + req->result=0; + wb = new typename IEW::LdWritebackEvent(storeQueue[storeWBIdx].inst, + iewStage); + } +*/ + DPRINTF(OzoneLSQ,"D-Cache Write Miss!\n"); + +// DPRINTF(Activity, "Active st accessing mem miss [sn:%lli]\n", +// storeQueue[storeWBIdx].inst->seqNum); + + // Will stores need their own kind of writeback events? + // Do stores even need writeback events? + assert(!req->completionEvent); + req->completionEvent = new + StoreCompletionEvent(storeWBIdx, wb, this); + + lastDcacheStall = curTick; + + _status = DcacheMissStall; + + //mshrSeqNums.push_back(storeQueue[storeWBIdx].inst->seqNum); + + //DPRINTF(OzoneLSQ, "Added MSHR. count = %i\n",mshrSeqNums.size()); + + // Increment stat here or something + } else { + DPRINTF(OzoneLSQ,"D-Cache: Write Hit on idx:%i !\n", + storeWBIdx); + +// DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n", +// storeQueue[storeWBIdx].inst->seqNum); + + if (req->flags & LOCKED) { + // Stx_C does not generate a system port transaction. + req->result=1; + typename BackEnd::LdWritebackEvent *wb = + new typename BackEnd::LdWritebackEvent(storeQueue[storeWBIdx].inst, + be); + wb->schedule(curTick); + } + + completeStore(storeWBIdx); + } + + incrStIdx(storeWBIdx); + } else { + panic("Must HAVE DCACHE!!!!!\n"); + } + } + + // Not sure this should set it to 0. + usedPorts = 0; + + assert(stores >= 0 && storesToWB >= 0); +} + +/*template <class Impl> +void +OzoneLSQ<Impl>::removeMSHR(InstSeqNum seqNum) +{ + list<InstSeqNum>::iterator mshr_it = find(mshrSeqNums.begin(), + mshrSeqNums.end(), + seqNum); + + if (mshr_it != mshrSeqNums.end()) { + mshrSeqNums.erase(mshr_it); + DPRINTF(OzoneLSQ, "Removing MSHR. count = %i\n",mshrSeqNums.size()); + } +}*/ + +template <class Impl> +void +OzoneLSQ<Impl>::squash(const InstSeqNum &squashed_num) +{ + DPRINTF(OzoneLSQ, "Squashing until [sn:%lli]!" + "(Loads:%i Stores:%i)\n",squashed_num,loads,stores); + + int load_idx = loadTail; + decrLdIdx(load_idx); + + while (loads != 0 && loadQueue[load_idx]->seqNum > squashed_num) { + + // Clear the smart pointer to make sure it is decremented. + DPRINTF(OzoneLSQ,"Load Instruction PC %#x squashed, " + "[sn:%lli]\n", + loadQueue[load_idx]->readPC(), + loadQueue[load_idx]->seqNum); + + if (isStalled() && load_idx == stallingLoadIdx) { + stalled = false; + stallingStoreIsn = 0; + stallingLoadIdx = 0; + } + +// loadQueue[load_idx]->squashed = true; + loadQueue[load_idx] = NULL; + --loads; + + // Inefficient! + loadTail = load_idx; + + decrLdIdx(load_idx); + } + + int store_idx = storeTail; + decrStIdx(store_idx); + + while (stores != 0 && storeQueue[store_idx].inst->seqNum > squashed_num) { + + // Clear the smart pointer to make sure it is decremented. + DPRINTF(OzoneLSQ,"Store Instruction PC %#x squashed, " + "idx:%i [sn:%lli]\n", + storeQueue[store_idx].inst->readPC(), + store_idx, storeQueue[store_idx].inst->seqNum); + + // I don't think this can happen. It should have been cleared by the + // stalling load. 
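+        // A stalling load is always younger than the store it waits on,
+        // and the load walk above runs first; if it squashed the stalling
+        // load it also cleared `stalled`, so reaching this store with
+        // `stalled` still set would be a bookkeeping bug.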
+ if (isStalled() && + storeQueue[store_idx].inst->seqNum == stallingStoreIsn) { + panic("Is stalled should have been cleared by stalling load!\n"); + stalled = false; + stallingStoreIsn = 0; + } + +// storeQueue[store_idx].inst->squashed = true; + storeQueue[store_idx].inst = NULL; + storeQueue[store_idx].canWB = 0; + + if (storeQueue[store_idx].req) { + assert(!storeQueue[store_idx].req->completionEvent); + } + storeQueue[store_idx].req = NULL; + --stores; + + // Inefficient! + storeTail = store_idx; + + decrStIdx(store_idx); + } +} + +template <class Impl> +void +OzoneLSQ<Impl>::dumpInsts() +{ + cprintf("Load store queue: Dumping instructions.\n"); + cprintf("Load queue size: %i\n", loads); + cprintf("Load queue: "); + + int load_idx = loadHead; + + while (load_idx != loadTail && loadQueue[load_idx]) { + cprintf("[sn:%lli] %#x ", loadQueue[load_idx]->seqNum, + loadQueue[load_idx]->readPC()); + + incrLdIdx(load_idx); + } + + cprintf("\nStore queue size: %i\n", stores); + cprintf("Store queue: "); + + int store_idx = storeHead; + + while (store_idx != storeTail && storeQueue[store_idx].inst) { + cprintf("[sn:%lli] %#x ", storeQueue[store_idx].inst->seqNum, + storeQueue[store_idx].inst->readPC()); + + incrStIdx(store_idx); + } + + cprintf("\n"); +} + +template <class Impl> +void +OzoneLSQ<Impl>::completeStore(int store_idx) +{ + assert(storeQueue[store_idx].inst); + storeQueue[store_idx].completed = true; + --storesToWB; + // A bit conservative because a store completion may not free up entries, + // but hopefully avoids two store completions in one cycle from making + // the CPU tick twice. +// cpu->activityThisCycle(); + + if (store_idx == storeHead) { + do { + incrStIdx(storeHead); + + --stores; + } while (storeQueue[storeHead].completed && + storeHead != storeTail); + +// be->updateLSQNextCycle = true; + } + + DPRINTF(OzoneLSQ, "Store head idx:%i\n", storeHead); + + if (isStalled() && + storeQueue[store_idx].inst->seqNum == stallingStoreIsn) { + DPRINTF(OzoneLSQ, "Unstalling, stalling store [sn:%lli] " + "load idx:%i\n", + stallingStoreIsn, stallingLoadIdx); + stalled = false; + stallingStoreIsn = 0; + be->replayMemInst(loadQueue[stallingLoadIdx]); + } +} + +template <class Impl> +inline void +OzoneLSQ<Impl>::incrStIdx(int &store_idx) +{ + if (++store_idx >= SQEntries) + store_idx = 0; +} + +template <class Impl> +inline void +OzoneLSQ<Impl>::decrStIdx(int &store_idx) +{ + if (--store_idx < 0) + store_idx += SQEntries; +} + +template <class Impl> +inline void +OzoneLSQ<Impl>::incrLdIdx(int &load_idx) +{ + if (++load_idx >= LQEntries) + load_idx = 0; +} + +template <class Impl> +inline void +OzoneLSQ<Impl>::decrLdIdx(int &load_idx) +{ + if (--load_idx < 0) + load_idx += LQEntries; +} diff --git a/cpu/ozone/lw_back_end.cc b/cpu/ozone/lw_back_end.cc new file mode 100644 index 000000000..8e9a56ef5 --- /dev/null +++ b/cpu/ozone/lw_back_end.cc @@ -0,0 +1,5 @@ + +#include "cpu/ozone/lw_back_end_impl.hh" +#include "cpu/ozone/ozone_impl.hh" + +template class LWBackEnd<OzoneImpl>; diff --git a/cpu/ozone/lw_back_end.hh b/cpu/ozone/lw_back_end.hh new file mode 100644 index 000000000..1c03ffb73 --- /dev/null +++ b/cpu/ozone/lw_back_end.hh @@ -0,0 +1,473 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_OZONE_LW_BACK_END_HH__ +#define __CPU_OZONE_LW_BACK_END_HH__ + +#include <list> +#include <queue> +#include <set> +#include <string> + +#include "arch/faults.hh" +#include "base/timebuf.hh" +#include "cpu/inst_seq.hh" +#include "cpu/ozone/rename_table.hh" +#include "cpu/ozone/thread_state.hh" +#include "mem/functional/functional.hh" +#include "mem/mem_interface.hh" +#include "mem/mem_req.hh" +#include "sim/eventq.hh" + +template <class> +class Checker; +class ExecContext; + +template <class Impl> +class OzoneThreadState; + +template <class Impl> +class LWBackEnd +{ + public: + typedef OzoneThreadState<Impl> Thread; + + typedef typename Impl::Params Params; + typedef typename Impl::DynInst DynInst; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::FrontEnd FrontEnd; + typedef typename Impl::FullCPU::CommStruct CommStruct; + + struct SizeStruct { + int size; + }; + + typedef SizeStruct DispatchToIssue; + typedef SizeStruct IssueToExec; + typedef SizeStruct ExecToCommit; + typedef SizeStruct Writeback; + + TimeBuffer<DispatchToIssue> d2i; + typename TimeBuffer<DispatchToIssue>::wire instsToDispatch; + TimeBuffer<IssueToExec> i2e; + typename TimeBuffer<IssueToExec>::wire instsToExecute; + TimeBuffer<ExecToCommit> e2c; + TimeBuffer<Writeback> numInstsToWB; + + TimeBuffer<CommStruct> *comm; + typename TimeBuffer<CommStruct>::wire toIEW; + typename TimeBuffer<CommStruct>::wire fromCommit; + + class TrapEvent : public Event { + private: + LWBackEnd<Impl> *be; + + public: + TrapEvent(LWBackEnd<Impl> *_be); + + void process(); + const char *description(); + }; + + /** LdWriteback event for a load completion. */ + class LdWritebackEvent : public Event { + private: + /** Instruction that is writing back data to the register file. */ + DynInstPtr inst; + /** Pointer to IEW stage. */ + LWBackEnd *be; + + bool dcacheMiss; + + public: + /** Constructs a load writeback event. 
*/ + LdWritebackEvent(DynInstPtr &_inst, LWBackEnd *be); + + /** Processes writeback event. */ + virtual void process(); + /** Returns the description of the writeback event. */ + virtual const char *description(); + + void setDcacheMiss() { dcacheMiss = true; be->addDcacheMiss(inst); } + }; + + LWBackEnd(Params *params); + + std::string name() const; + + void regStats(); + + void setCPU(FullCPU *cpu_ptr); + + void setFrontEnd(FrontEnd *front_end_ptr) + { frontEnd = front_end_ptr; } + + void setXC(ExecContext *xc_ptr) + { xc = xc_ptr; } + + void setThreadState(Thread *thread_ptr) + { thread = thread_ptr; } + + void setCommBuffer(TimeBuffer<CommStruct> *_comm); + + void tick(); + void squash(); + void generateXCEvent() { xcSquash = true; } + void squashFromXC(); + void squashFromTrap(); + void checkInterrupts(); + bool trapSquash; + bool xcSquash; + + template <class T> + Fault read(MemReqPtr &req, T &data, int load_idx); + + template <class T> + Fault write(MemReqPtr &req, T &data, int store_idx); + + Addr readCommitPC() { return commitPC; } + + Addr commitPC; + + Tick lastCommitCycle; + + bool robEmpty() { return instList.empty(); } + + bool isFull() { return numInsts >= numROBEntries; } + bool isBlocked() { return status == Blocked || dispatchStatus == Blocked; } + + void fetchFault(Fault &fault); + + int wakeDependents(DynInstPtr &inst, bool memory_deps = false); + + /** Tells memory dependence unit that a memory instruction needs to be + * rescheduled. It will re-execute once replayMemInst() is called. + */ + void rescheduleMemInst(DynInstPtr &inst); + + /** Re-executes all rescheduled memory instructions. */ + void replayMemInst(DynInstPtr &inst); + + /** Completes memory instruction. */ + void completeMemInst(DynInstPtr &inst) { } + + void addDcacheMiss(DynInstPtr &inst) + { + waitingMemOps.insert(inst->seqNum); + numWaitingMemOps++; + DPRINTF(BE, "Adding a Dcache miss mem op [sn:%lli], total %i\n", + inst->seqNum, numWaitingMemOps); + } + + void removeDcacheMiss(DynInstPtr &inst) + { + assert(waitingMemOps.find(inst->seqNum) != waitingMemOps.end()); + waitingMemOps.erase(inst->seqNum); + numWaitingMemOps--; + DPRINTF(BE, "Removing a Dcache miss mem op [sn:%lli], total %i\n", + inst->seqNum, numWaitingMemOps); + } + + void addWaitingMemOp(DynInstPtr &inst) + { + waitingMemOps.insert(inst->seqNum); + numWaitingMemOps++; + DPRINTF(BE, "Adding a waiting mem op [sn:%lli], total %i\n", + inst->seqNum, numWaitingMemOps); + } + + void removeWaitingMemOp(DynInstPtr &inst) + { + assert(waitingMemOps.find(inst->seqNum) != waitingMemOps.end()); + waitingMemOps.erase(inst->seqNum); + numWaitingMemOps--; + DPRINTF(BE, "Removing a waiting mem op [sn:%lli], total %i\n", + inst->seqNum, numWaitingMemOps); + } + + void instToCommit(DynInstPtr &inst); + + void switchOut(); + void doSwitchOut(); + void takeOverFrom(ExecContext *old_xc = NULL); + + bool isSwitchedOut() { return switchedOut; } + + private: + void generateTrapEvent(Tick latency = 0); + void handleFault(Fault &fault, Tick latency = 0); + void updateStructures(); + void dispatchInsts(); + void dispatchStall(); + void checkDispatchStatus(); + void executeInsts(); + void commitInsts(); + void addToLSQ(DynInstPtr &inst); + void writebackInsts(); + bool commitInst(int inst_num); + void squash(const InstSeqNum &sn); + void squashDueToBranch(DynInstPtr &inst); + void squashDueToMemViolation(DynInstPtr &inst); + void squashDueToMemBlocked(DynInstPtr &inst); + void updateExeInstStats(DynInstPtr &inst); + void updateComInstStats(DynInstPtr &inst); + 
+ public: + FullCPU *cpu; + + FrontEnd *frontEnd; + + ExecContext *xc; + + Thread *thread; + + enum Status { + Running, + Idle, + DcacheMissStall, + DcacheMissComplete, + Blocked, + TrapPending + }; + + Status status; + + Status dispatchStatus; + + Status commitStatus; + + Counter funcExeInst; + + private: + typedef typename Impl::LdstQueue LdstQueue; + + LdstQueue LSQ; + public: + RenameTable<Impl> commitRenameTable; + + RenameTable<Impl> renameTable; + private: + class DCacheCompletionEvent : public Event + { + private: + LWBackEnd *be; + + public: + DCacheCompletionEvent(LWBackEnd *_be); + + virtual void process(); + virtual const char *description(); + }; + + friend class DCacheCompletionEvent; + + DCacheCompletionEvent cacheCompletionEvent; + + MemInterface *dcacheInterface; + + MemReqPtr memReq; + + // General back end width. Used if the more specific isn't given. + int width; + + // Dispatch width. + int dispatchWidth; + int numDispatchEntries; + int dispatchSize; + + int waitingInsts; + + int issueWidth; + + // Writeback width + int wbWidth; + + // Commit width + int commitWidth; + + /** Index into queue of instructions being written back. */ + unsigned wbNumInst; + + /** Cycle number within the queue of instructions being written + * back. Used in case there are too many instructions writing + * back at the current cycle and writesbacks need to be scheduled + * for the future. See comments in instToCommit(). + */ + unsigned wbCycle; + + int numROBEntries; + int numInsts; + + std::set<InstSeqNum> waitingMemOps; + typedef std::set<InstSeqNum>::iterator MemIt; + int numWaitingMemOps; + unsigned maxOutstandingMemOps; + + bool squashPending; + InstSeqNum squashSeqNum; + Addr squashNextPC; + + Fault faultFromFetch; + bool fetchHasFault; + + bool switchedOut; + bool switchPending; + + DynInstPtr memBarrier; + + private: + struct pqCompare { + bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const + { + return lhs->seqNum > rhs->seqNum; + } + }; + + typedef typename std::priority_queue<DynInstPtr, std::vector<DynInstPtr>, pqCompare> ReadyInstQueue; + ReadyInstQueue exeList; + + typedef typename std::list<DynInstPtr>::iterator InstListIt; + + std::list<DynInstPtr> instList; + std::list<DynInstPtr> waitingList; + std::list<DynInstPtr> replayList; + std::list<DynInstPtr> writeback; + + int latency; + + int squashLatency; + + bool exactFullStall; + + // number of cycles stalled for D-cache misses +/* Stats::Scalar<> dcacheStallCycles; + Counter lastDcacheStall; +*/ + Stats::Vector<> rob_cap_events; + Stats::Vector<> rob_cap_inst_count; + Stats::Vector<> iq_cap_events; + Stats::Vector<> iq_cap_inst_count; + // total number of instructions executed + Stats::Vector<> exe_inst; + Stats::Vector<> exe_swp; + Stats::Vector<> exe_nop; + Stats::Vector<> exe_refs; + Stats::Vector<> exe_loads; + Stats::Vector<> exe_branches; + + Stats::Vector<> issued_ops; + + // total number of loads forwaded from LSQ stores + Stats::Vector<> lsq_forw_loads; + + // total number of loads ignored due to invalid addresses + Stats::Vector<> inv_addr_loads; + + // total number of software prefetches ignored due to invalid addresses + Stats::Vector<> inv_addr_swpfs; + // ready loads blocked due to memory disambiguation + Stats::Vector<> lsq_blocked_loads; + + Stats::Scalar<> lsqInversion; + + Stats::Vector<> n_issued_dist; + Stats::VectorDistribution<> issue_delay_dist; + + Stats::VectorDistribution<> queue_res_dist; +/* + Stats::Vector<> stat_fu_busy; + Stats::Vector2d<> stat_fuBusy; + Stats::Vector<> 
dist_unissued; + Stats::Vector2d<> stat_issued_inst_type; + + Stats::Formula misspec_cnt; + Stats::Formula misspec_ipc; + Stats::Formula issue_rate; + Stats::Formula issue_stores; + Stats::Formula issue_op_rate; + Stats::Formula fu_busy_rate; + Stats::Formula commit_stores; + Stats::Formula commit_ipc; + Stats::Formula commit_ipb; + Stats::Formula lsq_inv_rate; +*/ + Stats::Vector<> writeback_count; + Stats::Vector<> producer_inst; + Stats::Vector<> consumer_inst; + Stats::Vector<> wb_penalized; + + Stats::Formula wb_rate; + Stats::Formula wb_fanout; + Stats::Formula wb_penalized_rate; + + // total number of instructions committed + Stats::Vector<> stat_com_inst; + Stats::Vector<> stat_com_swp; + Stats::Vector<> stat_com_refs; + Stats::Vector<> stat_com_loads; + Stats::Vector<> stat_com_membars; + Stats::Vector<> stat_com_branches; + + Stats::Distribution<> n_committed_dist; + + Stats::Scalar<> commit_eligible_samples; + Stats::Vector<> commit_eligible; + + Stats::Vector<> squashedInsts; + Stats::Vector<> ROBSquashedInsts; + + Stats::Scalar<> ROB_fcount; + Stats::Formula ROB_full_rate; + + Stats::Vector<> ROB_count; // cumulative ROB occupancy + Stats::Formula ROB_occ_rate; + Stats::VectorDistribution<> ROB_occ_dist; + public: + void dumpInsts(); + + Checker<DynInstPtr> *checker; +}; + +template <class Impl> +template <class T> +Fault +LWBackEnd<Impl>::read(MemReqPtr &req, T &data, int load_idx) +{ + return LSQ.read(req, data, load_idx); +} + +template <class Impl> +template <class T> +Fault +LWBackEnd<Impl>::write(MemReqPtr &req, T &data, int store_idx) +{ + return LSQ.write(req, data, store_idx); +} + +#endif // __CPU_OZONE_LW_BACK_END_HH__ diff --git a/cpu/ozone/lw_back_end_impl.hh b/cpu/ozone/lw_back_end_impl.hh new file mode 100644 index 000000000..41b4ea24b --- /dev/null +++ b/cpu/ozone/lw_back_end_impl.hh @@ -0,0 +1,1693 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "cpu/checker/cpu.hh" +#include "cpu/ozone/lw_back_end.hh" +#include "encumbered/cpu/full/op_class.hh" + +template <class Impl> +void +LWBackEnd<Impl>::generateTrapEvent(Tick latency) +{ + DPRINTF(BE, "Generating trap event\n"); + + TrapEvent *trap = new TrapEvent(this); + + trap->schedule(curTick + cpu->cycles(latency)); + + thread->trapPending = true; +} + +template <class Impl> +int +LWBackEnd<Impl>::wakeDependents(DynInstPtr &inst, bool memory_deps) +{ + assert(!inst->isSquashed()); + std::vector<DynInstPtr> &dependents = memory_deps ? inst->getMemDeps() : + inst->getDependents(); + int num_outputs = dependents.size(); + + DPRINTF(BE, "Waking instruction [sn:%lli] dependents in IQ\n", inst->seqNum); + + for (int i = 0; i < num_outputs; i++) { + DynInstPtr dep_inst = dependents[i]; + if (!memory_deps) { + dep_inst->markSrcRegReady(); + } else { + if (!dep_inst->isSquashed()) + dep_inst->markMemInstReady(inst.get()); + } + + DPRINTF(BE, "Marking source reg ready [sn:%lli] in IQ\n", dep_inst->seqNum); + + if (dep_inst->readyToIssue() && dep_inst->isInROB() && + !dep_inst->isNonSpeculative() && !dep_inst->isStoreConditional() && + dep_inst->memDepReady() && !dep_inst->isMemBarrier() && + !dep_inst->isWriteBarrier()) { + DPRINTF(BE, "Adding instruction to exeList [sn:%lli]\n", + dep_inst->seqNum); + exeList.push(dep_inst); + if (dep_inst->iqItValid) { + DPRINTF(BE, "Removing instruction from waiting list\n"); + waitingList.erase(dep_inst->iqIt); + waitingInsts--; + dep_inst->iqItValid = false; + assert(waitingInsts >= 0); + } + if (dep_inst->isMemRef()) { + removeWaitingMemOp(dep_inst); + DPRINTF(BE, "Issued a waiting mem op [sn:%lli]\n", + dep_inst->seqNum); + } + } + } + return num_outputs; +} + +template <class Impl> +void +LWBackEnd<Impl>::rescheduleMemInst(DynInstPtr &inst) +{ + replayList.push_front(inst); +} + +template <class Impl> +LWBackEnd<Impl>::TrapEvent::TrapEvent(LWBackEnd<Impl> *_be) + : Event(&mainEventQueue, CPU_Tick_Pri), be(_be) +{ + this->setFlags(Event::AutoDelete); +} + +template <class Impl> +void +LWBackEnd<Impl>::TrapEvent::process() +{ + be->trapSquash = true; +} + +template <class Impl> +const char * +LWBackEnd<Impl>::TrapEvent::description() +{ + return "Trap event"; +} + +template <class Impl> +void +LWBackEnd<Impl>::replayMemInst(DynInstPtr &inst) +{ + bool found_inst = false; + while (!replayList.empty()) { + exeList.push(replayList.front()); + if (replayList.front() == inst) { + found_inst = true; + } + replayList.pop_front(); + } + assert(found_inst); +} + +template<class Impl> +LWBackEnd<Impl>::LdWritebackEvent::LdWritebackEvent(DynInstPtr &_inst, + LWBackEnd<Impl> *_be) + : Event(&mainEventQueue), inst(_inst), be(_be), dcacheMiss(false) +{ + this->setFlags(Event::AutoDelete); +} + +template<class Impl> +void +LWBackEnd<Impl>::LdWritebackEvent::process() +{ + DPRINTF(BE, "Load writeback event [sn:%lli]\n", inst->seqNum); +// DPRINTF(Activity, "Activity: Ld Writeback event [sn:%lli]\n", inst->seqNum); + + //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum); + +// iewStage->wakeCPU(); + + if (be->isSwitchedOut()) + return; + + if (dcacheMiss) { + be->removeDcacheMiss(inst); + } + + if (inst->isSquashed()) { + inst = NULL; + return; + } + + if (!inst->isExecuted()) { + inst->setExecuted(); + + // Execute again to copy data to proper place. 
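+        // completeAcc() is the second half of the access started by
+        // initiateAcc() at execute time: it copies the data returned from
+        // memory into the instruction's destination register(s).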
+ inst->completeAcc(); + } + + // Need to insert instruction into queue to commit + be->instToCommit(inst); + + //wroteToTimeBuffer = true; +// iewStage->activityThisCycle(); + + inst = NULL; +} + +template<class Impl> +const char * +LWBackEnd<Impl>::LdWritebackEvent::description() +{ + return "Load writeback event"; +} + + +template <class Impl> +LWBackEnd<Impl>::DCacheCompletionEvent::DCacheCompletionEvent(LWBackEnd *_be) + : Event(&mainEventQueue, CPU_Tick_Pri), be(_be) +{ +} + +template <class Impl> +void +LWBackEnd<Impl>::DCacheCompletionEvent::process() +{ +} + +template <class Impl> +const char * +LWBackEnd<Impl>::DCacheCompletionEvent::description() +{ + return "Cache completion event"; +} + +template <class Impl> +LWBackEnd<Impl>::LWBackEnd(Params *params) + : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(5, 5), + trapSquash(false), xcSquash(false), cacheCompletionEvent(this), + dcacheInterface(params->dcacheInterface), width(params->backEndWidth), + exactFullStall(true) +{ + numROBEntries = params->numROBEntries; + numInsts = 0; + numDispatchEntries = 32; + maxOutstandingMemOps = params->maxOutstandingMemOps; + numWaitingMemOps = 0; + waitingInsts = 0; + switchedOut = false; + switchPending = false; + + LSQ.setBE(this); + + // Setup IQ and LSQ with their parameters here. + instsToDispatch = d2i.getWire(-1); + + instsToExecute = i2e.getWire(-1); + + dispatchWidth = params->dispatchWidth ? params->dispatchWidth : width; + issueWidth = params->issueWidth ? params->issueWidth : width; + wbWidth = params->wbWidth ? params->wbWidth : width; + commitWidth = params->commitWidth ? params->commitWidth : width; + + LSQ.init(params, params->LQEntries, params->SQEntries, 0); + + dispatchStatus = Running; +} + +template <class Impl> +std::string +LWBackEnd<Impl>::name() const +{ + return cpu->name() + ".backend"; +} + +template <class Impl> +void +LWBackEnd<Impl>::regStats() +{ + using namespace Stats; + rob_cap_events + .init(cpu->number_of_threads) + .name(name() + ".ROB:cap_events") + .desc("number of cycles where ROB cap was active") + .flags(total) + ; + + rob_cap_inst_count + .init(cpu->number_of_threads) + .name(name() + ".ROB:cap_inst") + .desc("number of instructions held up by ROB cap") + .flags(total) + ; + + iq_cap_events + .init(cpu->number_of_threads) + .name(name() +".IQ:cap_events" ) + .desc("number of cycles where IQ cap was active") + .flags(total) + ; + + iq_cap_inst_count + .init(cpu->number_of_threads) + .name(name() + ".IQ:cap_inst") + .desc("number of instructions held up by IQ cap") + .flags(total) + ; + + + exe_inst + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:count") + .desc("number of insts issued") + .flags(total) + ; + + exe_swp + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:swp") + .desc("number of swp insts issued") + .flags(total) + ; + + exe_nop + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:nop") + .desc("number of nop insts issued") + .flags(total) + ; + + exe_refs + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:refs") + .desc("number of memory reference insts issued") + .flags(total) + ; + + exe_loads + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:loads") + .desc("number of load insts issued") + .flags(total) + ; + + exe_branches + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:branches") + .desc("Number of branches issued") + .flags(total) + ; + + issued_ops + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:op_count") + .desc("number of insts issued") + .flags(total) + ; + +/* + for (int i=0; 
i<Num_OpClasses; ++i) { + stringstream subname; + subname << opClassStrings[i] << "_delay"; + issue_delay_dist.subname(i, subname.str()); + } +*/ + // + // Other stats + // + lsq_forw_loads + .init(cpu->number_of_threads) + .name(name() + ".LSQ:forw_loads") + .desc("number of loads forwarded via LSQ") + .flags(total) + ; + + inv_addr_loads + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:addr_loads") + .desc("number of invalid-address loads") + .flags(total) + ; + + inv_addr_swpfs + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:addr_swpfs") + .desc("number of invalid-address SW prefetches") + .flags(total) + ; + + lsq_blocked_loads + .init(cpu->number_of_threads) + .name(name() + ".LSQ:blocked_loads") + .desc("number of ready loads not issued due to memory disambiguation") + .flags(total) + ; + + lsqInversion + .name(name() + ".ISSUE:lsq_invert") + .desc("Number of times LSQ instruction issued early") + ; + + n_issued_dist + .init(issueWidth + 1) + .name(name() + ".ISSUE:issued_per_cycle") + .desc("Number of insts issued each cycle") + .flags(total | pdf | dist) + ; + issue_delay_dist + .init(Num_OpClasses,0,99,2) + .name(name() + ".ISSUE:") + .desc("cycles from operands ready to issue") + .flags(pdf | cdf) + ; + + queue_res_dist + .init(Num_OpClasses, 0, 99, 2) + .name(name() + ".IQ:residence:") + .desc("cycles from dispatch to issue") + .flags(total | pdf | cdf ) + ; + for (int i = 0; i < Num_OpClasses; ++i) { + queue_res_dist.subname(i, opClassStrings[i]); + } + + writeback_count + .init(cpu->number_of_threads) + .name(name() + ".WB:count") + .desc("cumulative count of insts written-back") + .flags(total) + ; + + producer_inst + .init(cpu->number_of_threads) + .name(name() + ".WB:producers") + .desc("num instructions producing a value") + .flags(total) + ; + + consumer_inst + .init(cpu->number_of_threads) + .name(name() + ".WB:consumers") + .desc("num instructions consuming a value") + .flags(total) + ; + + wb_penalized + .init(cpu->number_of_threads) + .name(name() + ".WB:penalized") + .desc("number of instrctions required to write to 'other' IQ") + .flags(total) + ; + + + wb_penalized_rate + .name(name() + ".WB:penalized_rate") + .desc ("fraction of instructions written-back that wrote to 'other' IQ") + .flags(total) + ; + + wb_penalized_rate = wb_penalized / writeback_count; + + wb_fanout + .name(name() + ".WB:fanout") + .desc("average fanout of values written-back") + .flags(total) + ; + + wb_fanout = producer_inst / consumer_inst; + + wb_rate + .name(name() + ".WB:rate") + .desc("insts written-back per cycle") + .flags(total) + ; + wb_rate = writeback_count / cpu->numCycles; + + stat_com_inst + .init(cpu->number_of_threads) + .name(name() + ".COM:count") + .desc("Number of instructions committed") + .flags(total) + ; + + stat_com_swp + .init(cpu->number_of_threads) + .name(name() + ".COM:swp_count") + .desc("Number of s/w prefetches committed") + .flags(total) + ; + + stat_com_refs + .init(cpu->number_of_threads) + .name(name() + ".COM:refs") + .desc("Number of memory references committed") + .flags(total) + ; + + stat_com_loads + .init(cpu->number_of_threads) + .name(name() + ".COM:loads") + .desc("Number of loads committed") + .flags(total) + ; + + stat_com_membars + .init(cpu->number_of_threads) + .name(name() + ".COM:membars") + .desc("Number of memory barriers committed") + .flags(total) + ; + + stat_com_branches + .init(cpu->number_of_threads) + .name(name() + ".COM:branches") + .desc("Number of branches committed") + .flags(total) + ; + n_committed_dist + 
.init(0,commitWidth,1) + .name(name() + ".COM:committed_per_cycle") + .desc("Number of insts commited each cycle") + .flags(pdf) + ; + + // + // Commit-Eligible instructions... + // + // -> The number of instructions eligible to commit in those + // cycles where we reached our commit BW limit (less the number + // actually committed) + // + // -> The average value is computed over ALL CYCLES... not just + // the BW limited cycles + // + // -> The standard deviation is computed only over cycles where + // we reached the BW limit + // + commit_eligible + .init(cpu->number_of_threads) + .name(name() + ".COM:bw_limited") + .desc("number of insts not committed due to BW limits") + .flags(total) + ; + + commit_eligible_samples + .name(name() + ".COM:bw_lim_events") + .desc("number cycles where commit BW limit reached") + ; + + squashedInsts + .init(cpu->number_of_threads) + .name(name() + ".COM:squashed_insts") + .desc("Number of instructions removed from inst list") + ; + + ROBSquashedInsts + .init(cpu->number_of_threads) + .name(name() + ".COM:rob_squashed_insts") + .desc("Number of instructions removed from inst list when they reached the head of the ROB") + ; + + ROB_fcount + .name(name() + ".ROB:full_count") + .desc("number of cycles where ROB was full") + ; + + ROB_count + .init(cpu->number_of_threads) + .name(name() + ".ROB:occupancy") + .desc(name() + ".ROB occupancy (cumulative)") + .flags(total) + ; + + ROB_full_rate + .name(name() + ".ROB:full_rate") + .desc("ROB full per cycle") + ; + ROB_full_rate = ROB_fcount / cpu->numCycles; + + ROB_occ_rate + .name(name() + ".ROB:occ_rate") + .desc("ROB occupancy rate") + .flags(total) + ; + ROB_occ_rate = ROB_count / cpu->numCycles; + + ROB_occ_dist + .init(cpu->number_of_threads,0,numROBEntries,2) + .name(name() + ".ROB:occ_dist") + .desc("ROB Occupancy per cycle") + .flags(total | cdf) + ; +} + +template <class Impl> +void +LWBackEnd<Impl>::setCPU(FullCPU *cpu_ptr) +{ + cpu = cpu_ptr; + LSQ.setCPU(cpu_ptr); + checker = cpu->checker; +} + +template <class Impl> +void +LWBackEnd<Impl>::setCommBuffer(TimeBuffer<CommStruct> *_comm) +{ + comm = _comm; + toIEW = comm->getWire(0); + fromCommit = comm->getWire(-1); +} + +#if FULL_SYSTEM +template <class Impl> +void +LWBackEnd<Impl>::checkInterrupts() +{ + if (cpu->checkInterrupts && + cpu->check_interrupts() && + !cpu->inPalMode(thread->readPC()) && + !trapSquash && + !xcSquash) { + frontEnd->interruptPending = true; + if (robEmpty() && !LSQ.hasStoresToWB()) { + // Will need to squash all instructions currently in flight and have + // the interrupt handler restart at the last non-committed inst. + // Most of that can be handled through the trap() function. The + // processInterrupts() function really just checks for interrupts + // and then calls trap() if there is an interrupt present. + + // Not sure which thread should be the one to interrupt. For now + // always do thread 0. + assert(!thread->inSyscall); + thread->inSyscall = true; + + // CPU will handle implementation of the interrupt. + cpu->processInterrupts(); + + // Now squash or record that I need to squash this cycle. + commitStatus = TrapPending; + + // Exit state update mode to avoid accidental updating. + thread->inSyscall = false; + + // Generate trap squash event. 
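+            // The TrapEvent does not squash directly; its process() merely
+            // sets trapSquash, and tick() performs the actual
+            // squashFromTrap() at the top of a later cycle, keeping the
+            // squash out of event-queue context.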
+ generateTrapEvent(); + + DPRINTF(BE, "Interrupt detected.\n"); + } else { + DPRINTF(BE, "Interrupt must wait for ROB to drain.\n"); + } + } +} + +template <class Impl> +void +LWBackEnd<Impl>::handleFault(Fault &fault, Tick latency) +{ + DPRINTF(BE, "Handling fault!\n"); + + assert(!thread->inSyscall); + + thread->inSyscall = true; + + // Consider holding onto the trap and waiting until the trap event + // happens for this to be executed. + fault->invoke(thread->getXCProxy()); + + // Exit state update mode to avoid accidental updating. + thread->inSyscall = false; + + commitStatus = TrapPending; + + // Generate trap squash event. + generateTrapEvent(latency); +} +#endif + +template <class Impl> +void +LWBackEnd<Impl>::tick() +{ + DPRINTF(BE, "Ticking back end\n"); + + if (switchPending && robEmpty() && !LSQ.hasStoresToWB()) { + cpu->signalSwitched(); + return; + } + + ROB_count[0]+= numInsts; + + wbCycle = 0; + + // Read in any done instruction information and update the IQ or LSQ. + updateStructures(); + +#if FULL_SYSTEM + checkInterrupts(); + + if (trapSquash) { + assert(!xcSquash); + squashFromTrap(); + } else if (xcSquash) { + squashFromXC(); + } +#endif + + if (dispatchStatus != Blocked) { + dispatchInsts(); + } else { + checkDispatchStatus(); + } + + if (commitStatus != TrapPending) { + executeInsts(); + + commitInsts(); + } + + LSQ.writebackStores(); + + DPRINTF(BE, "Waiting insts: %i, mem ops: %i, ROB entries in use: %i, " + "LSQ loads: %i, LSQ stores: %i\n", + waitingInsts, numWaitingMemOps, numInsts, + LSQ.numLoads(), LSQ.numStores()); + +#ifdef DEBUG + assert(numInsts == instList.size()); + assert(waitingInsts == waitingList.size()); + assert(numWaitingMemOps == waitingMemOps.size()); + assert(!switchedOut); +#endif +} + +template <class Impl> +void +LWBackEnd<Impl>::updateStructures() +{ + if (fromCommit->doneSeqNum) { + LSQ.commitLoads(fromCommit->doneSeqNum); + LSQ.commitStores(fromCommit->doneSeqNum); + } + + if (fromCommit->nonSpecSeqNum) { + if (fromCommit->uncached) { +// LSQ.executeLoad(fromCommit->lqIdx); + } else { +// IQ.scheduleNonSpec( +// fromCommit->nonSpecSeqNum); + } + } +} + +template <class Impl> +void +LWBackEnd<Impl>::addToLSQ(DynInstPtr &inst) +{ + // Do anything LSQ specific here? 
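+    // Currently nothing extra is needed: the LSQ's insert() asserts that
+    // the instruction is a memory reference and routes it to
+    // insertLoad()/insertStore() itself.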
+    LSQ.insert(inst);
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::dispatchInsts()
+{
+    DPRINTF(BE, "Trying to dispatch instructions.\n");
+
+    while (numInsts < numROBEntries &&
+           numWaitingMemOps < maxOutstandingMemOps) {
+        // Get instruction from front of time buffer
+        DynInstPtr inst = frontEnd->getInst();
+        if (!inst) {
+            break;
+        } else if (inst->isSquashed()) {
+            continue;
+        }
+
+        ++numInsts;
+        instList.push_front(inst);
+
+        inst->setInROB();
+
+        DPRINTF(BE, "Dispatching instruction [sn:%lli] PC:%#x\n",
+                inst->seqNum, inst->readPC());
+
+        for (int i = 0; i < inst->numDestRegs(); ++i)
+            renameTable[inst->destRegIdx(i)] = inst;
+
+        if (inst->isMemBarrier() || inst->isWriteBarrier()) {
+            if (memBarrier) {
+                DPRINTF(BE, "Instruction [sn:%lli] is waiting on "
+                        "barrier [sn:%lli].\n",
+                        inst->seqNum, memBarrier->seqNum);
+                memBarrier->addMemDependent(inst);
+                inst->addSrcMemInst(memBarrier);
+            }
+            memBarrier = inst;
+            inst->setCanCommit();
+        } else if (inst->readyToIssue() &&
+                   !inst->isNonSpeculative() &&
+                   !inst->isStoreConditional()) {
+            if (inst->isMemRef()) {
+                LSQ.insert(inst);
+                if (memBarrier) {
+                    DPRINTF(BE, "Instruction [sn:%lli] is waiting on "
+                            "barrier [sn:%lli].\n",
+                            inst->seqNum, memBarrier->seqNum);
+                    memBarrier->addMemDependent(inst);
+                    inst->addSrcMemInst(memBarrier);
+                    addWaitingMemOp(inst);
+
+                    waitingList.push_front(inst);
+                    inst->iqIt = waitingList.begin();
+                    inst->iqItValid = true;
+                    waitingInsts++;
+                } else {
+                    DPRINTF(BE, "Instruction [sn:%lli] ready, adding to "
+                            "exeList.\n",
+                            inst->seqNum);
+                    exeList.push(inst);
+                }
+            } else if (inst->isNop()) {
+                DPRINTF(BE, "Nop encountered [sn:%lli], skipping exeList.\n",
+                        inst->seqNum);
+                inst->setIssued();
+                inst->setExecuted();
+                inst->setCanCommit();
+            } else {
+                DPRINTF(BE, "Instruction [sn:%lli] ready, adding to "
+                        "exeList.\n",
+                        inst->seqNum);
+                exeList.push(inst);
+            }
+        } else {
+            if (inst->isNonSpeculative() || inst->isStoreConditional()) {
+                inst->setCanCommit();
+                DPRINTF(BE, "Adding non-speculative instruction\n");
+            }
+
+            if (inst->isMemRef()) {
+                addWaitingMemOp(inst);
+                LSQ.insert(inst);
+                if (memBarrier) {
+                    memBarrier->addMemDependent(inst);
+                    inst->addSrcMemInst(memBarrier);
+
+                    DPRINTF(BE, "Instruction [sn:%lli] is waiting on "
+                            "barrier [sn:%lli].\n",
+                            inst->seqNum, memBarrier->seqNum);
+                }
+            }
+
+            DPRINTF(BE, "Instruction [sn:%lli] not ready, adding to "
+                    "waitingList.\n",
+                    inst->seqNum);
+            waitingList.push_front(inst);
+            inst->iqIt = waitingList.begin();
+            inst->iqItValid = true;
+            waitingInsts++;
+        }
+    }
+
+    // Check if the IQ or LSQ is full.  If so, we'll need to break and stop
+    // removing instructions.  Also update the number of insts to remove
+    // from the queue.  Check here if we don't care about exact stall
+    // conditions.
+/*
+    bool stall = false;
+    if (IQ.isFull()) {
+        DPRINTF(BE, "IQ is full!\n");
+        stall = true;
+    } else if (LSQ.isFull()) {
+        DPRINTF(BE, "LSQ is full!\n");
+        stall = true;
+    } else if (isFull()) {
+        DPRINTF(BE, "ROB is full!\n");
+        stall = true;
+        ROB_fcount++;
+    }
+    if (stall) {
+        d2i.advance();
+        dispatchStall();
+        return;
+    }
+*/
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::dispatchStall()
+{
+    dispatchStatus = Blocked;
+    if (!cpu->decoupledFrontEnd) {
+        // Tell front end to stall here through a timebuffer, or just tell
+        // it directly.
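+        // (With the coupled front end used here, backpressure is implicit
+        // instead: dispatchInsts() simply stops calling frontEnd->getInst()
+        // while the ROB or outstanding-memory-op limit is reached.)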
+ } +} + +template <class Impl> +void +LWBackEnd<Impl>::checkDispatchStatus() +{ + DPRINTF(BE, "Checking dispatch status\n"); + assert(dispatchStatus == Blocked); + if (!LSQ.isFull() && !isFull()) { + DPRINTF(BE, "Dispatch no longer blocked\n"); + dispatchStatus = Running; + dispatchInsts(); + } +} + +template <class Impl> +void +LWBackEnd<Impl>::executeInsts() +{ + DPRINTF(BE, "Trying to execute instructions\n"); + + int num_executed = 0; + while (!exeList.empty() && num_executed < issueWidth) { + DynInstPtr inst = exeList.top(); + + DPRINTF(BE, "Executing inst [sn:%lli] PC: %#x\n", + inst->seqNum, inst->readPC()); + + // Check if the instruction is squashed; if so then skip it + // and don't count it towards the FU usage. + if (inst->isSquashed()) { + DPRINTF(BE, "Execute: Instruction was squashed.\n"); + + // Not sure how to handle this plus the method of sending # of + // instructions to use. Probably will just have to count it + // towards the bandwidth usage, but not the FU usage. + ++num_executed; + + // Consider this instruction executed so that commit can go + // ahead and retire the instruction. + inst->setExecuted(); + + // Not sure if I should set this here or just let commit try to + // commit any squashed instructions. I like the latter a bit more. + inst->setCanCommit(); + +// ++iewExecSquashedInsts; + exeList.pop(); + + continue; + } + + Fault fault = NoFault; + + // Execute instruction. + // Note that if the instruction faults, it will be handled + // at the commit stage. + if (inst->isMemRef() && + (!inst->isDataPrefetch() && !inst->isInstPrefetch())) { + if (dcacheInterface->isBlocked()) { + // Should I move the instruction aside? + DPRINTF(BE, "Execute: dcache is blocked\n"); + break; + } + DPRINTF(BE, "Execute: Initiating access for memory " + "reference.\n"); + + if (inst->isLoad()) { + LSQ.executeLoad(inst); + } else if (inst->isStore()) { + LSQ.executeStore(inst); + if (inst->req && !(inst->req->flags & LOCKED)) { + inst->setExecuted(); + + instToCommit(inst); + } + } else { + panic("Unknown mem type!"); + } + } else { + inst->execute(); + + inst->setExecuted(); + + instToCommit(inst); + } + + updateExeInstStats(inst); + + ++funcExeInst; + ++num_executed; + + exeList.pop(); + + if (inst->mispredicted()) { + squashDueToBranch(inst); + break; + } else if (LSQ.violation()) { + // Get the DynInst that caused the violation. Note that this + // clears the violation signal. + DynInstPtr violator; + violator = LSQ.getMemDepViolator(); + + DPRINTF(BE, "LDSTQ detected a violation. Violator PC: " + "%#x, inst PC: %#x. Addr is: %#x.\n", + violator->readPC(), inst->readPC(), inst->physEffAddr); + + // Squash. 
+ squashDueToMemViolation(inst); + } + } + + issued_ops[0]+= num_executed; + n_issued_dist[num_executed]++; +} + +template<class Impl> +void +LWBackEnd<Impl>::instToCommit(DynInstPtr &inst) +{ + + DPRINTF(BE, "Sending instructions to commit [sn:%lli] PC %#x.\n", + inst->seqNum, inst->readPC()); + + if (!inst->isSquashed()) { + DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n", + inst->seqNum, inst->readPC()); + + inst->setCanCommit(); + + if (inst->isExecuted()) { + inst->setResultReady(); + int dependents = wakeDependents(inst); + if (dependents) { + producer_inst[0]++; + consumer_inst[0]+= dependents; + } + } + } + + writeback_count[0]++; +} +#if 0 +template <class Impl> +void +LWBackEnd<Impl>::writebackInsts() +{ + int wb_width = wbWidth; + // Using this method I'm not quite sure how to prevent an + // instruction from waking its own dependents multiple times, + // without the guarantee that commit always has enough bandwidth + // to accept all instructions being written back. This guarantee + // might not be too unrealistic. + InstListIt wb_inst_it = writeback.begin(); + InstListIt wb_end_it = writeback.end(); + int inst_num = 0; + int consumer_insts = 0; + + for (; inst_num < wb_width && + wb_inst_it != wb_end_it; inst_num++) { + DynInstPtr inst = (*wb_inst_it); + + // Some instructions will be sent to commit without having + // executed because they need commit to handle them. + // E.g. Uncached loads have not actually executed when they + // are first sent to commit. Instead commit must tell the LSQ + // when it's ready to execute the uncached load. + if (!inst->isSquashed()) { + DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n", + inst->seqNum, inst->readPC()); + + inst->setCanCommit(); + inst->setResultReady(); + + if (inst->isExecuted()) { + int dependents = wakeDependents(inst); + if (dependents) { + producer_inst[0]++; + consumer_insts+= dependents; + } + } + } + + writeback.erase(wb_inst_it++); + } + LSQ.writebackStores(); + consumer_inst[0]+= consumer_insts; + writeback_count[0]+= inst_num; +} +#endif +template <class Impl> +bool +LWBackEnd<Impl>::commitInst(int inst_num) +{ + // Read instruction from the head of the ROB + DynInstPtr inst = instList.back(); + + // Make sure instruction is valid + assert(inst); + + if (!inst->readyToCommit()) + return false; + + DPRINTF(BE, "Trying to commit instruction [sn:%lli] PC:%#x\n", + inst->seqNum, inst->readPC()); + + thread->setPC(inst->readPC()); + thread->setNextPC(inst->readNextPC()); + inst->reachedCommit = true; + + // If the instruction is not executed yet, then it is a non-speculative + // or store inst. Signal backwards that it should be executed. + if (!inst->isExecuted()) { + if (inst->isNonSpeculative() || + inst->isStoreConditional() || + inst->isMemBarrier() || + inst->isWriteBarrier()) { +#if !FULL_SYSTEM + // Hack to make sure syscalls aren't executed until all stores + // write back their data. This direct communication shouldn't + // be used for anything other than this. 
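+            // i.e. in syscall emulation, hold the instruction at commit
+            // until it is the only one in the commit group and the store
+            // buffer has fully drained.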
+ if (inst_num > 0 || LSQ.hasStoresToWB()) +#else + if ((inst->isMemBarrier() || inst->isWriteBarrier() || + inst->isQuiesce()) && + LSQ.hasStoresToWB()) +#endif + { + DPRINTF(BE, "Waiting for all stores to writeback.\n"); + return false; + } + + DPRINTF(BE, "Encountered a store or non-speculative " + "instruction at the head of the ROB, PC %#x.\n", + inst->readPC()); + + if (inst->isMemBarrier() || inst->isWriteBarrier()) { + DPRINTF(BE, "Waking dependents on barrier [sn:%lli]\n", + inst->seqNum); + assert(memBarrier); + wakeDependents(inst, true); + if (memBarrier == inst) + memBarrier = NULL; + inst->clearMemDependents(); + } + + // Send back the non-speculative instruction's sequence number. + if (inst->iqItValid) { + DPRINTF(BE, "Removing instruction from waiting list\n"); + waitingList.erase(inst->iqIt); + inst->iqItValid = false; + waitingInsts--; + assert(waitingInsts >= 0); + if (inst->isStore()) + removeWaitingMemOp(inst); + } + + exeList.push(inst); + + // Change the instruction so it won't try to commit again until + // it is executed. + inst->clearCanCommit(); + +// ++commitNonSpecStalls; + + return false; + } else if (inst->isLoad()) { + DPRINTF(BE, "[sn:%lli]: Uncached load, PC %#x.\n", + inst->seqNum, inst->readPC()); + + // Send back the non-speculative instruction's sequence + // number. Maybe just tell the lsq to re-execute the load. + + // Send back the non-speculative instruction's sequence number. + if (inst->iqItValid) { + DPRINTF(BE, "Removing instruction from waiting list\n"); + waitingList.erase(inst->iqIt); + inst->iqItValid = false; + waitingInsts--; + assert(waitingInsts >= 0); + removeWaitingMemOp(inst); + } + replayMemInst(inst); + + inst->clearCanCommit(); + + return false; + } else { + panic("Trying to commit un-executed instruction " + "of unknown type!\n"); + } + } + + // Not handled for now. + assert(!inst->isThreadSync()); + assert(inst->memDepReady()); + // Stores will mark themselves as totally completed as they need + // to wait to writeback to memory. @todo: Hack...attempt to fix + // having the checker be forced to wait until a store completes in + // order to check all of the instructions. If the store at the + // head of the check list misses, but a later store hits, then + // loads in the checker may see the younger store values instead + // of the store they should see. Either the checker needs its own + // memory (annoying to update), its own store buffer (how to tell + // which value is correct?), or something else... + if (!inst->isStore()) { + inst->setCompleted(); + } + // Check if the instruction caused a fault. If so, trap. + Fault inst_fault = inst->getFault(); + + // Use checker prior to updating anything due to traps or PC + // based events. + if (checker) { + checker->tick(inst); + } + + if (inst_fault != NoFault) { + DPRINTF(BE, "Inst [sn:%lli] PC %#x has a fault\n", + inst->seqNum, inst->readPC()); + + // Instruction is completed as it has a fault. 
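+        // Completing it here means the drain/trap logic below never sees
+        // a faulting instruction as still in flight.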
+ inst->setCompleted(); + + if (LSQ.hasStoresToWB()) { + DPRINTF(BE, "Stores still in flight, will wait until drained.\n"); + return false; + } else if (inst_num != 0) { + DPRINTF(BE, "Will wait until instruction is head of commit group.\n"); + return false; + } else if (checker && inst->isStore()) { + checker->tick(inst); + } + + thread->setInst( + static_cast<TheISA::MachInst>(inst->staticInst->machInst)); +#if FULL_SYSTEM + handleFault(inst_fault); + return false; +#else // !FULL_SYSTEM + panic("fault (%d) detected @ PC %08p", inst_fault, + inst->PC); +#endif // FULL_SYSTEM + } + + int freed_regs = 0; + + for (int i = 0; i < inst->numDestRegs(); ++i) { + DPRINTF(BE, "Commit rename map setting reg %i to [sn:%lli]\n", + (int)inst->destRegIdx(i), inst->seqNum); + thread->renameTable[inst->destRegIdx(i)] = inst; + ++freed_regs; + } + + if (inst->traceData) { + inst->traceData->setFetchSeq(inst->seqNum); + inst->traceData->setCPSeq(thread->numInst); + inst->traceData->finalize(); + inst->traceData = NULL; + } + + inst->clearDependents(); + + frontEnd->addFreeRegs(freed_regs); + + instList.pop_back(); + + --numInsts; + ++thread->funcExeInst; + // Maybe move this to where the fault is handled; if the fault is + // handled, don't try to set this myself as the fault will set it. + // If not, then I set thread->PC = thread->nextPC and + // thread->nextPC = thread->nextPC + 4. + thread->setPC(thread->readNextPC()); + thread->setNextPC(thread->readNextPC() + sizeof(TheISA::MachInst)); + updateComInstStats(inst); + + // Write the done sequence number here. + toIEW->doneSeqNum = inst->seqNum; + lastCommitCycle = curTick; + +#if FULL_SYSTEM + int count = 0; + Addr oldpc; + do { + if (count == 0) + assert(!thread->inSyscall && !thread->trapPending); + oldpc = thread->readPC(); + cpu->system->pcEventQueue.service( + thread->getXCProxy()); + count++; + } while (oldpc != thread->readPC()); + if (count > 1) { + DPRINTF(BE, "PC skip function event, stopping commit\n"); + xcSquash = true; + return false; + } +#endif + return true; +} + +template <class Impl> +void +LWBackEnd<Impl>::commitInsts() +{ + // Not sure this should be a loop or not. 
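+    // Commit loop: retire up to commitWidth instructions this cycle from
+    // the tail of instList (the ROB head), dropping squashed entries along
+    // the way.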
+    int inst_num = 0;
+    while (!instList.empty() && inst_num < commitWidth) {
+        if (instList.back()->isSquashed()) {
+            // Record the squashed instruction's thread before popping it;
+            // reading instList.back() after pop_back() would credit the
+            // wrong instruction, or dereference an empty list.
+            ROBSquashedInsts[instList.back()->threadNumber]++;
+            instList.back()->clearDependents();
+            instList.pop_back();
+            --numInsts;
+            continue;
+        }
+
+        if (!commitInst(inst_num++)) {
+            DPRINTF(BE, "Can't commit, Instruction [sn:%lli] PC "
+                    "%#x is head of ROB and not ready\n",
+                    instList.back()->seqNum, instList.back()->readPC());
+            --inst_num;
+            break;
+        }
+    }
+    n_committed_dist.sample(inst_num);
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::squash(const InstSeqNum &sn)
+{
+    LSQ.squash(sn);
+
+    int freed_regs = 0;
+    InstListIt waiting_list_end = waitingList.end();
+    InstListIt insts_it = waitingList.begin();
+
+    while (insts_it != waiting_list_end && (*insts_it)->seqNum > sn)
+    {
+        if ((*insts_it)->isSquashed()) {
+            ++insts_it;
+            continue;
+        }
+        DPRINTF(BE, "Squashing instruction on waitingList PC %#x, [sn:%lli].\n",
+                (*insts_it)->readPC(),
+                (*insts_it)->seqNum);
+
+        if ((*insts_it)->isMemRef()) {
+            DPRINTF(BE, "Squashing a waiting mem op [sn:%lli]\n",
+                    (*insts_it)->seqNum);
+            removeWaitingMemOp((*insts_it));
+        }
+
+        waitingList.erase(insts_it++);
+        waitingInsts--;
+    }
+    assert(waitingInsts >= 0);
+
+    insts_it = instList.begin();
+
+    while (!instList.empty() && (*insts_it)->seqNum > sn)
+    {
+        if ((*insts_it)->isSquashed()) {
+            ++insts_it;
+            continue;
+        }
+        DPRINTF(BE, "Squashing instruction on inst list PC %#x, [sn:%lli].\n",
+                (*insts_it)->readPC(),
+                (*insts_it)->seqNum);
+
+        // Mark the instruction as squashed, and ready to commit so that
+        // it can drain out of the pipeline.
+        (*insts_it)->setSquashed();
+
+        (*insts_it)->setCanCommit();
+
+        (*insts_it)->removeInROB();
+
+        for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) {
+            DynInstPtr prev_dest = (*insts_it)->getPrevDestInst(i);
+            DPRINTF(BE, "Squash rename map restoring reg %i to [sn:%lli]\n",
+                    (int)(*insts_it)->destRegIdx(i), prev_dest->seqNum);
+            renameTable[(*insts_it)->destRegIdx(i)] = prev_dest;
+            ++freed_regs;
+        }
+
+        (*insts_it)->clearDependents();
+
+        squashedInsts[(*insts_it)->threadNumber]++;
+
+        instList.erase(insts_it++);
+        --numInsts;
+    }
+
+    insts_it = waitingList.begin();
+    while (!waitingList.empty() && insts_it != waitingList.end()) {
+        if ((*insts_it)->seqNum < sn) {
+            ++insts_it;
+            continue;
+        }
+        assert((*insts_it)->isSquashed());
+
+        waitingList.erase(insts_it++);
+        waitingInsts--;
+    }
+
+    while (memBarrier && memBarrier->seqNum > sn) {
+        DPRINTF(BE, "[sn:%lli] Memory barrier squashed (or previously "
+                "squashed)\n", memBarrier->seqNum);
+        memBarrier->clearMemDependents();
+        if (memBarrier->memDepReady()) {
+            DPRINTF(BE, "No previous barrier\n");
+            memBarrier = NULL;
+        } else {
+            std::list<DynInstPtr> &srcs = memBarrier->getMemSrcs();
+            memBarrier = srcs.front();
+            srcs.pop_front();
+            assert(srcs.empty());
+            DPRINTF(BE, "Previous barrier: [sn:%lli]\n",
+                    memBarrier->seqNum);
+        }
+    }
+
+    frontEnd->addFreeRegs(freed_regs);
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::squashFromXC()
+{
+    InstSeqNum squashed_inst = robEmpty() ? 0 : instList.back()->seqNum - 1;
+    squash(squashed_inst);
+    frontEnd->squash(squashed_inst, thread->readPC(),
+                     false, false);
+    frontEnd->interruptPending = false;
+
+    thread->trapPending = false;
+    thread->inSyscall = false;
+    xcSquash = false;
+    commitStatus = Running;
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::squashFromTrap()
+{
+    InstSeqNum squashed_inst = robEmpty() ? 0 : instList.back()->seqNum - 1;
+    squash(squashed_inst);
+    frontEnd->squash(squashed_inst, thread->readPC(),
+                     false, false);
+    frontEnd->interruptPending = false;
+
+    thread->trapPending = false;
+    thread->inSyscall = false;
+    trapSquash = false;
+    commitStatus = Running;
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::squashDueToBranch(DynInstPtr &inst)
+{
+    // Squash back to the branch and redirect the front end to the correct
+    // target; the final two arguments let it repair branch predictor state.
+    DPRINTF(BE, "Squashing due to branch [sn:%lli], will restart at PC %#x\n",
+            inst->seqNum, inst->readNextPC());
+    squash(inst->seqNum);
+    frontEnd->squash(inst->seqNum, inst->readNextPC(),
+                     true, inst->mispredicted());
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::squashDueToMemViolation(DynInstPtr &inst)
+{
+    // Unlike the branch case, no predictor update is involved here; the
+    // front end simply refetches from the violating instruction's next PC.
+    DPRINTF(BE, "Squashing due to violation [sn:%lli], will restart at PC %#x\n",
+            inst->seqNum, inst->readNextPC());
+    squash(inst->seqNum);
+    frontEnd->squash(inst->seqNum, inst->readNextPC(),
+                     false, inst->mispredicted());
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::squashDueToMemBlocked(DynInstPtr &inst)
+{
+    DPRINTF(BE, "Memory blocked, squashing load and younger insts, "
+            "PC: %#x [sn:%lli].\n", inst->readPC(), inst->seqNum);
+
+    squash(inst->seqNum - 1);
+    frontEnd->squash(inst->seqNum - 1, inst->readPC());
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::fetchFault(Fault &fault)
+{
+    faultFromFetch = fault;
+    fetchHasFault = true;
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::switchOut()
+{
+    switchPending = true;
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::doSwitchOut()
+{
+    switchedOut = true;
+    switchPending = false;
+    // Need to get rid of all committed, non-speculative state and write it
+    // to memory/XC.  In this case this is stores that have committed and not
+    // yet written back.
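+    // A sketch of the assumed drain protocol: the caller first sees
+    // switchOut() set switchPending, keeps ticking until the ROB is empty
+    // and the LSQ reports no stores to write back, and only then calls
+    // doSwitchOut().  The asserts below encode that contract.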
+ assert(robEmpty()); + assert(!LSQ.hasStoresToWB()); + + LSQ.switchOut(); + + squash(0); +} + +template <class Impl> +void +LWBackEnd<Impl>::takeOverFrom(ExecContext *old_xc) +{ + switchedOut = false; + xcSquash = false; + trapSquash = false; + + numInsts = 0; + numWaitingMemOps = 0; + waitingMemOps.clear(); + waitingInsts = 0; + switchedOut = false; + dispatchStatus = Running; + commitStatus = Running; + LSQ.takeOverFrom(old_xc); +} + +template <class Impl> +void +LWBackEnd<Impl>::updateExeInstStats(DynInstPtr &inst) +{ + int thread_number = inst->threadNumber; + + // + // Pick off the software prefetches + // +#ifdef TARGET_ALPHA + if (inst->isDataPrefetch()) + exe_swp[thread_number]++; + else + exe_inst[thread_number]++; +#else + exe_inst[thread_number]++; +#endif + + // + // Control operations + // + if (inst->isControl()) + exe_branches[thread_number]++; + + // + // Memory operations + // + if (inst->isMemRef()) { + exe_refs[thread_number]++; + + if (inst->isLoad()) + exe_loads[thread_number]++; + } +} + +template <class Impl> +void +LWBackEnd<Impl>::updateComInstStats(DynInstPtr &inst) +{ + unsigned tid = inst->threadNumber; + + // keep an instruction count + thread->numInst++; + thread->numInsts++; + + cpu->numInst++; + // + // Pick off the software prefetches + // +#ifdef TARGET_ALPHA + if (inst->isDataPrefetch()) { + stat_com_swp[tid]++; + } else { + stat_com_inst[tid]++; + } +#else + stat_com_inst[tid]++; +#endif + + // + // Control Instructions + // + if (inst->isControl()) + stat_com_branches[tid]++; + + // + // Memory references + // + if (inst->isMemRef()) { + stat_com_refs[tid]++; + + if (inst->isLoad()) { + stat_com_loads[tid]++; + } + } + + if (inst->isMemBarrier()) { + stat_com_membars[tid]++; + } +} + +template <class Impl> +void +LWBackEnd<Impl>::dumpInsts() +{ + int num = 0; + int valid_num = 0; + + InstListIt inst_list_it = --(instList.end()); + + cprintf("ExeList size: %i\n", exeList.size()); + + cprintf("Inst list size: %i\n", instList.size()); + + while (inst_list_it != instList.end()) + { + cprintf("Instruction:%i\n", + num); + if (!(*inst_list_it)->isSquashed()) { + if (!(*inst_list_it)->isIssued()) { + ++valid_num; + cprintf("Count:%i\n", valid_num); + } else if ((*inst_list_it)->isMemRef() && + !(*inst_list_it)->memOpDone) { + // Loads that have not been marked as executed still count + // towards the total instructions. + ++valid_num; + cprintf("Count:%i\n", valid_num); + } + } + + cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" + "Issued:%i\nSquashed:%i\n", + (*inst_list_it)->readPC(), + (*inst_list_it)->seqNum, + (*inst_list_it)->threadNumber, + (*inst_list_it)->isIssued(), + (*inst_list_it)->isSquashed()); + + if ((*inst_list_it)->isMemRef()) { + cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); + } + + cprintf("\n"); + + inst_list_it--; + ++num; + } + + cprintf("Waiting list size: %i\n", waitingList.size()); + + inst_list_it = --(waitingList.end()); + + while (inst_list_it != waitingList.end()) + { + cprintf("Instruction:%i\n", + num); + if (!(*inst_list_it)->isSquashed()) { + if (!(*inst_list_it)->isIssued()) { + ++valid_num; + cprintf("Count:%i\n", valid_num); + } else if ((*inst_list_it)->isMemRef() && + !(*inst_list_it)->memOpDone) { + // Loads that have not been marked as executed still count + // towards the total instructions. 
+ ++valid_num; + cprintf("Count:%i\n", valid_num); + } + } + + cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" + "Issued:%i\nSquashed:%i\n", + (*inst_list_it)->readPC(), + (*inst_list_it)->seqNum, + (*inst_list_it)->threadNumber, + (*inst_list_it)->isIssued(), + (*inst_list_it)->isSquashed()); + + if ((*inst_list_it)->isMemRef()) { + cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); + } + + cprintf("\n"); + + inst_list_it--; + ++num; + } + + cprintf("waitingMemOps list size: %i\n", waitingMemOps.size()); + + MemIt waiting_it = waitingMemOps.begin(); + + while (waiting_it != waitingMemOps.end()) + { + cprintf("[sn:%lli] ", (*waiting_it)); + waiting_it++; + ++num; + } + cprintf("\n"); +} diff --git a/cpu/ozone/lw_lsq.cc b/cpu/ozone/lw_lsq.cc new file mode 100644 index 000000000..922228b09 --- /dev/null +++ b/cpu/ozone/lw_lsq.cc @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/ozone/ozone_impl.hh" +#include "cpu/ozone/lw_lsq_impl.hh" + +// Force the instantiation of LDSTQ for all the implementations we care about. +template class OzoneLWLSQ<OzoneImpl>; + diff --git a/cpu/ozone/lw_lsq.hh b/cpu/ozone/lw_lsq.hh new file mode 100644 index 000000000..6fe343b42 --- /dev/null +++ b/cpu/ozone/lw_lsq.hh @@ -0,0 +1,657 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. 
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_OZONE_LW_LSQ_HH__
+#define __CPU_OZONE_LW_LSQ_HH__
+
+#include <algorithm>
+#include <list>
+#include <map>
+#include <queue>
+
+#include "arch/faults.hh"
+#include "arch/isa_traits.hh"
+#include "base/hashmap.hh"
+#include "config/full_system.hh"
+#include "cpu/inst_seq.hh"
+#include "mem/mem_interface.hh"
+//#include "mem/page_table.hh"
+#include "sim/debug.hh"
+#include "sim/sim_object.hh"
+
+//class PageTable;
+
+/**
+ * Class that implements the actual LQ and SQ for each specific thread.
+ * Both are circular queues; load entries are freed upon committing, while
+ * store entries are freed once they write back.  The LSQ unit tracks if
+ * there are memory ordering violations, and also detects partial
+ * store-to-load forwarding cases (a store only has part of a load's data)
+ * that require the load to wait until the store writes back.  In the
+ * former case it holds onto the instruction until the dependence unit
+ * looks at it, and in the latter it stalls the LSQ until the store writes
+ * back.  At that point the load is replayed.
+ */
+template <class Impl>
+class OzoneLWLSQ {
+  public:
+    typedef typename Impl::Params Params;
+    typedef typename Impl::FullCPU FullCPU;
+    typedef typename Impl::BackEnd BackEnd;
+    typedef typename Impl::DynInstPtr DynInstPtr;
+    typedef typename Impl::IssueStruct IssueStruct;
+
+    typedef TheISA::IntReg IntReg;
+
+    typedef typename std::map<InstSeqNum, DynInstPtr>::iterator LdMapIt;
+
+  private:
+    class StoreCompletionEvent : public Event {
+      public:
+        /** Constructs a store completion event. */
+        StoreCompletionEvent(DynInstPtr &inst, BackEnd *be,
+                             Event *wb_event, OzoneLWLSQ *lsq_ptr);
+
+        /** Processes the store completion event. */
+        void process();
+
+        /** Returns the description of this event. */
+        const char *description();
+
+      private:
+        /** The store instruction being written back. */
+        DynInstPtr inst;
+
+        BackEnd *be;
+        /** The writeback event for the store.  Needed for store
+         *  conditionals.
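+         *  (A store conditional's success or failure must still be
+         *  written back to the CPU, so its writeback event is held here
+         *  until the cache access completes.)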
+ */ + public: + Event *wbEvent; + bool miss; + private: + /** The pointer to the LSQ unit that issued the store. */ + OzoneLWLSQ<Impl> *lsqPtr; + }; + + public: + /** Constructs an LSQ unit. init() must be called prior to use. */ + OzoneLWLSQ(); + + /** Initializes the LSQ unit with the specified number of entries. */ + void init(Params *params, unsigned maxLQEntries, + unsigned maxSQEntries, unsigned id); + + /** Returns the name of the LSQ unit. */ + std::string name() const; + + /** Sets the CPU pointer. */ + void setCPU(FullCPU *cpu_ptr) + { cpu = cpu_ptr; } + + /** Sets the back-end stage pointer. */ + void setBE(BackEnd *be_ptr) + { be = be_ptr; } + + /** Sets the page table pointer. */ +// void setPageTable(PageTable *pt_ptr); + + /** Ticks the LSQ unit, which in this case only resets the number of + * used cache ports. + * @todo: Move the number of used ports up to the LSQ level so it can + * be shared by all LSQ units. + */ + void tick() { usedPorts = 0; } + + /** Inserts an instruction. */ + void insert(DynInstPtr &inst); + /** Inserts a load instruction. */ + void insertLoad(DynInstPtr &load_inst); + /** Inserts a store instruction. */ + void insertStore(DynInstPtr &store_inst); + + /** Executes a load instruction. */ + Fault executeLoad(DynInstPtr &inst); + + /** Executes a store instruction. */ + Fault executeStore(DynInstPtr &inst); + + /** Commits the head load. */ + void commitLoad(); + /** Commits loads older than a specific sequence number. */ + void commitLoads(InstSeqNum &youngest_inst); + + /** Commits stores older than a specific sequence number. */ + void commitStores(InstSeqNum &youngest_inst); + + /** Writes back stores. */ + void writebackStores(); + + // @todo: Include stats in the LSQ unit. + //void regStats(); + + /** Clears all the entries in the LQ. */ + void clearLQ(); + + /** Clears all the entries in the SQ. */ + void clearSQ(); + + /** Resizes the LQ to a given size. */ + void resizeLQ(unsigned size); + + /** Resizes the SQ to a given size. */ + void resizeSQ(unsigned size); + + /** Squashes all instructions younger than a specific sequence number. */ + void squash(const InstSeqNum &squashed_num); + + /** Returns if there is a memory ordering violation. Value is reset upon + * call to getMemDepViolator(). + */ + bool violation() { return memDepViolator; } + + /** Returns the memory ordering violator. */ + DynInstPtr getMemDepViolator(); + + /** Returns if a load became blocked due to the memory system. It clears + * the bool's value upon this being called. + */ + bool loadBlocked() + { return isLoadBlocked; } + + void clearLoadBlocked() + { isLoadBlocked = false; } + + bool isLoadBlockedHandled() + { return loadBlockedHandled; } + + void setLoadBlockedHandled() + { loadBlockedHandled = true; } + + /** Returns the number of free entries (min of free LQ and SQ entries). */ + unsigned numFreeEntries(); + + /** Returns the number of loads ready to execute. */ + int numLoadsReady(); + + /** Returns the number of loads in the LQ. */ + int numLoads() { return loads; } + + /** Returns the number of stores in the SQ. */ + int numStores() { return stores; } + + /** Returns if either the LQ or SQ is full. */ + bool isFull() { return lqFull() || sqFull(); } + + /** Returns if the LQ is full. */ + bool lqFull() { return loads >= (LQEntries - 1); } + + /** Returns if the SQ is full. */ + bool sqFull() { return stores >= (SQEntries - 1); } + + /** Debugging function to dump instructions in the LSQ. 
*/ + void dumpInsts(); + + /** Returns the number of instructions in the LSQ. */ + unsigned getCount() { return loads + stores; } + + /** Returns if there are any stores to writeback. */ + bool hasStoresToWB() { return storesToWB; } + + /** Returns the number of stores to writeback. */ + int numStoresToWB() { return storesToWB; } + + /** Returns if the LSQ unit will writeback on this cycle. */ + bool willWB() { return storeQueue.back().canWB && + !storeQueue.back().completed && + !dcacheInterface->isBlocked(); } + + void switchOut(); + + void takeOverFrom(ExecContext *old_xc = NULL); + + bool isSwitchedOut() { return switchedOut; } + + bool switchedOut; + + private: + /** Completes the store at the specified index. */ + void completeStore(int store_idx); + + private: + /** Pointer to the CPU. */ + FullCPU *cpu; + + /** Pointer to the back-end stage. */ + BackEnd *be; + + /** Pointer to the D-cache. */ + MemInterface *dcacheInterface; + + /** Pointer to the page table. */ +// PageTable *pTable; + + public: + struct SQEntry { + /** Constructs an empty store queue entry. */ + SQEntry() + : inst(NULL), req(NULL), size(0), data(0), + canWB(0), committed(0), completed(0), lqIt(NULL) + { } + + /** Constructs a store queue entry for a given instruction. */ + SQEntry(DynInstPtr &_inst) + : inst(_inst), req(NULL), size(0), data(0), + canWB(0), committed(0), completed(0), lqIt(NULL) + { } + + /** The store instruction. */ + DynInstPtr inst; + /** The memory request for the store. */ + MemReqPtr req; + /** The size of the store. */ + int size; + /** The store data. */ + IntReg data; + /** Whether or not the store can writeback. */ + bool canWB; + /** Whether or not the store is committed. */ + bool committed; + /** Whether or not the store is completed. */ + bool completed; + + typename std::list<DynInstPtr>::iterator lqIt; + }; + + enum Status { + Running, + Idle, + DcacheMissStall, + DcacheMissSwitch + }; + + private: + /** The OzoneLWLSQ thread id. */ + unsigned lsqID; + + /** The status of the LSQ unit. */ + Status _status; + + /** The store queue. */ + std::list<SQEntry> storeQueue; + /** The load queue. */ + std::list<DynInstPtr> loadQueue; + + typedef typename std::list<SQEntry>::iterator SQIt; + typedef typename std::list<DynInstPtr>::iterator LQIt; + + + struct HashFn { + size_t operator() (const int a) const + { + unsigned hash = (((a >> 14) ^ ((a >> 2) & 0xffff))) & 0x7FFFFFFF; + + return hash; + } + }; + + m5::hash_map<int, SQIt, HashFn> SQItHash; + std::queue<int> SQIndices; + m5::hash_map<int, LQIt, HashFn> LQItHash; + std::queue<int> LQIndices; + + typedef typename m5::hash_map<int, LQIt, HashFn>::iterator LQHashIt; + typedef typename m5::hash_map<int, SQIt, HashFn>::iterator SQHashIt; + // Consider making these 16 bits + /** The number of LQ entries. */ + unsigned LQEntries; + /** The number of SQ entries. */ + unsigned SQEntries; + + /** The number of load instructions in the LQ. */ + int loads; + /** The number of store instructions in the SQ (excludes those waiting to + * writeback). + */ + int stores; + + int storesToWB; + + /// @todo Consider moving to a more advanced model with write vs read ports + /** The number of cache ports available each cycle. */ + int cachePorts; + + /** The number of used cache ports in this cycle. */ + int usedPorts; + + //list<InstSeqNum> mshrSeqNums; + + //Stats::Scalar<> dcacheStallCycles; + Counter lastDcacheStall; + + // Make these per thread? + /** Whether or not the LSQ is stalled. 
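+     *  A stall means some load received only part of its data from an
+     *  older store (partial store-to-load forwarding) and must replay
+     *  once that store writes back.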
*/ + bool stalled; + /** The store that causes the stall due to partial store to load + * forwarding. + */ + InstSeqNum stallingStoreIsn; + /** The index of the above store. */ + LQIt stallingLoad; + + /** Whether or not a load is blocked due to the memory system. It is + * cleared when this value is checked via loadBlocked(). + */ + bool isLoadBlocked; + + bool loadBlockedHandled; + + InstSeqNum blockedLoadSeqNum; + + /** The oldest faulting load instruction. */ + DynInstPtr loadFaultInst; + /** The oldest faulting store instruction. */ + DynInstPtr storeFaultInst; + + /** The oldest load that caused a memory ordering violation. */ + DynInstPtr memDepViolator; + + // Will also need how many read/write ports the Dcache has. Or keep track + // of that in stage that is one level up, and only call executeLoad/Store + // the appropriate number of times. + + public: + /** Executes the load at the given index. */ + template <class T> + Fault read(MemReqPtr &req, T &data, int load_idx); + + /** Executes the store at the given index. */ + template <class T> + Fault write(MemReqPtr &req, T &data, int store_idx); + + /** Returns the sequence number of the head load instruction. */ + InstSeqNum getLoadHeadSeqNum() + { + if (!loadQueue.empty()) { + return loadQueue.back()->seqNum; + } else { + return 0; + } + + } + + /** Returns the sequence number of the head store instruction. */ + InstSeqNum getStoreHeadSeqNum() + { + if (!storeQueue.empty()) { + return storeQueue.back().inst->seqNum; + } else { + return 0; + } + + } + + /** Returns whether or not the LSQ unit is stalled. */ + bool isStalled() { return stalled; } +}; + +template <class Impl> +template <class T> +Fault +OzoneLWLSQ<Impl>::read(MemReqPtr &req, T &data, int load_idx) +{ + //Depending on issue2execute delay a squashed load could + //execute if it is found to be squashed in the same + //cycle it is scheduled to execute + typename m5::hash_map<int, LQIt, HashFn>::iterator + lq_hash_it = LQItHash.find(load_idx); + assert(lq_hash_it != LQItHash.end()); + DynInstPtr inst = (*(*lq_hash_it).second); + + if (inst->isExecuted()) { + panic("Should not reach this point with split ops!"); + + memcpy(&data,req->data,req->size); + + return NoFault; + } + + // Make sure this isn't an uncacheable access + // A bit of a hackish way to get uncached accesses to work only if they're + // at the head of the LSQ and are ready to commit (at the head of the ROB + // too). + // @todo: Fix uncached accesses. + if (req->flags & UNCACHEABLE && + (inst != loadQueue.back() || !inst->reachedCommit)) { + DPRINTF(OzoneLSQ, "[sn:%lli] Uncached load and not head of " + "commit/LSQ!\n", + inst->seqNum); + be->rescheduleMemInst(inst); + return TheISA::genMachineCheckFault(); + } + + // Check the SQ for any previous stores that might lead to forwarding + SQIt sq_it = storeQueue.begin(); + int store_size = 0; + + DPRINTF(OzoneLSQ, "Read called, load idx: %i addr: %#x\n", + load_idx, req->paddr); + + while (sq_it != storeQueue.end() && (*sq_it).inst->seqNum > inst->seqNum) + ++sq_it; + + while (1) { + // End once we've reached the top of the LSQ + if (sq_it == storeQueue.end()) { + break; + } + + assert((*sq_it).inst); + + store_size = (*sq_it).size; + + if (store_size == 0) { + sq_it++; + continue; + } + + // Check if the store data is within the lower and upper bounds of + // addresses that the request needs. 
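+        // A hypothetical example for illustration: a store with effAddr
+        // 0x100 and size 8 fully covers a 4-byte load at vaddr 0x104
+        // (both limit checks below pass, so it can forward), but only
+        // partially covers an 8-byte load at vaddr 0x104 (the upper limit
+        // fails), which triggers the partial-forwarding stall handled
+        // further below.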
+ bool store_has_lower_limit = + req->vaddr >= (*sq_it).inst->effAddr; + bool store_has_upper_limit = + (req->vaddr + req->size) <= ((*sq_it).inst->effAddr + + store_size); + bool lower_load_has_store_part = + req->vaddr < ((*sq_it).inst->effAddr + + store_size); + bool upper_load_has_store_part = + (req->vaddr + req->size) > (*sq_it).inst->effAddr; + + // If the store's data has all of the data needed, we can forward. + if (store_has_lower_limit && store_has_upper_limit) { + + int shift_amt = req->vaddr & (store_size - 1); + // Assumes byte addressing + shift_amt = shift_amt << 3; + + // Cast this to type T? + data = (*sq_it).data >> shift_amt; + + req->cmd = Read; + assert(!req->completionEvent); + req->completionEvent = NULL; + req->time = curTick; + assert(!req->data); + req->data = new uint8_t[64]; + + memcpy(req->data, &data, req->size); + + DPRINTF(OzoneLSQ, "Forwarding from store [sn:%lli] to load to " + "[sn:%lli] addr %#x, data %#x\n", + (*sq_it).inst->seqNum, inst->seqNum, req->vaddr, *(req->data)); + + typename BackEnd::LdWritebackEvent *wb = + new typename BackEnd::LdWritebackEvent(inst, + be); + + // We'll say this has a 1 cycle load-store forwarding latency + // for now. + // FIXME - Need to make this a parameter. + wb->schedule(curTick); + + // Should keep track of stat for forwarded data + return NoFault; + } else if ((store_has_lower_limit && lower_load_has_store_part) || + (store_has_upper_limit && upper_load_has_store_part) || + (lower_load_has_store_part && upper_load_has_store_part)) { + // This is the partial store-load forwarding case where a store + // has only part of the load's data. + + // If it's already been written back, then don't worry about + // stalling on it. + if ((*sq_it).completed) { + sq_it++; + break; + } + + // Must stall load and force it to retry, so long as it's the oldest + // load that needs to do so. + if (!stalled || + (stalled && + inst->seqNum < + (*stallingLoad)->seqNum)) { + stalled = true; + stallingStoreIsn = (*sq_it).inst->seqNum; + stallingLoad = (*lq_hash_it).second; + } + + // Tell IQ/mem dep unit that this instruction will need to be + // rescheduled eventually + be->rescheduleMemInst(inst); + + DPRINTF(OzoneLSQ, "Load-store forwarding mis-match. " + "Store [sn:%lli] to load addr %#x\n", + (*sq_it).inst->seqNum, req->vaddr); + + return NoFault; + } + sq_it++; + } + + // If there's no forwarding case, then go access memory + DPRINTF(OzoneLSQ, "Doing functional access for inst PC %#x\n", + inst->readPC()); + + // Setup MemReq pointer + req->cmd = Read; + req->completionEvent = NULL; + req->time = curTick; + assert(!req->data); + req->data = new uint8_t[64]; + Fault fault = cpu->read(req, data); + memcpy(req->data, &data, sizeof(T)); + + ++usedPorts; + + // if we have a cache, do cache access too + if (dcacheInterface) { + if (dcacheInterface->isBlocked()) { + // There's an older load that's already going to squash. + if (isLoadBlocked && blockedLoadSeqNum < inst->seqNum) + return NoFault; + + isLoadBlocked = true; + loadBlockedHandled = false; + blockedLoadSeqNum = inst->seqNum; + // No fault occurred, even though the interface is blocked. 
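+            // The back end is expected to notice loadBlocked(), squash
+            // back to the sequence number before this load, and refetch it
+            // once the cache unblocks (see squashDueToMemBlocked()).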
+ return NoFault; + } + + DPRINTF(OzoneLSQ, "D-cache: PC:%#x reading from paddr:%#x " + "vaddr:%#x flags:%i\n", + inst->readPC(), req->paddr, req->vaddr, req->flags); + + assert(!req->completionEvent); + req->completionEvent = + new typename BackEnd::LdWritebackEvent(inst, be); + + // Do Cache Access + MemAccessResult result = dcacheInterface->access(req); + + // Ugly hack to get an event scheduled *only* if the access is + // a miss. We really should add first-class support for this + // at some point. + // @todo: Probably should support having no events + if (result != MA_HIT) { + DPRINTF(OzoneLSQ, "D-cache miss!\n"); + DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n", + inst->seqNum); + + lastDcacheStall = curTick; + + _status = DcacheMissStall; + + } else { + DPRINTF(OzoneLSQ, "D-cache hit!\n"); + } + } else { + fatal("Must use D-cache with new memory system"); + } + + return NoFault; +} + +template <class Impl> +template <class T> +Fault +OzoneLWLSQ<Impl>::write(MemReqPtr &req, T &data, int store_idx) +{ + SQHashIt sq_hash_it = SQItHash.find(store_idx); + assert(sq_hash_it != SQItHash.end()); + + SQIt sq_it = (*sq_hash_it).second; + assert((*sq_it).inst); + + DPRINTF(OzoneLSQ, "Doing write to store idx %i, addr %#x data %#x" + " | [sn:%lli]\n", + store_idx, req->paddr, data, (*sq_it).inst->seqNum); + + (*sq_it).req = req; + (*sq_it).size = sizeof(T); + (*sq_it).data = data; + assert(!req->data); + req->data = new uint8_t[64]; + memcpy(req->data, (uint8_t *)&(*sq_it).data, req->size); + + // This function only writes the data to the store queue, so no fault + // can happen here. + return NoFault; +} + +#endif // __CPU_OZONE_LW_LSQ_HH__ diff --git a/cpu/ozone/lw_lsq_impl.hh b/cpu/ozone/lw_lsq_impl.hh new file mode 100644 index 000000000..f72bbb1cc --- /dev/null +++ b/cpu/ozone/lw_lsq_impl.hh @@ -0,0 +1,874 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "arch/isa_traits.hh" +#include "base/str.hh" +#include "cpu/ozone/lw_lsq.hh" +#include "cpu/checker/cpu.hh" + +template <class Impl> +OzoneLWLSQ<Impl>::StoreCompletionEvent::StoreCompletionEvent(DynInstPtr &_inst, + BackEnd *_be, + Event *wb_event, + OzoneLWLSQ<Impl> *lsq_ptr) + : Event(&mainEventQueue), + inst(_inst), + be(_be), + wbEvent(wb_event), + miss(false), + lsqPtr(lsq_ptr) +{ + this->setFlags(Event::AutoDelete); +} + +template <class Impl> +void +OzoneLWLSQ<Impl>::StoreCompletionEvent::process() +{ + DPRINTF(OzoneLSQ, "Cache miss complete for store [sn:%lli]\n", + inst->seqNum); + + //lsqPtr->removeMSHR(lsqPtr->storeQueue[storeIdx].inst->seqNum); + +// lsqPtr->cpu->wakeCPU(); + if (lsqPtr->isSwitchedOut()) { + if (wbEvent) + delete wbEvent; + + return; + } + + if (wbEvent) { + wbEvent->process(); + delete wbEvent; + } + + lsqPtr->completeStore(inst->sqIdx); + if (miss) + be->removeDcacheMiss(inst); +} + +template <class Impl> +const char * +OzoneLWLSQ<Impl>::StoreCompletionEvent::description() +{ + return "LSQ store completion event"; +} + +template <class Impl> +OzoneLWLSQ<Impl>::OzoneLWLSQ() + : loads(0), stores(0), storesToWB(0), stalled(false), isLoadBlocked(false), + loadBlockedHandled(false) +{ +} + +template<class Impl> +void +OzoneLWLSQ<Impl>::init(Params *params, unsigned maxLQEntries, + unsigned maxSQEntries, unsigned id) +{ + DPRINTF(OzoneLSQ, "Creating OzoneLWLSQ%i object.\n",id); + + lsqID = id; + + LQEntries = maxLQEntries; + SQEntries = maxSQEntries; + + for (int i = 0; i < LQEntries * 2; i++) { + LQIndices.push(i); + SQIndices.push(i); + } + + usedPorts = 0; + cachePorts = params->cachePorts; + + dcacheInterface = params->dcacheInterface; + + loadFaultInst = storeFaultInst = memDepViolator = NULL; + + blockedLoadSeqNum = 0; +} + +template<class Impl> +std::string +OzoneLWLSQ<Impl>::name() const +{ + return "lsqunit"; +} + +template<class Impl> +void +OzoneLWLSQ<Impl>::clearLQ() +{ + loadQueue.clear(); +} + +template<class Impl> +void +OzoneLWLSQ<Impl>::clearSQ() +{ + storeQueue.clear(); +} +/* +template<class Impl> +void +OzoneLWLSQ<Impl>::setPageTable(PageTable *pt_ptr) +{ + DPRINTF(OzoneLSQ, "Setting the page table pointer.\n"); + pTable = pt_ptr; +} +*/ +template<class Impl> +void +OzoneLWLSQ<Impl>::resizeLQ(unsigned size) +{ + assert( size >= LQEntries); + + if (size > LQEntries) { + while (size > loadQueue.size()) { + DynInstPtr dummy; + loadQueue.push_back(dummy); + LQEntries++; + } + } else { + LQEntries = size; + } + +} + +template<class Impl> +void +OzoneLWLSQ<Impl>::resizeSQ(unsigned size) +{ + if (size > SQEntries) { + while (size > storeQueue.size()) { + SQEntry dummy; + storeQueue.push_back(dummy); + SQEntries++; + } + } else { + SQEntries = size; + } +} + +template <class Impl> +void +OzoneLWLSQ<Impl>::insert(DynInstPtr &inst) +{ + // Make sure we really have a memory reference. + assert(inst->isMemRef()); + + // Make sure it's one of the two classes of memory references. 
+    assert(inst->isLoad() || inst->isStore());
+
+    if (inst->isLoad()) {
+        insertLoad(inst);
+    } else {
+        insertStore(inst);
+    }
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::insertLoad(DynInstPtr &load_inst)
+{
+    assert(loads < LQEntries * 2);
+    assert(!LQIndices.empty());
+    int load_index = LQIndices.front();
+    LQIndices.pop();
+
+    DPRINTF(OzoneLSQ, "Inserting load PC %#x, idx:%i [sn:%lli]\n",
+            load_inst->readPC(), load_index, load_inst->seqNum);
+
+    load_inst->lqIdx = load_index;
+
+    loadQueue.push_front(load_inst);
+    LQItHash[load_index] = loadQueue.begin();
+
+    ++loads;
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::insertStore(DynInstPtr &store_inst)
+{
+    // Make sure it is not full before inserting an instruction.
+    assert(stores - storesToWB < SQEntries);
+
+    assert(!SQIndices.empty());
+    int store_index = SQIndices.front();
+    SQIndices.pop();
+
+    DPRINTF(OzoneLSQ, "Inserting store PC %#x, idx:%i [sn:%lli]\n",
+            store_inst->readPC(), store_index, store_inst->seqNum);
+
+    store_inst->sqIdx = store_index;
+    SQEntry entry(store_inst);
+    if (loadQueue.empty()) {
+        entry.lqIt = loadQueue.end();
+    } else {
+        entry.lqIt = loadQueue.begin();
+    }
+    storeQueue.push_front(entry);
+
+    SQItHash[store_index] = storeQueue.begin();
+
+    ++stores;
+}
+
+template <class Impl>
+typename Impl::DynInstPtr
+OzoneLWLSQ<Impl>::getMemDepViolator()
+{
+    DynInstPtr temp = memDepViolator;
+
+    memDepViolator = NULL;
+
+    return temp;
+}
+
+template <class Impl>
+unsigned
+OzoneLWLSQ<Impl>::numFreeEntries()
+{
+    unsigned free_lq_entries = LQEntries - loads;
+    unsigned free_sq_entries = SQEntries - stores;
+
+    // Both the LQ and SQ entries have an extra dummy entry to differentiate
+    // empty/full conditions.  Subtract 1 from the free entries.
+    if (free_lq_entries < free_sq_entries) {
+        return free_lq_entries - 1;
+    } else {
+        return free_sq_entries - 1;
+    }
+}
+
+template <class Impl>
+int
+OzoneLWLSQ<Impl>::numLoadsReady()
+{
+    int retval = 0;
+    LQIt lq_it = loadQueue.begin();
+    LQIt end_it = loadQueue.end();
+
+    while (lq_it != end_it) {
+        if ((*lq_it)->readyToIssue()) {
+            ++retval;
+        }
+        // Advance the iterator; without this the loop never terminates.
+        ++lq_it;
+    }
+
+    return retval;
+}
+
+template <class Impl>
+Fault
+OzoneLWLSQ<Impl>::executeLoad(DynInstPtr &inst)
+{
+    // Execute a specific load.
+    Fault load_fault = NoFault;
+
+    DPRINTF(OzoneLSQ, "Executing load PC %#x, [sn:%lli]\n",
+            inst->readPC(), inst->seqNum);
+
+    // Make sure it's really in the list.  Normally it should always be in
+    // the list; however, due to a syscall it may not be.
+#if 0
+#ifdef DEBUG
+    int i = loadHead;
+    while (1) {
+        if (i == loadTail && !find(inst)) {
+            assert(0 && "Load not in the queue!");
+        } else if (loadQueue[i] == inst) {
+            break;
+        }
+
+        i = i + 1;
+        if (i >= LQEntries) {
+            i = 0;
+        }
+    }
+#endif // DEBUG
+#endif
+
+    load_fault = inst->initiateAcc();
+
+    // Might want to make sure that I'm not overwriting a previously
+    // faulting instruction that hasn't been checked yet.
+    // Actually probably want the oldest faulting load.
+    if (load_fault != NoFault) {
+        DPRINTF(OzoneLSQ, "Load [sn:%lli] has a fault\n", inst->seqNum);
+        // Maybe just set it as can commit here, although that might cause
+        // some other problems with sending traps to the ROB too quickly.
+        be->instToCommit(inst);
+//        iewStage->activityThisCycle();
+    }
+
+    return load_fault;
+}
+
+template <class Impl>
+Fault
+OzoneLWLSQ<Impl>::executeStore(DynInstPtr &store_inst)
+{
+    // Make sure that a store exists.
+    assert(stores != 0);
+
+    int store_idx = store_inst->sqIdx;
+    SQHashIt sq_hash_it = SQItHash.find(store_idx);
+    assert(sq_hash_it != SQItHash.end());
+    DPRINTF(OzoneLSQ, "Executing store PC %#x [sn:%lli]\n",
+            store_inst->readPC(), store_inst->seqNum);
+
+    SQIt sq_it = (*sq_hash_it).second;
+
+    Fault store_fault = store_inst->initiateAcc();
+
+    // Store size should now be available.  Use it to get proper offset for
+    // addr comparisons.
+    int size = (*sq_it).size;
+
+    if (size == 0) {
+        DPRINTF(OzoneLSQ, "Fault on Store PC %#x, [sn:%lli], Size = 0\n",
+                store_inst->readPC(), store_inst->seqNum);
+
+        return store_fault;
+    }
+
+    if (!storeFaultInst) {
+        if (store_fault != NoFault) {
+            // Record the faulting store before bailing out; assigning
+            // after panic() could never execute.
+            storeFaultInst = store_inst;
+            panic("Fault in a store instruction!");
+        } else if (store_inst->isStoreConditional()) {
+            // Store conditionals need to set themselves as able to
+            // writeback if we haven't had a fault by here.
+            (*sq_it).canWB = true;
+
+            ++storesToWB;
+            DPRINTF(OzoneLSQ, "Nonspeculative store! storesToWB:%i\n",
+                    storesToWB);
+        }
+    }
+
+    LQIt lq_it = --(loadQueue.end());
+
+    if (!memDepViolator) {
+        while (lq_it != loadQueue.end()) {
+            if ((*lq_it)->seqNum < store_inst->seqNum) {
+                lq_it--;
+                continue;
+            }
+            // Actually should only check loads that have actually executed.
+            // Might be safe because effAddr is set to InvalAddr when the
+            // dyn inst is created.
+
+            // Ideally this would check all addresses in the proper size
+            // range; instead the comparison below conservatively matches
+            // at a coarse granularity.
+            // @todo: Fix this, magic number being used here
+            if (((*lq_it)->effAddr >> 8) ==
+                (store_inst->effAddr >> 8)) {
+                // A load incorrectly passed this store.  Squash and refetch.
+                // For now return a fault to show that it was unsuccessful.
+                memDepViolator = (*lq_it);
+
+                return TheISA::genMachineCheckFault();
+            }
+
+            lq_it--;
+        }
+
+        // If we've reached this point, there was no violation.
+ memDepViolator = NULL; + } + + return store_fault; +} + +template <class Impl> +void +OzoneLWLSQ<Impl>::commitLoad() +{ + assert(!loadQueue.empty()); + + DPRINTF(OzoneLSQ, "[sn:%lli] Committing head load instruction, PC %#x\n", + loadQueue.back()->seqNum, loadQueue.back()->readPC()); + + LQIndices.push(loadQueue.back()->lqIdx); + LQItHash.erase(loadQueue.back()->lqIdx); + + loadQueue.pop_back(); + + --loads; +} + +template <class Impl> +void +OzoneLWLSQ<Impl>::commitLoads(InstSeqNum &youngest_inst) +{ + assert(loads == 0 || !loadQueue.empty()); + + while (loads != 0 && + loadQueue.back()->seqNum <= youngest_inst) { + commitLoad(); + } +} + +template <class Impl> +void +OzoneLWLSQ<Impl>::commitStores(InstSeqNum &youngest_inst) +{ + assert(stores == 0 || !storeQueue.empty()); + + SQIt sq_it = --(storeQueue.end()); + while (!storeQueue.empty() && sq_it != storeQueue.end()) { + assert((*sq_it).inst); + if (!(*sq_it).canWB) { + if ((*sq_it).inst->seqNum > youngest_inst) { + break; + } + ++storesToWB; + + DPRINTF(OzoneLSQ, "Marking store as able to write back, PC " + "%#x [sn:%lli], storesToWB:%i\n", + (*sq_it).inst->readPC(), + (*sq_it).inst->seqNum, + storesToWB); + + (*sq_it).canWB = true; + } + + sq_it--; + } +} + +template <class Impl> +void +OzoneLWLSQ<Impl>::writebackStores() +{ + SQIt sq_it = --(storeQueue.end()); + while (storesToWB > 0 && + sq_it != storeQueue.end() && + (*sq_it).inst && + (*sq_it).canWB && + usedPorts < cachePorts) { + + DynInstPtr inst = (*sq_it).inst; + + if ((*sq_it).size == 0 && !(*sq_it).completed) { + sq_it--; + completeStore(inst->sqIdx); + + continue; + } + + if (inst->isDataPrefetch() || (*sq_it).committed) { + sq_it--; + continue; + } + + if (dcacheInterface && dcacheInterface->isBlocked()) { + DPRINTF(OzoneLSQ, "Unable to write back any more stores, cache" + " is blocked!\n"); + break; + } + + ++usedPorts; + + assert((*sq_it).req); + assert(!(*sq_it).committed); + + (*sq_it).committed = true; + + MemReqPtr req = (*sq_it).req; + + req->cmd = Write; + req->completionEvent = NULL; + req->time = curTick; + + switch((*sq_it).size) { + case 1: + cpu->write(req, (uint8_t &)(*sq_it).data); + break; + case 2: + cpu->write(req, (uint16_t &)(*sq_it).data); + break; + case 4: + cpu->write(req, (uint32_t &)(*sq_it).data); + break; + case 8: + cpu->write(req, (uint64_t &)(*sq_it).data); + break; + default: + panic("Unexpected store size!\n"); + } + if (!(req->flags & LOCKED)) { + (*sq_it).inst->setCompleted(); + if (cpu->checker) { + cpu->checker->tick((*sq_it).inst); + } + } + + DPRINTF(OzoneLSQ, "D-Cache: Writing back store idx:%i PC:%#x " + "to Addr:%#x, data:%#x [sn:%lli]\n", + inst->sqIdx,inst->readPC(), + req->paddr, *(req->data), + inst->seqNum); + + if (dcacheInterface) { + assert(!req->completionEvent); + StoreCompletionEvent *store_event = new + StoreCompletionEvent(inst, be, NULL, this); + req->completionEvent = store_event; + + MemAccessResult result = dcacheInterface->access(req); + + if (isStalled() && + inst->seqNum == stallingStoreIsn) { + DPRINTF(OzoneLSQ, "Unstalling, stalling store [sn:%lli] " + "load [sn:%lli]\n", + stallingStoreIsn, (*stallingLoad)->seqNum); + stalled = false; + stallingStoreIsn = 0; + be->replayMemInst((*stallingLoad)); + } + + if (result != MA_HIT && dcacheInterface->doEvents()) { + store_event->miss = true; + typename BackEnd::LdWritebackEvent *wb = NULL; + if (req->flags & LOCKED) { + wb = new typename BackEnd::LdWritebackEvent(inst, + be); + store_event->wbEvent = wb; + } + + DPRINTF(OzoneLSQ,"D-Cache Write Miss!\n"); + +// 
DPRINTF(Activity, "Active st accessing mem miss [sn:%lli]\n", +// inst->seqNum); + + be->addDcacheMiss(inst); + + lastDcacheStall = curTick; + + _status = DcacheMissStall; + + // Increment stat here or something + + sq_it--; + } else { + DPRINTF(OzoneLSQ,"D-Cache: Write Hit on idx:%i !\n", + inst->sqIdx); + +// DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n", +// inst->seqNum); + + if (req->flags & LOCKED) { + // Stx_C does not generate a system port + // transaction in the 21264, but that might be + // hard to accomplish in this model. + + typename BackEnd::LdWritebackEvent *wb = + new typename BackEnd::LdWritebackEvent(inst, + be); + store_event->wbEvent = wb; + } + sq_it--; + } + } else { + panic("Must HAVE DCACHE!!!!!\n"); + } + } + + // Not sure this should set it to 0. + usedPorts = 0; + + assert(stores >= 0 && storesToWB >= 0); +} + +template <class Impl> +void +OzoneLWLSQ<Impl>::squash(const InstSeqNum &squashed_num) +{ + DPRINTF(OzoneLSQ, "Squashing until [sn:%lli]!" + "(Loads:%i Stores:%i)\n",squashed_num,loads,stores); + + + LQIt lq_it = loadQueue.begin(); + + while (loads != 0 && (*lq_it)->seqNum > squashed_num) { + assert(!loadQueue.empty()); + // Clear the smart pointer to make sure it is decremented. + DPRINTF(OzoneLSQ,"Load Instruction PC %#x squashed, " + "[sn:%lli]\n", + (*lq_it)->readPC(), + (*lq_it)->seqNum); + + if (isStalled() && lq_it == stallingLoad) { + stalled = false; + stallingStoreIsn = 0; + stallingLoad = NULL; + } + + --loads; + + // Inefficient! + LQHashIt lq_hash_it = LQItHash.find((*lq_it)->lqIdx); + assert(lq_hash_it != LQItHash.end()); + LQItHash.erase(lq_hash_it); + LQIndices.push((*lq_it)->lqIdx); + loadQueue.erase(lq_it++); + } + + if (isLoadBlocked) { + if (squashed_num < blockedLoadSeqNum) { + isLoadBlocked = false; + loadBlockedHandled = false; + blockedLoadSeqNum = 0; + } + } + + SQIt sq_it = storeQueue.begin(); + + while (stores != 0 && (*sq_it).inst->seqNum > squashed_num) { + assert(!storeQueue.empty()); + + if ((*sq_it).canWB) { + break; + } + + // Clear the smart pointer to make sure it is decremented. + DPRINTF(OzoneLSQ,"Store Instruction PC %#x idx:%i squashed [sn:%lli]\n", + (*sq_it).inst->readPC(), (*sq_it).inst->sqIdx, + (*sq_it).inst->seqNum); + + // I don't think this can happen. It should have been cleared by the + // stalling load. 
+ if (isStalled() && + (*sq_it).inst->seqNum == stallingStoreIsn) { + panic("Is stalled should have been cleared by stalling load!\n"); + stalled = false; + stallingStoreIsn = 0; + } + + SQHashIt sq_hash_it = SQItHash.find((*sq_it).inst->sqIdx); + assert(sq_hash_it != SQItHash.end()); + SQItHash.erase(sq_hash_it); + SQIndices.push((*sq_it).inst->sqIdx); + (*sq_it).inst = NULL; + (*sq_it).canWB = 0; + + if ((*sq_it).req) { + assert(!(*sq_it).req->completionEvent); + } + (*sq_it).req = NULL; + --stores; + storeQueue.erase(sq_it++); + } +} + +template <class Impl> +void +OzoneLWLSQ<Impl>::dumpInsts() +{ + cprintf("Load store queue: Dumping instructions.\n"); + cprintf("Load queue size: %i\n", loads); + cprintf("Load queue: "); + + LQIt lq_it = --(loadQueue.end()); + + while (lq_it != loadQueue.end() && (*lq_it)) { + cprintf("[sn:%lli] %#x ", (*lq_it)->seqNum, + (*lq_it)->readPC()); + + lq_it--; + } + + cprintf("\nStore queue size: %i\n", stores); + cprintf("Store queue: "); + + SQIt sq_it = --(storeQueue.end()); + + while (sq_it != storeQueue.end() && (*sq_it).inst) { + cprintf("[sn:%lli]\nPC:%#x\nSize:%i\nCommitted:%i\nCompleted:%i\ncanWB:%i\n", + (*sq_it).inst->seqNum, + (*sq_it).inst->readPC(), + (*sq_it).size, + (*sq_it).committed, + (*sq_it).completed, + (*sq_it).canWB); + + sq_it--; + } + + cprintf("\n"); +} + +template <class Impl> +void +OzoneLWLSQ<Impl>::completeStore(int store_idx) +{ + SQHashIt sq_hash_it = SQItHash.find(store_idx); + assert(sq_hash_it != SQItHash.end()); + SQIt sq_it = (*sq_hash_it).second; + + assert((*sq_it).inst); + (*sq_it).completed = true; + DynInstPtr inst = (*sq_it).inst; + + --storesToWB; + + if (isStalled() && + inst->seqNum == stallingStoreIsn) { + DPRINTF(OzoneLSQ, "Unstalling, stalling store [sn:%lli] " + "load [sn:%lli]\n", + stallingStoreIsn, (*stallingLoad)->seqNum); + stalled = false; + stallingStoreIsn = 0; + be->replayMemInst((*stallingLoad)); + } + + DPRINTF(OzoneLSQ, "Completing store idx:%i [sn:%lli], storesToWB:%i\n", + inst->sqIdx, inst->seqNum, storesToWB); + + assert(!storeQueue.empty()); + SQItHash.erase(sq_hash_it); + SQIndices.push(inst->sqIdx); + storeQueue.erase(sq_it); + --stores; + + inst->setCompleted(); + if (cpu->checker) { + cpu->checker->tick(inst); + } +} + +template <class Impl> +void +OzoneLWLSQ<Impl>::switchOut() +{ + assert(storesToWB == 0); + switchedOut = true; + SQIt sq_it = --(storeQueue.end()); + while (storesToWB > 0 && + sq_it != storeQueue.end() && + (*sq_it).inst && + (*sq_it).canWB) { + + DynInstPtr inst = (*sq_it).inst; + + if ((*sq_it).size == 0 && !(*sq_it).completed) { + sq_it--; + continue; + } + + // Store conditionals don't complete until *after* they have written + // back. If it's here and not yet sent to memory, then don't bother + // as it's not part of committed state. 
+ if (inst->isDataPrefetch() || (*sq_it).committed) { + sq_it--; + continue; + } else if ((*sq_it).req->flags & LOCKED) { + sq_it--; + assert(!(*sq_it).canWB || + ((*sq_it).canWB && (*sq_it).req->flags & LOCKED)); + continue; + } + + assert((*sq_it).req); + assert(!(*sq_it).committed); + + MemReqPtr req = (*sq_it).req; + (*sq_it).committed = true; + + req->cmd = Write; + req->completionEvent = NULL; + req->time = curTick; + assert(!req->data); + req->data = new uint8_t[64]; + memcpy(req->data, (uint8_t *)&(*sq_it).data, req->size); + + DPRINTF(OzoneLSQ, "Switching out : Writing back store idx:%i PC:%#x " + "to Addr:%#x, data:%#x directly to memory [sn:%lli]\n", + inst->sqIdx,inst->readPC(), + req->paddr, *(req->data), + inst->seqNum); + + switch((*sq_it).size) { + case 1: + cpu->write(req, (uint8_t &)(*sq_it).data); + break; + case 2: + cpu->write(req, (uint16_t &)(*sq_it).data); + break; + case 4: + cpu->write(req, (uint32_t &)(*sq_it).data); + break; + case 8: + cpu->write(req, (uint64_t &)(*sq_it).data); + break; + default: + panic("Unexpected store size!\n"); + } + } + + // Clear the queue to free up resources + storeQueue.clear(); + loadQueue.clear(); + loads = stores = storesToWB = 0; +} + +template <class Impl> +void +OzoneLWLSQ<Impl>::takeOverFrom(ExecContext *old_xc) +{ + // Clear out any old state. May be redundant if this is the first time + // the CPU is being used. + stalled = false; + isLoadBlocked = false; + loadBlockedHandled = false; + switchedOut = false; + + // Could do simple checks here to see if indices are on twice + while (!LQIndices.empty()) + LQIndices.pop(); + while (!SQIndices.empty()) + SQIndices.pop(); + + for (int i = 0; i < LQEntries * 2; i++) { + LQIndices.push(i); + SQIndices.push(i); + } + + usedPorts = 0; + + loadFaultInst = storeFaultInst = memDepViolator = NULL; + + blockedLoadSeqNum = 0; +} diff --git a/cpu/ozone/null_predictor.hh b/cpu/ozone/null_predictor.hh new file mode 100644 index 000000000..d19e2cd1c --- /dev/null +++ b/cpu/ozone/null_predictor.hh @@ -0,0 +1,76 @@ + +#ifndef __CPU_OZONE_NULL_PREDICTOR_HH__ +#define __CPU_OZONE_NULL_PREDICTOR_HH__ + +#include "arch/isa_traits.hh" +#include "cpu/inst_seq.hh" + +template <class Impl> +class NullPredictor +{ + public: + typedef typename Impl::Params Params; + typedef typename Impl::DynInstPtr DynInstPtr; + + NullPredictor(Params *p) { } + + struct BPredInfo { + BPredInfo() + : PC(0), nextPC(0) + { } + + BPredInfo(const Addr &pc, const Addr &next_pc) + : PC(pc), nextPC(next_pc) + { } + + Addr PC; + Addr nextPC; + }; + + BPredInfo lookup(Addr &PC) { return BPredInfo(PC, PC+4); } + + void undo(BPredInfo &bp_info) { return; } + + /** + * Predicts whether or not the instruction is a taken branch, and the + * target of the branch if it is taken. + * @param inst The branch instruction. + * @param PC The predicted PC is passed back through this parameter. + * @param tid The thread id. + * @return Returns if the branch is taken or not. + */ + bool predict(DynInstPtr &inst, Addr &PC, unsigned tid) + { return false; } + + /** + * Tells the branch predictor to commit any updates until the given + * sequence number. + * @param done_sn The sequence number to commit any older updates up until. + * @param tid The thread id. + */ + void update(const InstSeqNum &done_sn, unsigned tid) { } + + /** + * Squashes all outstanding updates until a given sequence number. + * @param squashed_sn The sequence number to squash any younger updates up + * until. + * @param tid The thread id. 
+ */ + void squash(const InstSeqNum &squashed_sn, unsigned tid) { } + + /** + * Squashes all outstanding updates until a given sequence number, and + * corrects that sn's update with the proper address and taken/not taken. + * @param squashed_sn The sequence number to squash any younger updates up + * until. + * @param corr_target The correct branch target. + * @param actually_taken The correct branch direction. + * @param tid The thread id. + */ + void squash(const InstSeqNum &squashed_sn, const Addr &corr_target, + bool actually_taken, unsigned tid) + { } + +}; + +#endif // __CPU_OZONE_NULL_PREDICTOR_HH__ diff --git a/cpu/ozone/ozone_impl.hh b/cpu/ozone/ozone_impl.hh new file mode 100644 index 000000000..1f543ec6e --- /dev/null +++ b/cpu/ozone/ozone_impl.hh @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_OZONE_OZONE_IMPL_HH__ +#define __CPU_OZONE_OZONE_IMPL_HH__ + +#include "arch/alpha/isa_traits.hh" +#include "cpu/o3/bpred_unit.hh" +#include "cpu/ozone/back_end.hh" +#include "cpu/ozone/front_end.hh" +#include "cpu/ozone/inst_queue.hh" +#include "cpu/ozone/lsq_unit.hh" +#include "cpu/ozone/lw_lsq.hh" +#include "cpu/ozone/lw_back_end.hh" +#include "cpu/ozone/null_predictor.hh" +#include "cpu/ozone/dyn_inst.hh" +#include "cpu/ozone/simple_params.hh" + +template <class Impl> +class OzoneCPU; + +template <class Impl> +class OzoneDynInst; + +struct OzoneImpl { + typedef SimpleParams Params; + typedef OzoneCPU<OzoneImpl> OzoneCPU; + typedef OzoneCPU FullCPU; + + // Would like to put these into their own area. 
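+    // Both predictors are assumed to expose the same interface, so the
+    // commented-out NullPredictor typedef below can be swapped in to run
+    // without real branch prediction.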
+// typedef NullPredictor BranchPred;
+    typedef TwobitBPredUnit<OzoneImpl> BranchPred;
+    typedef FrontEnd<OzoneImpl> FrontEnd;
+    // Will need IQ, LSQ eventually
+    typedef LWBackEnd<OzoneImpl> BackEnd;
+
+    typedef InstQueue<OzoneImpl> InstQueue;
+    typedef OzoneLWLSQ<OzoneImpl> LdstQueue;
+
+    typedef OzoneDynInst<OzoneImpl> DynInst;
+    typedef RefCountingPtr<DynInst> DynInstPtr;
+
+    typedef uint64_t IssueStruct;
+
+    enum {
+        MaxThreads = 1
+    };
+};
+
+#endif // __CPU_OZONE_OZONE_IMPL_HH__
diff --git a/cpu/ozone/rename_table.cc b/cpu/ozone/rename_table.cc
new file mode 100644
index 000000000..fff41903e
--- /dev/null
+++ b/cpu/ozone/rename_table.cc
@@ -0,0 +1,7 @@
+
+#include "cpu/ozone/rename_table_impl.hh"
+#include "cpu/ozone/ozone_impl.hh"
+#include "cpu/ozone/simple_impl.hh"
+
+template class RenameTable<OzoneImpl>;
+template class RenameTable<SimpleImpl>;
diff --git a/cpu/ozone/rename_table.hh b/cpu/ozone/rename_table.hh
new file mode 100644
index 000000000..6ee23b21b
--- /dev/null
+++ b/cpu/ozone/rename_table.hh
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_OZONE_RENAME_TABLE_HH__
+#define __CPU_OZONE_RENAME_TABLE_HH__
+
+#include "arch/isa_traits.hh"
+
+/** Rename table that maps each architectural register to its producing
+ * DynInst.  Needs to support copying from one table to another.
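+ *
+ * An illustrative (hypothetical) use, assuming an Impl with a DynInstPtr:
+ * @code
+ * RenameTable<Impl> specTable, commitTable;
+ * specTable[dest_reg] = producing_inst;  // record the newest producer
+ * specTable.copyFrom(commitTable);       // e.g. restore the map on a squash
+ * @endcode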
+ */
+
+template <class Impl>
+class RenameTable {
+  public:
+    typedef typename Impl::DynInstPtr DynInstPtr;
+
+    RenameTable();
+
+    void copyFrom(const RenameTable<Impl> &table_to_copy);
+
+    DynInstPtr &operator [] (int index)
+    { return table[index]; }
+
+    DynInstPtr table[TheISA::TotalNumRegs];
+};
+
+#endif // __CPU_OZONE_RENAME_TABLE_HH__
diff --git a/cpu/ozone/rename_table_impl.hh b/cpu/ozone/rename_table_impl.hh
new file mode 100644
index 000000000..86fc1cc55
--- /dev/null
+++ b/cpu/ozone/rename_table_impl.hh
@@ -0,0 +1,23 @@
+
+#include <cstddef> // Defines NULL.
+#include "cpu/ozone/rename_table.hh"
+
+template <class Impl>
+RenameTable<Impl>::RenameTable()
+{
+    // Ideally these would be set to dummy DynInsts that carry each
+    // register's initial value, forcing the values to be initialized and
+    // keeping the lookup path uniform.
+    for (int i = 0; i < TheISA::TotalNumRegs; ++i) {
+        table[i] = NULL;
+    }
+}
+
+template <class Impl>
+void
+RenameTable<Impl>::copyFrom(const RenameTable<Impl> &table_to_copy)
+{
+    for (int i = 0; i < TheISA::TotalNumRegs; ++i) {
+        table[i] = table_to_copy.table[i];
+    }
+}
diff --git a/cpu/ozone/simple_impl.hh b/cpu/ozone/simple_impl.hh
new file mode 100644
index 000000000..961bf2ea9
--- /dev/null
+++ b/cpu/ozone/simple_impl.hh
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_OZONE_SIMPLE_IMPL_HH__
+#define __CPU_OZONE_SIMPLE_IMPL_HH__
+
+#include "arch/isa_traits.hh"
+#include "cpu/o3/bpred_unit.hh"
+#include "cpu/ozone/cpu.hh"
+#include "cpu/ozone/front_end.hh"
+#include "cpu/ozone/inorder_back_end.hh"
+#include "cpu/ozone/null_predictor.hh"
+#include "cpu/ozone/dyn_inst.hh"
+#include "cpu/ozone/simple_params.hh"
+
+//template <class Impl>
+//class OzoneCPU;
+
+template <class Impl>
+class OzoneDynInst;
+
+struct SimpleImpl {
+    typedef SimpleParams Params;
+    typedef OzoneCPU<SimpleImpl> OzoneCPU;
+    typedef OzoneCPU FullCPU;
+
+    // Would like to put these into their own area.
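+    // (Illustrative note: SimpleImpl differs from OzoneImpl chiefly in its
+    // BackEnd choice.  A hypothetical instantiation of this configuration
+    // would look like
+    //
+    //     OzoneCPU<SimpleImpl> cpu(params);  // in-order back end
+    //
+    // versus OzoneCPU<OzoneImpl> for the out-of-order LWBackEnd variant;
+    // the constructor signature here is assumed for illustration.)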
+// typedef NullPredictor BranchPred;
+    typedef TwobitBPredUnit<SimpleImpl> BranchPred;
+    typedef FrontEnd<SimpleImpl> FrontEnd;
+    // Will need IQ, LSQ eventually
+    typedef InorderBackEnd<SimpleImpl> BackEnd;
+
+    typedef OzoneDynInst<SimpleImpl> DynInst;
+    typedef RefCountingPtr<DynInst> DynInstPtr;
+
+    typedef uint64_t IssueStruct;
+
+    enum {
+        MaxThreads = 1
+    };
+};
+
+#endif // __CPU_OZONE_SIMPLE_IMPL_HH__
diff --git a/cpu/ozone/simple_params.hh b/cpu/ozone/simple_params.hh
new file mode 100644
index 000000000..647da1781
--- /dev/null
+++ b/cpu/ozone/simple_params.hh
@@ -0,0 +1,165 @@
+
+
+#ifndef __CPU_OZONE_SIMPLE_PARAMS_HH__
+#define __CPU_OZONE_SIMPLE_PARAMS_HH__
+
+#include "cpu/ozone/cpu.hh"
+
+// Forward declarations
+class AlphaDTB;
+class AlphaITB;
+class FUPool;
+class FunctionalMemory;
+class MemInterface;
+class PageTable;
+class Process;
+class System;
+
+/**
+ * This file defines the parameters used by the OzoneCPU.  The class is
+ * defined externally so that the Impl can expose a params type that it
+ * hands to each of the individual stages.
+ */
+
+class SimpleParams : public BaseCPU::Params
+{
+  public:
+
+#if FULL_SYSTEM
+    AlphaITB *itb;
+    AlphaDTB *dtb;
+#else
+    std::vector<Process *> workload;
+//    Process *process;
+#endif // FULL_SYSTEM
+
+    // Page table
+    PageTable *pTable;
+
+    FunctionalMemory *mem;
+
+    //
+    // Caches
+    //
+    MemInterface *icacheInterface;
+    MemInterface *dcacheInterface;
+
+    unsigned cachePorts;
+    unsigned width;
+    unsigned frontEndWidth;
+    unsigned backEndWidth;
+    unsigned backEndSquashLatency;
+    unsigned backEndLatency;
+    unsigned maxInstBufferSize;
+    unsigned numPhysicalRegs;
+    unsigned maxOutstandingMemOps;
+
+    //
+    // Fetch
+    //
+    unsigned decodeToFetchDelay;
+    unsigned renameToFetchDelay;
+    unsigned iewToFetchDelay;
+    unsigned commitToFetchDelay;
+    unsigned fetchWidth;
+
+    //
+    // Decode
+    //
+    unsigned renameToDecodeDelay;
+    unsigned iewToDecodeDelay;
+    unsigned commitToDecodeDelay;
+    unsigned fetchToDecodeDelay;
+    unsigned decodeWidth;
+
+    //
+    // Rename
+    //
+    unsigned iewToRenameDelay;
+    unsigned commitToRenameDelay;
+    unsigned decodeToRenameDelay;
+    unsigned renameWidth;
+
+    //
+    // IEW
+    //
+    unsigned commitToIEWDelay;
+    unsigned renameToIEWDelay;
+    unsigned issueToExecuteDelay;
+    unsigned issueWidth;
+    unsigned executeWidth;
+    unsigned executeIntWidth;
+    unsigned executeFloatWidth;
+    unsigned executeBranchWidth;
+    unsigned executeMemoryWidth;
+    FUPool *fuPool;
+
+    //
+    // Commit
+    //
+    unsigned iewToCommitDelay;
+    unsigned renameToROBDelay;
+    unsigned commitWidth;
+    unsigned squashWidth;
+
+    //
+    // Branch predictor (BP & BTB)
+    //
+    unsigned localPredictorSize;
+    unsigned localCtrBits;
+    unsigned localHistoryTableSize;
+    unsigned localHistoryBits;
+    unsigned globalPredictorSize;
+    unsigned globalCtrBits;
+    unsigned globalHistoryBits;
+    unsigned choicePredictorSize;
+    unsigned choiceCtrBits;
+
+    unsigned BTBEntries;
+    unsigned BTBTagSize;
+
+    unsigned RASSize;
+
+    //
+    // Load store queue
+    //
+    unsigned LQEntries;
+    unsigned SQEntries;
+
+    //
+    // Memory dependence
+    //
+    unsigned SSITSize;
+    unsigned LFSTSize;
+
+    //
+    // Miscellaneous
+    //
+    unsigned numPhysIntRegs;
+    unsigned numPhysFloatRegs;
+    unsigned numIQEntries;
+    unsigned numROBEntries;
+
+    bool decoupledFrontEnd;
+    int dispatchWidth;
+    int wbWidth;
+
+    // SMT parameters
+    unsigned smtNumFetchingThreads;
+
+    std::string smtFetchPolicy;
+
+    std::string smtIQPolicy;
+    unsigned smtIQThreshold;
+
+    std::string smtLSQPolicy;
+    unsigned smtLSQThreshold;
+
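+    // Sharing policy strings for commit and the ROB under SMT.  (The set of
+    // accepted values is defined by whichever stage parses these; strings
+    // such as "RoundRobin" or "OldestReady" are illustrative guesses only.)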
+    std::string smtCommitPolicy;
+
+    std::string smtROBPolicy;
+    unsigned smtROBThreshold;
+
+    // Probably can get this from somewhere.
+    unsigned instShiftAmt;
+};
+
+#endif // __CPU_OZONE_SIMPLE_PARAMS_HH__
diff --git a/cpu/ozone/thread_state.hh b/cpu/ozone/thread_state.hh
new file mode 100644
index 000000000..c86c3a720
--- /dev/null
+++ b/cpu/ozone/thread_state.hh
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_OZONE_THREAD_STATE_HH__
+#define __CPU_OZONE_THREAD_STATE_HH__
+
+#include "arch/faults.hh"
+#include "arch/isa_traits.hh"
+#include "cpu/exec_context.hh"
+#include "cpu/thread_state.hh"
+#include "sim/process.hh"
+
+class Event;
+//class Process;
+
+#if FULL_SYSTEM
+class EndQuiesceEvent;
+class FunctionProfile;
+class ProfileNode;
+#else
+class Process;
+class FunctionalMemory;
+#endif
+
+// OzoneThreadState arguably should hold only committed state.  It serves
+// much the same purpose as CPUExecContext: a central access point for the
+// state of a thread that can be accessed normally (i.e., not the in-flight
+// state inside an out-of-order processor).  That is especially useful for
+// SMT, and it makes the ExecContext proxy easier.  Open question: does it
+// need an XC proxy within it?
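+// For illustration (a hypothetical consumer; the names below all come from
+// this struct): a stage holding an OzoneThreadState pointer can consult
+// thread->renameTable[reg] for a register's producing instruction,
+// thread->readPC() for the committed PC, or thread->getXCProxy() for
+// generic ExecContext accesses.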
+template <class Impl>
+struct OzoneThreadState : public ThreadState {
+    typedef typename ExecContext::Status Status;
+    typedef typename Impl::FullCPU FullCPU;
+    typedef TheISA::MiscReg MiscReg;
+
+#if FULL_SYSTEM
+    OzoneThreadState(FullCPU *_cpu, int _thread_num, FunctionalMemory *_mem)
+        : ThreadState(-1, _thread_num, _mem),
+          inSyscall(0), trapPending(0)
+    {
+        memset(&regs, 0, sizeof(TheISA::RegFile));
+    }
+#else
+    OzoneThreadState(FullCPU *_cpu, int _thread_num, Process *_process,
+                     int _asid)
+        : ThreadState(-1, _thread_num, _process->getMemory(), _process,
+                      _asid),
+          cpu(_cpu), inSyscall(0), trapPending(0)
+    {
+        memset(&regs, 0, sizeof(TheISA::RegFile));
+    }
+
+    OzoneThreadState(FullCPU *_cpu, int _thread_num, FunctionalMemory *_mem,
+                     int _asid)
+        : ThreadState(-1, _thread_num, _mem, NULL, _asid),
+          cpu(_cpu), inSyscall(0), trapPending(0)
+    {
+        memset(&regs, 0, sizeof(TheISA::RegFile));
+    }
+#endif
+
+    Status _status;
+
+    Status status() const { return _status; }
+
+    void setStatus(Status new_status) { _status = new_status; }
+
+    RenameTable<Impl> renameTable;
+    Addr PC;
+    Addr nextPC;
+
+    // Current instruction
+    TheISA::MachInst inst;
+
+    TheISA::RegFile regs;
+
+    typename Impl::FullCPU *cpu;
+
+    bool inSyscall;
+
+    bool trapPending;
+
+    ExecContext *xcProxy;
+
+    ExecContext *getXCProxy() { return xcProxy; }
+
+#if !FULL_SYSTEM
+    Fault dummyTranslation(MemReqPtr &req)
+    {
+#if 0
+        assert((req->vaddr >> 48 & 0xffff) == 0);
+#endif
+
+        // Put the asid in the upper 16 bits of the paddr.
+        req->paddr = req->vaddr & ~((Addr)0xffff << (sizeof(Addr) * 8 - 16));
+        req->paddr = req->paddr |
+            ((Addr)req->asid << (sizeof(Addr) * 8 - 16));
+        return NoFault;
+    }
+
+    Fault translateInstReq(MemReqPtr &req)
+    {
+        return dummyTranslation(req);
+    }
+
+    Fault translateDataReadReq(MemReqPtr &req)
+    {
+        return dummyTranslation(req);
+    }
+
+    Fault translateDataWriteReq(MemReqPtr &req)
+    {
+        return dummyTranslation(req);
+    }
+#else
+    Fault translateInstReq(MemReqPtr &req)
+    {
+        return cpu->itb->translate(req);
+    }
+
+    Fault translateDataReadReq(MemReqPtr &req)
+    {
+        return cpu->dtb->translate(req, false);
+    }
+
+    Fault translateDataWriteReq(MemReqPtr &req)
+    {
+        return cpu->dtb->translate(req, true);
+    }
+#endif
+
+    MiscReg readMiscReg(int misc_reg)
+    {
+        return regs.miscRegs.readReg(misc_reg);
+    }
+
+    MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault)
+    {
+        return regs.miscRegs.readRegWithEffect(misc_reg, fault, xcProxy);
+    }
+
+    Fault setMiscReg(int misc_reg, const MiscReg &val)
+    {
+        return regs.miscRegs.setReg(misc_reg, val);
+    }
+
+    Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val)
+    {
+        return regs.miscRegs.setRegWithEffect(misc_reg, val, xcProxy);
+    }
+
+    uint64_t readPC()
+    { return PC; }
+
+    void setPC(uint64_t val)
+    { PC = val; }
+
+    uint64_t readNextPC()
+    { return nextPC; }
+
+    void setNextPC(uint64_t val)
+    { nextPC = val; }
+
+    bool misspeculating() { return false; }
+
+    void setInst(TheISA::MachInst _inst) { inst = _inst; }
+
+    Counter readFuncExeInst() { return funcExeInst; }
+
+    void setFuncExeInst(Counter new_val) { funcExeInst = new_val; }
+};
+
+#endif // __CPU_OZONE_THREAD_STATE_HH__
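A note on OzoneThreadState::dummyTranslation() above: with a 64-bit Addr,
sizeof(Addr) * 8 - 16 evaluates to 48, so the pseudo-translation clears the
top 16 bits of the virtual address and stores the address-space id there.
The following standalone sketch (illustrative only; it substitutes a plain
uint64_t for the simulator's Addr and MemReq types) checks that the packing
round-trips:

    #include <cassert>
    #include <stdint.h>

    typedef uint64_t Addr;

    // Mirror of the arithmetic in dummyTranslation(): clear the top 16 bits
    // of the virtual address, then store the asid in them.
    static Addr packAsid(Addr vaddr, Addr asid)
    {
        Addr paddr = vaddr & ~((Addr)0xffff << (sizeof(Addr) * 8 - 16));
        return paddr | (asid << (sizeof(Addr) * 8 - 16));
    }

    int main()
    {
        Addr paddr = packAsid(0x000000ff12345678ULL, 0x2a);
        assert(paddr >> 48 == 0x2a);                    // asid recovered
        assert((paddr & 0xffffffffffffULL) == 0xff12345678ULL);
        return 0;
    }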