From 759ff4b91024835d3bf436b993b0f39e276c36fe Mon Sep 17 00:00:00 2001
From: Kevin Lim <ktlim@umich.edu>
Date: Sat, 22 Apr 2006 18:45:01 -0400
Subject: Updates for OzoneCPU.

build/SConstruct:
    Include Ozone CPU models.
cpu/cpu_models.py:
    Include OzoneCPU models.

--HG--
extra : convert_revision : 51a016c216cacd2cc613eed79653026c2edda4b3
---
 cpu/ozone/back_end.cc              |    5 +
 cpu/ozone/back_end.hh              |  509 ++++++++++
 cpu/ozone/back_end_impl.hh         | 1853 ++++++++++++++++++++++++++++++++++++
 cpu/ozone/cpu.cc                   |    9 +-
 cpu/ozone/cpu.hh                   |  608 +++++-------
 cpu/ozone/cpu_builder.cc           |  818 ++++++++++++++++
 cpu/ozone/cpu_impl.hh              | 1140 +++++++++++++++++++++-
 cpu/ozone/dyn_inst.cc              |   35 +
 cpu/ozone/dyn_inst.hh              |  261 +++++
 cpu/ozone/dyn_inst_impl.hh         |  286 ++++++
 cpu/ozone/front_end.cc             |    7 +
 cpu/ozone/front_end.hh             |  242 +++++
 cpu/ozone/front_end_impl.hh        |  798 ++++++++++++++++
 cpu/ozone/inorder_back_end.cc      |    5 +
 cpu/ozone/inorder_back_end.hh      |  417 ++++++++
 cpu/ozone/inorder_back_end_impl.hh |  519 ++++++++++
 cpu/ozone/inst_queue.cc            |   36 +
 cpu/ozone/inst_queue.hh            |  506 ++++++++++
 cpu/ozone/inst_queue_impl.hh       | 1341 ++++++++++++++++++++++++++
 cpu/ozone/lsq_unit.cc              |   34 +
 cpu/ozone/lsq_unit.hh              |  632 ++++++++++++
 cpu/ozone/lsq_unit_impl.hh         |  846 ++++++++++++++++
 cpu/ozone/null_predictor.hh        |   76 ++
 cpu/ozone/ozone_impl.hh            |   73 ++
 cpu/ozone/rename_table.cc          |    7 +
 cpu/ozone/rename_table.hh          |   25 +
 cpu/ozone/rename_table_impl.hh     |   23 +
 cpu/ozone/simple_impl.hh           |   69 ++
 cpu/ozone/simple_params.hh         |  164 ++++
 cpu/ozone/thread_state.hh          |  171 ++++
 30 files changed, 11142 insertions(+), 373 deletions(-)
 create mode 100644 cpu/ozone/back_end.cc
 create mode 100644 cpu/ozone/back_end.hh
 create mode 100644 cpu/ozone/back_end_impl.hh
 create mode 100644 cpu/ozone/cpu_builder.cc
 create mode 100644 cpu/ozone/dyn_inst.cc
 create mode 100644 cpu/ozone/dyn_inst.hh
 create mode 100644 cpu/ozone/dyn_inst_impl.hh
 create mode 100644 cpu/ozone/front_end.cc
 create mode 100644 cpu/ozone/front_end.hh
 create mode 100644 cpu/ozone/front_end_impl.hh
 create mode 100644 cpu/ozone/inorder_back_end.cc
 create mode 100644 cpu/ozone/inorder_back_end.hh
 create mode 100644 cpu/ozone/inorder_back_end_impl.hh
 create mode 100644 cpu/ozone/inst_queue.cc
 create mode 100644 cpu/ozone/inst_queue.hh
 create mode 100644 cpu/ozone/inst_queue_impl.hh
 create mode 100644 cpu/ozone/lsq_unit.cc
 create mode 100644 cpu/ozone/lsq_unit.hh
 create mode 100644 cpu/ozone/lsq_unit_impl.hh
 create mode 100644 cpu/ozone/null_predictor.hh
 create mode 100644 cpu/ozone/ozone_impl.hh
 create mode 100644 cpu/ozone/rename_table.cc
 create mode 100644 cpu/ozone/rename_table.hh
 create mode 100644 cpu/ozone/rename_table_impl.hh
 create mode 100644 cpu/ozone/simple_impl.hh
 create mode 100644 cpu/ozone/simple_params.hh
 create mode 100644 cpu/ozone/thread_state.hh

(limited to 'cpu/ozone')
diff --git a/cpu/ozone/back_end.cc b/cpu/ozone/back_end.cc
new file mode 100644
index 000000000..dbab5435e
--- /dev/null
+++ b/cpu/ozone/back_end.cc
@@ -0,0 +1,5 @@
+
+#include "cpu/ozone/back_end_impl.hh"
+#include "cpu/ozone/ozone_impl.hh"
+
+// Explicitly instantiate the back end for the OzoneImpl policy so that the
+// template member definitions pulled in from back_end_impl.hh are emitted
+// in this translation unit.
+template class BackEnd<OzoneImpl>;
diff --git a/cpu/ozone/back_end.hh b/cpu/ozone/back_end.hh
new file mode 100644
index 000000000..0713a0143
--- /dev/null
+++ b/cpu/ozone/back_end.hh
@@ -0,0 +1,509 @@
+
+#ifndef __CPU_OZONE_BACK_END_HH__
+#define __CPU_OZONE_BACK_END_HH__
+
+#include <list>
+#include <queue>
+#include <string>
+
+#include "arch/faults.hh"
+#include "base/timebuf.hh"
+#include "cpu/inst_seq.hh"
+#include "cpu/ozone/rename_table.hh"
+#include "cpu/ozone/thread_state.hh"
+#include "mem/functional/functional.hh"
+#include "mem/mem_interface.hh"
+#include "mem/mem_req.hh"
+#include "sim/eventq.hh"
+
+class ExecContext;
+
+template <class Impl>
+class OzoneThreadState;
+
+/**
+ * Back end of the Ozone CPU model, parameterized on an Impl policy class
+ * that supplies the concrete Params, DynInst, FullCPU, FrontEnd and
+ * LdstQueue types.
+ *
+ * The class owns an internal instruction queue (InstQueue), a load/store
+ * queue, two rename tables, the lists of in-flight instructions, and the
+ * statistics for execution, writeback and commit.
+ */
+template <class Impl>
+class BackEnd
+{
+  public:
+    typedef OzoneThreadState<Impl> Thread;
+
+    typedef typename Impl::Params Params;
+    typedef typename Impl::DynInst DynInst;
+    typedef typename Impl::DynInstPtr DynInstPtr;
+    typedef typename Impl::FullCPU FullCPU;
+    typedef typename Impl::FrontEnd FrontEnd;
+    typedef typename Impl::FullCPU::CommStruct CommStruct;
+
+    /** Payload carried by the internal time buffers: a single count. */
+    struct SizeStruct {
+        int size;
+    };
+
+    // All internal stage-to-stage buffers carry the same SizeStruct payload.
+    typedef SizeStruct DispatchToIssue;
+    typedef SizeStruct IssueToExec;
+    typedef SizeStruct ExecToCommit;
+    typedef SizeStruct Writeback;
+
+    // Internal time buffers and the wires used to access them. The wires
+    // are bound in the constructor.
+    TimeBuffer<DispatchToIssue> d2i;
+    typename TimeBuffer<DispatchToIssue>::wire instsToDispatch;
+    TimeBuffer<IssueToExec> i2e;
+    typename TimeBuffer<IssueToExec>::wire instsToExecute;
+    TimeBuffer<ExecToCommit> e2c;
+    TimeBuffer<Writeback> numInstsToWB;
+
+    // Communication buffer shared with the rest of the CPU; presumably
+    // installed by setCommBuffer() (its definition is not in this header).
+    TimeBuffer<CommStruct> *comm;
+    typename TimeBuffer<CommStruct>::wire toIEW;
+    typename TimeBuffer<CommStruct>::wire fromCommit;
+
+    /**
+     * Instruction queue private to the back end. Instructions live on one
+     * of several lists (see the queue enum); each instruction remembers its
+     * position through its iqIt iterator and iqItValid flag.
+     */
+    class InstQueue {
+        /** Identifies one of the internal lists; used by find(). */
+        enum queue {
+            NonSpec,
+            IQ,
+            ToBeScheduled,
+            ReadyList,
+            ReplayList
+        };
+        /**
+         * Comparator for the ready priority queue. Because it returns true
+         * when lhs has the larger sequence number, the instruction with the
+         * smallest sequence number ends up at the top of the queue.
+         */
+        struct pqCompare {
+            bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
+            {
+                return lhs->seqNum > rhs->seqNum;
+            }
+        };
+      public:
+        /** Sizes the queue from params (numIQEntries, issueWidth). */
+        InstQueue(Params *params);
+
+        /** Returns the back end's name with an ".iq" suffix. */
+        std::string name() const;
+
+        /** Registers the queue's statistics. */
+        void regStats();
+
+        /** Sets the issue-to-execute buffer and binds the numIssued wire. */
+        void setIssueExecQueue(TimeBuffer<IssueToExec> *i2e_queue);
+
+        /** Sets the owning back end; must be called before name(). */
+        void setBE(BackEnd *_be) { be = _be; }
+
+        /** Adds an instruction to the nonSpec, toBeScheduled or iq list. */
+        void insert(DynInstPtr &inst);
+
+        /** Moves instructions from toBeScheduled to the ready structures. */
+        void scheduleReadyInsts();
+
+        /** Moves the back entry of nonSpec, which must have sequence number
+         *  sn, to the ready structures.
+         */
+        void scheduleNonSpec(const InstSeqNum &sn);
+
+        /** Pops and returns the top instruction of the ready queue. */
+        DynInstPtr getReadyInst();
+
+        /** Intentionally empty. */
+        void commit(const InstSeqNum &sn) {}
+
+        /** Removes instructions with sequence number greater than sn from
+         *  the iq, nonSpec and replay lists.
+         */
+        void squash(const InstSeqNum &sn);
+
+        /** Marks a source register ready on each dependent of inst and
+         *  returns the number of dependents.
+         */
+        int wakeDependents(DynInstPtr &inst);
+
+        /** Tells memory dependence unit that a memory instruction needs to be
+         * rescheduled. It will re-execute once replayMemInst() is called.
+         */
+        void rescheduleMemInst(DynInstPtr &inst);
+
+        /** Re-executes all rescheduled memory instructions. */
+        void replayMemInst(DynInstPtr &inst);
+
+        /** Completes memory instruction. */
+        void completeMemInst(DynInstPtr &inst);
+
+        /** Intentionally empty. */
+        void violation(DynInstPtr &inst, DynInstPtr &violation) { }
+
+        /** True once the instruction count has reached the queue size. */
+        bool isFull() { return numInsts >= size; }
+
+        /** Prints the contents of the internal lists. */
+        void dumpInsts();
+
+      private:
+        /** Returns whether iterator it belongs to the list selected by q. */
+        bool find(queue q, typename std::list<DynInstPtr>::iterator it);
+        /** Owning back end; set through setBE(). */
+        BackEnd *be;
+        /** Issue-to-execute buffer; set through setIssueExecQueue(). */
+        TimeBuffer<IssueToExec> *i2e;
+        /** Wire into i2e holding the count of instructions issued. */
+        typename TimeBuffer<IssueToExec>::wire numIssued;
+        typedef typename std::list<DynInstPtr> InstList;
+        typedef typename std::list<DynInstPtr>::iterator InstListIt;
+        typedef typename std::priority_queue<DynInstPtr, std::vector<DynInstPtr>, pqCompare> ReadyInstQueue;
+        // Not sure I need the IQ list; it just needs to be a count.
+        InstList iq;
+        InstList toBeScheduled;
+        InstList readyList;
+        InstList nonSpec;
+        InstList replayList;
+        ReadyInstQueue readyQueue;
+        /** Capacity of the queue (params->numIQEntries). */
+        int size;
+        /** Number of instructions currently accounted to the queue. */
+        int numInsts;
+        /** Upper bound used by scheduleReadyInsts() (params->issueWidth). */
+        int width;
+
+        Stats::VectorDistribution<> occ_dist;
+
+        Stats::Vector<> inst_count;
+        Stats::Vector<> peak_inst_count;
+        Stats::Scalar<> empty_count;
+        Stats::Scalar<> current_count;
+        Stats::Scalar<> fullCount;
+
+        Stats::Formula occ_rate;
+        Stats::Formula avg_residency;
+        Stats::Formula empty_rate;
+        Stats::Formula full_rate;
+    };
+
+    /** LdWriteback event for a load completion. */
+    class LdWritebackEvent : public Event {
+      private:
+        /** Instruction that is writing back data to the register file. */
+        DynInstPtr inst;
+        /** Pointer to the back end that owns this event. */
+        BackEnd *be;
+
+      public:
+        /** Constructs a load writeback event. */
+        LdWritebackEvent(DynInstPtr &_inst, BackEnd *be);
+
+        /** Processes writeback event. */
+        virtual void process();
+        /** Returns the description of the writeback event. */
+        virtual const char *description();
+    };
+
+    /** Builds the back end and its queues from params. */
+    BackEnd(Params *params);
+
+    /** Returns the CPU's name with a ".backend" suffix. */
+    std::string name() const;
+
+    /** Registers the back end's statistics. */
+    void regStats();
+
+    /** Sets the CPU pointer. */
+    void setCPU(FullCPU *cpu_ptr)
+    { cpu = cpu_ptr; }
+
+    /** Sets the front end pointer. */
+    void setFrontEnd(FrontEnd *front_end_ptr)
+    { frontEnd = front_end_ptr; }
+
+    /** Sets the execution context pointer. */
+    void setXC(ExecContext *xc_ptr)
+    { xc = xc_ptr; }
+
+    /** Sets the thread state pointer. */
+    void setThreadState(Thread *thread_ptr)
+    { thread = thread_ptr; }
+
+    void setCommBuffer(TimeBuffer<CommStruct> *_comm);
+
+    void tick();
+    void squash();
+    void squashFromXC();
+    /** Initialized to false by the constructor. */
+    bool xcSquash;
+
+    /** Forwards a load to the load/store queue. */
+    template <class T>
+    Fault read(MemReqPtr &req, T &data, int load_idx);
+
+    /** Forwards a store to the load/store queue. */
+    template <class T>
+    Fault write(MemReqPtr &req, T &data, int store_idx);
+
+    /** Returns commitPC. */
+    Addr readCommitPC() { return commitPC; }
+
+    Addr commitPC;
+
+    /** True when the list of in-flight instructions is empty. */
+    bool robEmpty() { return instList.empty(); }
+
+    /** True once the instruction count has reached numROBEntries. */
+    bool isFull() { return numInsts >= numROBEntries; }
+    /** True when either status or dispatchStatus is Blocked. */
+    bool isBlocked() { return status == Blocked || dispatchStatus == Blocked; }
+
+    /** Tells memory dependence unit that a memory instruction needs to be
+     * rescheduled. It will re-execute once replayMemInst() is called.
+     */
+    void rescheduleMemInst(DynInstPtr &inst)
+    { IQ.rescheduleMemInst(inst); }
+
+    /** Re-executes all rescheduled memory instructions. */
+    void replayMemInst(DynInstPtr &inst)
+    { IQ.replayMemInst(inst); }
+
+    /** Completes memory instruction. */
+    void completeMemInst(DynInstPtr &inst)
+    { IQ.completeMemInst(inst); }
+
+    void fetchFault(Fault &fault);
+
+  private:
+    // Internal steps of the back end; their definitions live in
+    // back_end_impl.hh.
+    void updateStructures();
+    void dispatchInsts();
+    void dispatchStall();
+    void checkDispatchStatus();
+    void scheduleReadyInsts();
+    void executeInsts();
+    void commitInsts();
+    void addToIQ(DynInstPtr &inst);
+    void addToLSQ(DynInstPtr &inst);
+    void instToCommit(DynInstPtr &inst);
+    void writebackInsts();
+    bool commitInst(int inst_num);
+    void squash(const InstSeqNum &sn);
+    void squashDueToBranch(DynInstPtr &inst);
+    void squashDueToMemBlocked(DynInstPtr &inst);
+    void updateExeInstStats(DynInstPtr &inst);
+    void updateComInstStats(DynInstPtr &inst);
+
+  public:
+    /** CPU this back end belongs to; set through setCPU(). */
+    FullCPU *cpu;
+
+    /** Front end; set through setFrontEnd(). */
+    FrontEnd *frontEnd;
+
+    /** Execution context; set through setXC(). */
+    ExecContext *xc;
+
+    /** Thread state; set through setThreadState(). */
+    Thread *thread;
+
+    enum Status {
+        Running,
+        Idle,
+        DcacheMissStall,
+        DcacheMissComplete,
+        Blocked
+    };
+
+    Status status;
+
+    /** Set to Running by the constructor. */
+    Status dispatchStatus;
+
+    Counter funcExeInst;
+
+  private:
+//    typedef typename Impl::InstQueue InstQueue;
+
+    /** Instruction queue (the nested InstQueue class above). */
+    InstQueue IQ;
+
+    typedef typename Impl::LdstQueue LdstQueue;
+
+    /** Load/store queue; read() and write() forward to it. */
+    LdstQueue LSQ;
+  public:
+    RenameTable<Impl> commitRenameTable;
+
+    RenameTable<Impl> renameTable;
+  private:
+    /** Event type for data cache completions; process() is currently an
+     *  empty function.
+     */
+    class DCacheCompletionEvent : public Event
+    {
+      private:
+        BackEnd *be;
+
+      public:
+        DCacheCompletionEvent(BackEnd *_be);
+
+        virtual void process();
+        virtual const char *description();
+    };
+
+    friend class DCacheCompletionEvent;
+
+    DCacheCompletionEvent cacheCompletionEvent;
+
+    MemInterface *dcacheInterface;
+
+    MemReqPtr memReq;
+
+    // General back end width. Used if the more specific isn't given.
+    int width;
+
+    // Dispatch width.
+    int dispatchWidth;
+    int numDispatchEntries;
+    int dispatchSize;
+
+    // Issue width.
+    int issueWidth;
+
+    // Writeback width
+    int wbWidth;
+
+    // Commit width
+    int commitWidth;
+
+    /** Index into queue of instructions being written back. */
+    unsigned wbNumInst;
+
+    /** Cycle number within the queue of instructions being written
+     * back.  Used in case there are too many instructions writing
+     * back at the current cycle and writebacks need to be scheduled
+     * for the future. See comments in instToCommit().
+     */
+    unsigned wbCycle;
+
+    /** Capacity compared against numInsts by isFull(). */
+    int numROBEntries;
+    /** Number of instructions currently in the back end. */
+    int numInsts;
+
+  private:
+    typedef typename std::list<DynInstPtr>::iterator InstListIt;
+
+    /** In-flight instructions; robEmpty() tests this list. */
+    std::list<DynInstPtr> instList;
+    std::list<DynInstPtr> dispatch;
+    std::list<DynInstPtr> writeback;
+
+    int latency;
+
+    int squashLatency;
+
+    /** Initialized to true by the constructor. */
+    bool exactFullStall;
+
+    bool fetchRedirect[Impl::MaxThreads];
+
+    // number of cycles stalled for D-cache misses
+/*    Stats::Scalar<> dcacheStallCycles;
+      Counter lastDcacheStall;
+*/
+    Stats::Vector<> rob_cap_events;
+    Stats::Vector<> rob_cap_inst_count;
+    Stats::Vector<> iq_cap_events;
+    Stats::Vector<> iq_cap_inst_count;
+    // total number of instructions executed
+    Stats::Vector<> exe_inst;
+    Stats::Vector<> exe_swp;
+    Stats::Vector<> exe_nop;
+    Stats::Vector<> exe_refs;
+    Stats::Vector<> exe_loads;
+    Stats::Vector<> exe_branches;
+
+    Stats::Vector<> issued_ops;
+
+    // total number of loads forwarded from LSQ stores
+    Stats::Vector<> lsq_forw_loads;
+
+    // total number of loads ignored due to invalid addresses
+    Stats::Vector<> inv_addr_loads;
+
+    // total number of software prefetches ignored due to invalid addresses
+    Stats::Vector<> inv_addr_swpfs;
+    // ready loads blocked due to memory disambiguation
+    Stats::Vector<> lsq_blocked_loads;
+
+    Stats::Scalar<> lsqInversion;
+
+    Stats::Vector<> n_issued_dist;
+    Stats::VectorDistribution<> issue_delay_dist;
+
+    Stats::VectorDistribution<> queue_res_dist;
+/*
+    Stats::Vector<> stat_fu_busy;
+    Stats::Vector2d<> stat_fuBusy;
+    Stats::Vector<> dist_unissued;
+    Stats::Vector2d<> stat_issued_inst_type;
+
+    Stats::Formula misspec_cnt;
+    Stats::Formula misspec_ipc;
+    Stats::Formula issue_rate;
+    Stats::Formula issue_stores;
+    Stats::Formula issue_op_rate;
+    Stats::Formula fu_busy_rate;
+    Stats::Formula commit_stores;
+    Stats::Formula commit_ipc;
+    Stats::Formula commit_ipb;
+    Stats::Formula lsq_inv_rate;
+*/
+    Stats::Vector<> writeback_count;
+    Stats::Vector<> producer_inst;
+    Stats::Vector<> consumer_inst;
+    Stats::Vector<> wb_penalized;
+
+    Stats::Formula wb_rate;
+    Stats::Formula wb_fanout;
+    Stats::Formula wb_penalized_rate;
+
+    // total number of instructions committed
+    Stats::Vector<> stat_com_inst;
+    Stats::Vector<> stat_com_swp;
+    Stats::Vector<> stat_com_refs;
+    Stats::Vector<> stat_com_loads;
+    Stats::Vector<> stat_com_membars;
+    Stats::Vector<> stat_com_branches;
+
+    Stats::Distribution<> n_committed_dist;
+
+    Stats::Scalar<> commit_eligible_samples;
+    Stats::Vector<> commit_eligible;
+
+    Stats::Scalar<> ROB_fcount;
+    Stats::Formula ROB_full_rate;
+
+    Stats::Vector<>  ROB_count;	 // cumulative ROB occupancy
+    Stats::Formula ROB_occ_rate;
+    Stats::VectorDistribution<> ROB_occ_dist;
+  public:
+    /** Prints the back end's instructions; defined in back_end_impl.hh. */
+    void dumpInsts();
+};
+
+/**
+ * Performs a load by forwarding it to the load/store queue.
+ *
+ * @param req Memory request describing the access.
+ * @param data Destination for the loaded value.
+ * @param load_idx Index passed through to LSQ.read().
+ * @return The fault returned by LSQ.read().
+ *
+ * The commented-out code below is an earlier direct translation/cache
+ * access path; it is disabled and kept for reference only.
+ */
+template <class Impl>
+template <class T>
+Fault
+BackEnd<Impl>::read(MemReqPtr &req, T &data, int load_idx)
+{
+/*    memReq->reset(addr, sizeof(T), flags);
+
+    // translate to physical address
+    Fault fault = cpu->translateDataReadReq(memReq);
+
+    // if we have a cache, do cache access too
+    if (fault == NoFault && dcacheInterface) {
+        memReq->cmd = Read;
+        memReq->completionEvent = NULL;
+        memReq->time = curTick;
+        memReq->flags &= ~INST_READ;
+        MemAccessResult result = dcacheInterface->access(memReq);
+
+        // Ugly hack to get an event scheduled *only* if the access is
+        // a miss.  We really should add first-class support for this
+        // at some point.
+        if (result != MA_HIT && dcacheInterface->doEvents()) {
+            // Fix this hack for keeping funcExeInst correct with loads that
+            // are executed twice.
+            --funcExeInst;
+
+            memReq->completionEvent = &cacheCompletionEvent;
+            lastDcacheStall = curTick;
+//	    unscheduleTickEvent();
+//	    status = DcacheMissStall;
+            DPRINTF(OzoneCPU, "Dcache miss stall!\n");
+        } else {
+            // do functional access
+            fault = thread->mem->read(memReq, data);
+
+        }
+    }
+*/
+/*
+    if (!dcacheInterface && (memReq->flags & UNCACHEABLE))
+        recordEvent("Uncached Read");
+*/
+    // The load/store queue handles the access.
+    return LSQ.read(req, data, load_idx);
+}
+
+/**
+ * Performs a store by forwarding it to the load/store queue.
+ *
+ * @param req Memory request describing the access.
+ * @param data Value to be stored.
+ * @param store_idx Index passed through to LSQ.write().
+ * @return The fault returned by LSQ.write().
+ *
+ * The commented-out code below is an earlier direct translation/cache
+ * access path; it is disabled and kept for reference only.
+ */
+template <class Impl>
+template <class T>
+Fault
+BackEnd<Impl>::write(MemReqPtr &req, T &data, int store_idx)
+{
+/*
+    memReq->reset(addr, sizeof(T), flags);
+
+    // translate to physical address
+    Fault fault = cpu->translateDataWriteReq(memReq);
+
+    if (fault == NoFault && dcacheInterface) {
+        memReq->cmd = Write;
+        memcpy(memReq->data,(uint8_t *)&data,memReq->size);
+        memReq->completionEvent = NULL;
+        memReq->time = curTick;
+        memReq->flags &= ~INST_READ;
+        MemAccessResult result = dcacheInterface->access(memReq);
+
+        // Ugly hack to get an event scheduled *only* if the access is
+        // a miss.  We really should add first-class support for this
+        // at some point.
+        if (result != MA_HIT && dcacheInterface->doEvents()) {
+            memReq->completionEvent = &cacheCompletionEvent;
+            lastDcacheStall = curTick;
+//	    unscheduleTickEvent();
+//	    status = DcacheMissStall;
+            DPRINTF(OzoneCPU, "Dcache miss stall!\n");
+        }
+    }
+
+    if (res && (fault == NoFault))
+        *res = memReq->result;
+        */
+/*
+    if (!dcacheInterface && (memReq->flags & UNCACHEABLE))
+        recordEvent("Uncached Write");
+*/
+    // The load/store queue handles the access.
+    return LSQ.write(req, data, store_idx);
+}
+
+#endif // __CPU_OZONE_BACK_END_HH__
diff --git a/cpu/ozone/back_end_impl.hh b/cpu/ozone/back_end_impl.hh
new file mode 100644
index 000000000..807afaf2e
--- /dev/null
+++ b/cpu/ozone/back_end_impl.hh
@@ -0,0 +1,1853 @@
+
+#include "encumbered/cpu/full/op_class.hh"
+#include "cpu/ozone/back_end.hh"
+
+/**
+ * Builds an empty instruction queue.
+ *
+ * The capacity is taken from params->numIQEntries and the per-cycle
+ * scheduling limit from params->issueWidth. The back-end and issue/execute
+ * buffer pointers start out null instead of indeterminate; they are supplied
+ * afterwards through setBE() and setIssueExecQueue().
+ *
+ * @param params CPU parameters; must not be null.
+ */
+template <class Impl>
+BackEnd<Impl>::InstQueue::InstQueue(Params *params)
+    : be(NULL), i2e(NULL),
+      size(params->numIQEntries), numInsts(0), width(params->issueWidth)
+{
+}
+
+/**
+ * Returns the queue's name: the owning back end's name followed by ".iq".
+ * The back end pointer must already have been set with setBE().
+ */
+template <class Impl>
+std::string
+BackEnd<Impl>::InstQueue::name() const
+{
+    const std::string owner_name = be->name();
+    return owner_name + ".iq";
+}
+
+/**
+ * Registers the instruction queue's statistics.
+ *
+ * Every statistic is named name() + ".<stat>". name() ends in ".iq", so the
+ * explicit "." keeps the queue name and the statistic name from running
+ * together (e.g. "...iq.occ_dist" rather than "...iqocc_dist").
+ *
+ * The formulas reference be->cpu->numCycles, so the back end pointer and its
+ * CPU pointer must both be set before this is called.
+ */
+template <class Impl>
+void
+BackEnd<Impl>::InstQueue::regStats()
+{
+    using namespace Stats;
+
+    occ_dist
+        .init(1, 0, size, 2)
+        .name(name() + ".occ_dist")
+        .desc("IQ Occupancy per cycle")
+        .flags(total | cdf)
+        ;
+
+    inst_count
+        .init(1)
+        .name(name() + ".cum_num_insts")
+        .desc("Total occupancy")
+        .flags(total)
+        ;
+
+    peak_inst_count
+        .init(1)
+        .name(name() + ".peak_occupancy")
+        .desc("Peak IQ occupancy")
+        .flags(total)
+        ;
+
+    current_count
+        .name(name() + ".current_count")
+        .desc("Occupancy this cycle")
+        ;
+
+    empty_count
+        .name(name() + ".empty_count")
+        .desc("Number of empty cycles")
+        ;
+
+    fullCount
+        .name(name() + ".full_count")
+        .desc("Number of full cycles")
+        ;
+
+
+    occ_rate
+        .name(name() + ".occ_rate")
+        .desc("Average occupancy")
+        .flags(total)
+        ;
+    occ_rate = inst_count / be->cpu->numCycles;
+
+    // NOTE(review): this divides the occupancy rate by the cycle count a
+    // second time; confirm that this is the intended residency definition.
+    avg_residency
+        .name(name() + ".avg_residency")
+        .desc("Average IQ residency")
+        .flags(total)
+        ;
+    avg_residency = occ_rate / be->cpu->numCycles;
+
+    // NOTE(review): the two rates below are scaled by 100, i.e. they are
+    // percentages even though the descriptions say "Fraction".
+    empty_rate
+        .name(name() + ".empty_rate")
+        .desc("Fraction of cycles empty")
+        ;
+    empty_rate = 100 * empty_count / be->cpu->numCycles;
+
+    full_rate
+        .name(name() + ".full_rate")
+        .desc("Fraction of cycles full")
+        ;
+    full_rate = 100 * fullCount / be->cpu->numCycles;
+}
+
+/**
+ * Attaches the issue-to-execute time buffer.
+ *
+ * Stores the buffer pointer and binds numIssued to the buffer's wire at
+ * offset 0.
+ *
+ * @param i2e_queue Buffer to attach; must not be null.
+ */
+template <class Impl>
+void
+BackEnd<Impl>::InstQueue::setIssueExecQueue(TimeBuffer<IssueToExec> *i2e_queue)
+{
+    numIssued = i2e_queue->getWire(0);
+    i2e = i2e_queue;
+}
+
+/**
+ * Adds an instruction to the queue.
+ *
+ * Non-speculative instructions go on nonSpec; other instructions go on
+ * toBeScheduled when they are already ready to issue and on iq otherwise.
+ * The instruction is always pushed at the front of the chosen list, and its
+ * iqIt/iqItValid fields are updated to point at the new entry.
+ *
+ * @param inst Instruction to insert.
+ */
+template <class Impl>
+void
+BackEnd<Impl>::InstQueue::insert(DynInstPtr &inst)
+{
+    ++numInsts;
+    inst_count[0]++;
+
+    // Pick the destination list first, then do the common bookkeeping once.
+    InstList *target;
+    if (inst->isNonSpeculative()) {
+        target = &nonSpec;
+    } else if (inst->readyToIssue()) {
+        target = &toBeScheduled;
+    } else {
+        target = &iq;
+    }
+
+    target->push_front(inst);
+    inst->iqIt = target->begin();
+    inst->iqItValid = true;
+}
+
+/**
+ * Moves instructions from toBeScheduled to the ready structures (readyQueue
+ * and readyList) until the list is empty or the number of instructions
+ * issued this cycle reaches width.
+ *
+ * Instructions are taken from the back of toBeScheduled, i.e. the entries
+ * that were pushed earliest (all insertions use push_front).
+ *
+ * The running total starts from the count already recorded in numIssued, so
+ * the final value is assigned back rather than added: adding it would count
+ * the previously issued instructions twice. Draining with back()/pop_back()
+ * also avoids decrementing an iterator past begin(), which the previous
+ * iterator-based loop did on an empty list and after erasing the first
+ * element.
+ */
+template <class Impl>
+void
+BackEnd<Impl>::InstQueue::scheduleReadyInsts()
+{
+    int scheduled = numIssued->size;
+
+    while (!toBeScheduled.empty() && scheduled < width) {
+        // Hold our own reference so the instruction outlives pop_back().
+        DynInstPtr inst = toBeScheduled.back();
+
+        DPRINTF(BE, "Instruction [sn:%lli] PC:%#x is ready\n",
+                inst->seqNum, inst->readPC());
+        readyQueue.push(inst);
+        readyList.push_front(inst);
+
+        inst->iqIt = readyList.begin();
+
+        toBeScheduled.pop_back();
+
+        ++scheduled;
+    }
+
+    numIssued->size = scheduled;
+}
+
+/**
+ * Makes the non-speculative instruction with sequence number sn ready.
+ *
+ * The instruction is expected to be the back entry of nonSpec (the entry
+ * pushed earliest); this is asserted rather than searched for. It is moved
+ * to readyList and readyQueue, and the issued count is incremented.
+ *
+ * @param sn Sequence number the back entry of nonSpec must carry.
+ */
+template <class Impl>
+void
+BackEnd<Impl>::InstQueue::scheduleNonSpec(const InstSeqNum &sn)
+{
+    // back() on an empty list is undefined; fail loudly instead.
+    assert(!nonSpec.empty());
+
+    DynInstPtr inst = nonSpec.back();
+
+    assert(inst->seqNum == sn);
+
+    assert(find(NonSpec, inst->iqIt));
+    nonSpec.erase(inst->iqIt);
+    readyList.push_front(inst);
+    inst->iqIt = readyList.begin();
+    readyQueue.push(inst);
+    numIssued->size++;
+}
+
+/**
+ * Removes and returns the instruction at the top of readyQueue.
+ *
+ * The instruction is also erased from readyList, its iqItValid flag is
+ * cleared, and the queue's instruction count is decremented (for every
+ * instruction, memory references included).
+ *
+ * @return The instruction that was at the top of the ready queue.
+ */
+template <class Impl>
+typename Impl::DynInstPtr
+BackEnd<Impl>::InstQueue::getReadyInst()
+{
+    assert(!readyList.empty());
+
+    DynInstPtr next_inst = readyQueue.top();
+    readyQueue.pop();
+
+    assert(find(ReadyList, next_inst->iqIt));
+    readyList.erase(next_inst->iqIt);
+    next_inst->iqItValid = false;
+
+    numInsts -= 1;
+    return next_inst;
+}
+
+/**
+ * Removes instructions with a sequence number greater than sn.
+ *
+ * iq and nonSpec are scanned from the front and the scan stops at the first
+ * entry that is not greater than sn; replayList is scanned in full. Each
+ * removed instruction has its iqItValid flag cleared and is deducted from
+ * the instruction count.
+ *
+ * Entries on toBeScheduled and readyList are not touched here (the code
+ * that pruned readyList is disabled).
+ *
+ * @param sn Sequence number of the youngest instruction to keep.
+ */
+template <class Impl>
+void
+BackEnd<Impl>::InstQueue::squash(const InstSeqNum &sn)
+{
+    InstListIt cur = iq.begin();
+    while (cur != iq.end() && (*cur)->seqNum > sn) {
+        (*cur)->iqItValid = false;
+        cur = iq.erase(cur);
+        --numInsts;
+    }
+
+    cur = nonSpec.begin();
+    while (cur != nonSpec.end() && (*cur)->seqNum > sn) {
+        (*cur)->iqItValid = false;
+        cur = nonSpec.erase(cur);
+        --numInsts;
+    }
+
+    // The replay list is not scanned with an early exit: check every entry.
+    cur = replayList.begin();
+    while (cur != replayList.end()) {
+        if (!((*cur)->seqNum > sn)) {
+            ++cur;
+            continue;
+        }
+
+        (*cur)->iqItValid = false;
+        cur = replayList.erase(cur);
+        --numInsts;
+    }
+
+    assert(numInsts >= 0);
+}
+
+/**
+ * Notifies every dependent of a completed instruction that one of its
+ * source registers is ready.
+ *
+ * A dependent that becomes ready to issue and still has a valid queue
+ * iterator is moved from nonSpec (if non-speculative) or iq (otherwise) to
+ * the front of toBeScheduled.
+ *
+ * @param inst Producing instruction; must not be squashed.
+ * @return Number of dependents of inst.
+ */
+template <class Impl>
+int
+BackEnd<Impl>::InstQueue::wakeDependents(DynInstPtr &inst)
+{
+    assert(!inst->isSquashed());
+
+    std::vector<DynInstPtr> &consumers = inst->getDependents();
+    const int num_consumers = consumers.size();
+
+    for (int idx = 0; idx < num_consumers; ++idx) {
+        DynInstPtr consumer = consumers[idx];
+        consumer->markSrcRegReady();
+
+        // Nothing to move unless the consumer is now ready and still queued.
+        if (!consumer->readyToIssue() || !consumer->iqItValid)
+            continue;
+
+        if (consumer->isNonSpeculative()) {
+            assert(find(NonSpec, consumer->iqIt));
+            nonSpec.erase(consumer->iqIt);
+        } else {
+            assert(find(IQ, consumer->iqIt));
+            iq.erase(consumer->iqIt);
+        }
+
+        toBeScheduled.push_front(consumer);
+        consumer->iqIt = toBeScheduled.begin();
+    }
+
+    return num_consumers;
+}
+
+/**
+ * Parks a memory instruction on the replay list until replayMemInst() is
+ * called.
+ *
+ * The instruction must not currently hold a valid queue iterator. It is
+ * counted against the queue again and pushed on the front of replayList.
+ *
+ * @param inst Memory instruction to reschedule.
+ */
+template <class Impl>
+void
+BackEnd<Impl>::InstQueue::rescheduleMemInst(DynInstPtr &inst)
+{
+    assert(!inst->iqItValid);
+
+    ++numInsts;
+
+    replayList.push_front(inst);
+    inst->iqItValid = true;
+    inst->iqIt = replayList.begin();
+}
+
+/**
+ * Re-executes all rescheduled memory instructions.
+ *
+ * Every entry of replayList is moved to the front of toBeScheduled, taking
+ * entries from the back of replayList first (the entries pushed earliest).
+ * The list is drained with back()/pop_back() so that no iterator is ever
+ * decremented past begin().
+ *
+ * @param inst Instruction that triggered the replay; it is only checked (by
+ *             assertion) to be on the replay list.
+ */
+template <class Impl>
+void
+BackEnd<Impl>::InstQueue::replayMemInst(DynInstPtr &inst)
+{
+    assert(find(ReplayList, inst->iqIt));
+
+    while (!replayList.empty()) {
+        // Hold our own reference so the instruction outlives pop_back().
+        DynInstPtr rescheduled_inst = replayList.back();
+        replayList.pop_back();
+        toBeScheduled.push_front(rescheduled_inst);
+        rescheduled_inst->iqIt = toBeScheduled.begin();
+    }
+}
+
+/**
+ * Completes a memory instruction. Not implemented: calling this panics.
+ */
+template <class Impl>
+void
+BackEnd<Impl>::InstQueue::completeMemInst(DynInstPtr &inst)
+{
+    panic("Not implemented.");
+}
+
+/**
+ * Checks whether an iterator belongs to one of the internal lists.
+ *
+ * The selected list is walked from begin() and each position is compared
+ * with it. The list's end() iterator also counts as a match.
+ *
+ * @param q Which list to search.
+ * @param it Iterator to look for.
+ * @return true if it is a position of (or the end of) the selected list.
+ */
+template <class Impl>
+bool
+BackEnd<Impl>::InstQueue::find(queue q, InstListIt it)
+{
+    InstListIt cur, last;
+    switch(q) {
+      case NonSpec:
+        cur = nonSpec.begin();
+        last = nonSpec.end();
+        break;
+      case IQ:
+        cur = iq.begin();
+        last = iq.end();
+        break;
+      case ToBeScheduled:
+        cur = toBeScheduled.begin();
+        last = toBeScheduled.end();
+        break;
+      case ReadyList:
+        cur = readyList.begin();
+        last = readyList.end();
+        break;
+      case ReplayList:
+        cur = replayList.begin();
+        last = replayList.end();
+    }
+
+    for (; cur != last; ++cur) {
+        if (cur == it)
+            return true;
+    }
+
+    // Reached the end without a match; only end() itself can still match.
+    return cur == it;
+}
+
+/**
+ * Prints the contents of the iq, nonSpec, toBeScheduled and readyList lists,
+ * in that order, using cprintf. replayList is not printed.
+ *
+ * Each list is preceded by a "<label> size: N" line and is walked from back
+ * to front. The "Instruction" index and the "Count" of valid instructions
+ * both keep running across all four lists. An instruction is counted when it
+ * is not squashed and either has not issued or is a memory reference whose
+ * memOpDone flag is still clear.
+ *
+ * Reverse iterators are used so that empty lists are handled without
+ * decrementing an iterator past begin().
+ */
+template <class Impl>
+void
+BackEnd<Impl>::InstQueue::dumpInsts()
+{
+    const char *labels[] = { "IQ", "nonSpec", "toBeScheduled", "readyList" };
+    InstList *lists[] = { &iq, &nonSpec, &toBeScheduled, &readyList };
+    const int num_lists = sizeof(lists) / sizeof(lists[0]);
+
+    int num = 0;
+    int valid_num = 0;
+
+    for (int i = 0; i < num_lists; ++i) {
+        InstList &list = *lists[i];
+
+        cprintf("%s size: %i\n", labels[i], list.size());
+
+        for (typename InstList::reverse_iterator inst_it = list.rbegin();
+             inst_it != list.rend(); ++inst_it, ++num) {
+            DynInstPtr inst = *inst_it;
+
+            cprintf("Instruction:%i\n",
+                    num);
+            if (!inst->isSquashed()) {
+                if (!inst->isIssued()) {
+                    ++valid_num;
+                    cprintf("Count:%i\n", valid_num);
+                } else if (inst->isMemRef() &&
+                           !inst->memOpDone) {
+                    // Loads that have not been marked as executed still
+                    // count towards the total instructions.
+                    ++valid_num;
+                    cprintf("Count:%i\n", valid_num);
+                }
+            }
+
+            cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
+                    "Issued:%i\nSquashed:%i\n",
+                    inst->readPC(),
+                    inst->seqNum,
+                    inst->threadNumber,
+                    inst->isIssued(),
+                    inst->isSquashed());
+
+            if (inst->isMemRef()) {
+                cprintf("MemOpDone:%i\n", inst->memOpDone);
+            }
+
+            cprintf("\n");
+        }
+    }
+}
+
+/**
+ * Constructs a load writeback event on the main event queue for the given
+ * instruction and back end. The event is flagged AutoDelete.
+ */
+template<class Impl>
+BackEnd<Impl>::LdWritebackEvent::LdWritebackEvent(DynInstPtr &_inst,
+                                                  BackEnd<Impl> *_be)
+    : Event(&mainEventQueue), inst(_inst), be(_be)
+{
+    this->setFlags(Event::AutoDelete);
+}
+
+/**
+ * Handles completion of a load.
+ *
+ * A squashed instruction is simply dropped. Otherwise the instruction is
+ * marked executed and its access completed (if that has not happened yet),
+ * and it is handed to the back end through instToCommit(). In both cases
+ * the event releases its reference to the instruction before returning.
+ */
+template<class Impl>
+void
+BackEnd<Impl>::LdWritebackEvent::process()
+{
+    DPRINTF(BE, "Load writeback event [sn:%lli]\n", inst->seqNum);
+
+    if (!inst->isSquashed()) {
+        if (!inst->isExecuted()) {
+            inst->setExecuted();
+
+            // Run the completion step so the loaded data ends up in place.
+            inst->completeAcc();
+        }
+
+        // Queue the instruction for commit.
+        be->instToCommit(inst);
+    }
+
+    inst = NULL;
+}
+// Returns the fixed text label for this event type.
+template<class Impl>
+const char *
+BackEnd<Impl>::LdWritebackEvent::description()
+{
+    return "Load writeback event";
+}
+
+// Creates the cache completion event at CPU_Tick_Pri for back end _be.
+template <class Impl>
+BackEnd<Impl>::DCacheCompletionEvent::DCacheCompletionEvent(BackEnd *_be)
+    : Event(&mainEventQueue, CPU_Tick_Pri), be(_be)
+{
+}
+// Empty body: processing this event currently does nothing.
+template <class Impl>
+void
+BackEnd<Impl>::DCacheCompletionEvent::process()
+{
+}
+// Returns the fixed text label for this event type.
+template <class Impl>
+const char *
+BackEnd<Impl>::DCacheCompletionEvent::description()
+{
+    return "Cache completion event";
+}
+// Builds the back end: wires the stage queues and sizes everything from params.
+template <class Impl>
+BackEnd<Impl>::BackEnd(Params *params)
+    : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(5, 5),
+      xcSquash(false), IQ(params),
+      cacheCompletionEvent(this), width(params->backEndWidth),
+      exactFullStall(true)
+{
+    numROBEntries = params->numROBEntries;
+    numInsts = 0;
+    // dispatchInsts() reads this counter, so it must start at zero.
+    dispatchSize = 0;
+    numDispatchEntries = 32;
+    IQ.setBE(this);
+    LSQ.setBE(this);
+
+    // Dispatch and execute both read their queue through the wire at -1.
+    instsToDispatch = d2i.getWire(-1);
+    instsToExecute = i2e.getWire(-1);
+    IQ.setIssueExecQueue(&i2e);
+
+    // A per-stage width of 0 falls back to the overall back-end width.
+    dispatchWidth = params->dispatchWidth ? params->dispatchWidth : width;
+    issueWidth = params->issueWidth ? params->issueWidth : width;
+    wbWidth = params->wbWidth ? params->wbWidth : width;
+    commitWidth = params->commitWidth ? params->commitWidth : width;
+
+    LSQ.init(params, params->LQEntries, params->SQEntries, 0);
+    dispatchStatus = Running;
+}
+// Returns the owning CPU's name with ".backend" appended.
+template <class Impl>
+std::string
+BackEnd<Impl>::name() const
+{
+    return std::string(cpu->name()).append(".backend");
+}
+// Registers the back end's statistics (name, description, flags), then the IQ's.
+template <class Impl>
+void
+BackEnd<Impl>::regStats()
+{
+    using namespace Stats;
+    rob_cap_events
+        .init(cpu->number_of_threads)
+        .name(name() + ".ROB:cap_events")
+        .desc("number of cycles where ROB cap was active")
+        .flags(total)
+        ;
+
+    rob_cap_inst_count
+        .init(cpu->number_of_threads)
+        .name(name() + ".ROB:cap_inst")
+        .desc("number of instructions held up by ROB cap")
+        .flags(total)
+        ;
+
+    iq_cap_events
+        .init(cpu->number_of_threads)
+        .name(name() +".IQ:cap_events" )
+        .desc("number of cycles where IQ cap was active")
+        .flags(total)
+        ;
+
+    iq_cap_inst_count
+        .init(cpu->number_of_threads)
+        .name(name() + ".IQ:cap_inst")
+        .desc("number of instructions held up by IQ cap")
+        .flags(total)
+        ;
+
+    // Issue-side counters, each sized by the number of threads.
+    exe_inst
+        .init(cpu->number_of_threads)
+        .name(name() + ".ISSUE:count")
+        .desc("number of insts issued")
+        .flags(total)
+        ;
+
+    exe_swp
+        .init(cpu->number_of_threads)
+        .name(name() + ".ISSUE:swp")
+        .desc("number of swp insts issued")
+        .flags(total)
+        ;
+
+    exe_nop
+        .init(cpu->number_of_threads)
+        .name(name() + ".ISSUE:nop")
+        .desc("number of nop insts issued")
+        .flags(total)
+        ;
+
+    exe_refs
+        .init(cpu->number_of_threads)
+        .name(name() + ".ISSUE:refs")
+        .desc("number of memory reference insts issued")
+        .flags(total)
+        ;
+
+    exe_loads
+        .init(cpu->number_of_threads)
+        .name(name() + ".ISSUE:loads")
+        .desc("number of load insts issued")
+        .flags(total)
+        ;
+
+    exe_branches
+        .init(cpu->number_of_threads)
+        .name(name() + ".ISSUE:branches")
+        .desc("Number of branches issued")
+        .flags(total)
+        ;
+
+    issued_ops
+        .init(cpu->number_of_threads)
+        .name(name() + ".ISSUE:op_count")
+        .desc("number of insts issued")
+        .flags(total)
+        ;
+
+/*
+    for (int i=0; i<Num_OpClasses; ++i) {
+        stringstream subname;
+        subname << opClassStrings[i] << "_delay";
+        issue_delay_dist.subname(i, subname.str());
+    }
+*/
+    //
+    //  Other stats
+    //
+    lsq_forw_loads
+        .init(cpu->number_of_threads)
+        .name(name() + ".LSQ:forw_loads")
+        .desc("number of loads forwarded via LSQ")
+        .flags(total)
+        ;
+
+    inv_addr_loads
+        .init(cpu->number_of_threads)
+        .name(name() + ".ISSUE:addr_loads")
+        .desc("number of invalid-address loads")
+        .flags(total)
+        ;
+
+    inv_addr_swpfs
+        .init(cpu->number_of_threads)
+        .name(name() + ".ISSUE:addr_swpfs")
+        .desc("number of invalid-address SW prefetches")
+        .flags(total)
+        ;
+
+    lsq_blocked_loads
+        .init(cpu->number_of_threads)
+        .name(name() + ".LSQ:blocked_loads")
+        .desc("number of ready loads not issued due to memory disambiguation")
+        .flags(total)
+        ;
+
+    lsqInversion
+        .name(name() + ".ISSUE:lsq_invert")
+        .desc("Number of times LSQ instruction issued early")
+        ;
+
+    n_issued_dist
+        .init(issueWidth + 1)
+        .name(name() + ".ISSUE:issued_per_cycle")
+        .desc("Number of insts issued each cycle")
+        .flags(total | pdf | dist)
+        ;
+    issue_delay_dist
+        .init(Num_OpClasses,0,99,2)
+        .name(name() + ".ISSUE:")
+        .desc("cycles from operands ready to issue")
+        .flags(pdf | cdf)
+        ;
+
+    queue_res_dist
+        .init(Num_OpClasses, 0, 99, 2)
+        .name(name() + ".IQ:residence:")
+        .desc("cycles from dispatch to issue")
+        .flags(total | pdf | cdf )
+        ;
+    for (int i = 0; i < Num_OpClasses; ++i) {
+        queue_res_dist.subname(i, opClassStrings[i]);
+    }
+
+    writeback_count
+        .init(cpu->number_of_threads)
+        .name(name() + ".WB:count")
+        .desc("cumulative count of insts written-back")
+        .flags(total)
+        ;
+
+    producer_inst
+        .init(cpu->number_of_threads)
+        .name(name() + ".WB:producers")
+        .desc("num instructions producing a value")
+        .flags(total)
+        ;
+
+    consumer_inst
+        .init(cpu->number_of_threads)
+        .name(name() + ".WB:consumers")
+        .desc("num instructions consuming a value")
+        .flags(total)
+        ;
+
+    wb_penalized
+        .init(cpu->number_of_threads)
+        .name(name() + ".WB:penalized")
+        .desc("number of instrctions required to write to 'other' IQ")
+        .flags(total)
+        ;
+
+    // Ratios derived from the counters registered above.
+    wb_penalized_rate
+        .name(name() + ".WB:penalized_rate")
+        .desc ("fraction of instructions written-back that wrote to 'other' IQ")
+        .flags(total)
+        ;
+
+    wb_penalized_rate = wb_penalized / writeback_count;
+
+    wb_fanout
+        .name(name() + ".WB:fanout")
+        .desc("average fanout of values written-back")
+        .flags(total)
+        ;
+    // NOTE(review): this is producers per consumer; a fanout is usually the inverse -- confirm.
+    wb_fanout = producer_inst / consumer_inst;
+
+    wb_rate
+        .name(name() + ".WB:rate")
+        .desc("insts written-back per cycle")
+        .flags(total)
+        ;
+    wb_rate = writeback_count / cpu->numCycles;
+
+    stat_com_inst
+        .init(cpu->number_of_threads)
+        .name(name() + ".COM:count")
+        .desc("Number of instructions committed")
+        .flags(total)
+        ;
+
+    stat_com_swp
+        .init(cpu->number_of_threads)
+        .name(name() + ".COM:swp_count")
+        .desc("Number of s/w prefetches committed")
+        .flags(total)
+        ;
+
+    stat_com_refs
+        .init(cpu->number_of_threads)
+        .name(name() +  ".COM:refs")
+        .desc("Number of memory references committed")
+        .flags(total)
+        ;
+
+    stat_com_loads
+        .init(cpu->number_of_threads)
+        .name(name() +  ".COM:loads")
+        .desc("Number of loads committed")
+        .flags(total)
+        ;
+
+    stat_com_membars
+        .init(cpu->number_of_threads)
+        .name(name() +  ".COM:membars")
+        .desc("Number of memory barriers committed")
+        .flags(total)
+        ;
+
+    stat_com_branches
+        .init(cpu->number_of_threads)
+        .name(name() + ".COM:branches")
+        .desc("Number of branches committed")
+        .flags(total)
+        ;
+    n_committed_dist
+        .init(0,commitWidth,1)
+        .name(name() + ".COM:committed_per_cycle")
+        .desc("Number of insts commited each cycle")
+        .flags(pdf)
+        ;
+
+    //
+    //  Commit-Eligible instructions...
+    //
+    //  -> The number of instructions eligible to commit in those
+    //  cycles where we reached our commit BW limit (less the number
+    //  actually committed)
+    //
+    //  -> The average value is computed over ALL CYCLES... not just
+    //  the BW limited cycles
+    //
+    //  -> The standard deviation is computed only over cycles where
+    //  we reached the BW limit
+    //
+    commit_eligible
+        .init(cpu->number_of_threads)
+        .name(name() + ".COM:bw_limited")
+        .desc("number of insts not committed due to BW limits")
+        .flags(total)
+        ;
+
+    commit_eligible_samples
+        .name(name() + ".COM:bw_lim_events")
+        .desc("number cycles where commit BW limit reached")
+        ;
+
+    ROB_fcount
+        .name(name() + ".ROB:full_count")
+        .desc("number of cycles where ROB was full")
+        ;
+
+    ROB_count
+        .init(cpu->number_of_threads)
+        .name(name() + ".ROB:occupancy")
+        .desc(name() + ".ROB occupancy (cumulative)")
+        .flags(total)
+        ;
+
+    ROB_full_rate
+        .name(name() + ".ROB:full_rate")
+        .desc("ROB full per cycle")
+        ;
+    ROB_full_rate = ROB_fcount / cpu->numCycles;
+
+    ROB_occ_rate
+        .name(name() + ".ROB:occ_rate")
+        .desc("ROB occupancy rate")
+        .flags(total)
+        ;
+    ROB_occ_rate = ROB_count / cpu->numCycles;
+
+    ROB_occ_dist
+        .init(cpu->number_of_threads,0,numROBEntries,2)
+        .name(name() + ".ROB:occ_dist")
+        .desc("ROB Occupancy per cycle")
+        .flags(total | cdf)
+        ;
+
+    IQ.regStats();
+}
+// Stores the commit communication buffer and caches its 0 and -1 wires.
+template <class Impl>
+void
+BackEnd<Impl>::setCommBuffer(TimeBuffer<CommStruct> *_comm)
+{
+    comm = _comm;
+    toIEW = _comm->getWire(0);
+    fromCommit = _comm->getWire(-1);
+}
+// Advances the back end by one cycle, running each stage in order.
+template <class Impl>
+void
+BackEnd<Impl>::tick()
+{
+    DPRINTF(BE, "Ticking back end\n");
+
+    // Accumulate ROB occupancy and restart writeback slot selection.
+    ROB_count[0]+= numInsts;
+    wbCycle = 0;
+
+    if (xcSquash)
+        squashFromXC();
+
+    // Read in any done instruction information and update the IQ or LSQ.
+    updateStructures();
+
+    // A blocked dispatch stage only re-checks whether it may resume.
+    if (dispatchStatus == Blocked) {
+        checkDispatchStatus();
+    } else {
+        d2i.advance();
+        dispatchInsts();
+    }
+
+    i2e.advance();
+    scheduleReadyInsts();
+
+    e2c.advance();
+    executeInsts();
+
+    // Writeback, then commit.
+    numInstsToWB.advance();
+    writebackInsts();
+    commitInsts();
+
+    assert(numInsts == instList.size());
+}
+// Applies the feedback read from the commit wire to the IQ and LSQ.
+template <class Impl>
+void
+BackEnd<Impl>::updateStructures()
+{
+    if (fromCommit->doneSeqNum) {
+        IQ.commit(fromCommit->doneSeqNum);
+        LSQ.commitLoads(fromCommit->doneSeqNum);
+        LSQ.commitStores(fromCommit->doneSeqNum);
+    }
+
+    // Nothing more to do unless commit named an inst it is waiting on.
+    if (!fromCommit->nonSpecSeqNum)
+        return;
+
+    if (!fromCommit->uncached)
+        IQ.scheduleNonSpec(fromCommit->nonSpecSeqNum);
+    else
+        LSQ.executeLoad(fromCommit->lqIdx);
+}
+// Inserts inst into the instruction queue (IQ.insert).
+template <class Impl>
+void
+BackEnd<Impl>::addToIQ(DynInstPtr &inst)
+{
+    // Do anything IQ specific here?
+    IQ.insert(inst);
+}
+// Inserts inst into the load/store queue (LSQ.insert).
+template <class Impl>
+void
+BackEnd<Impl>::addToLSQ(DynInstPtr &inst)
+{
+    // Do anything LSQ specific here?
+    LSQ.insert(inst);
+}
+// Pulls insts from the front end into the dispatch queue, then moves the insts
+// due this cycle into the ROB, IQ and LSQ, stalling when one of them fills.
+template <class Impl>
+void
+BackEnd<Impl>::dispatchInsts()
+{
+    DPRINTF(BE, "Trying to dispatch instructions.\n");
+
+    // Pull instructions out of the front end.
+    int disp_width = dispatchWidth ? dispatchWidth : width;
+
+    // Could model dispatching time, but in general 1 cycle is probably
+    // good enough.
+    if (dispatchSize < numDispatchEntries) {
+        for (int i = 0; i < disp_width; i++) {
+            // Get instructions
+            DynInstPtr inst = frontEnd->getInst();
+
+            if (!inst) {
+                // No more instructions to get
+                break;
+            }
+
+            DPRINTF(BE, "Processing instruction [sn:%lli] PC:%#x\n",
+                    inst->seqNum, inst->readPC());
+
+            // Own index name: this loop used to reuse (and shadow) i.
+            for (int dest = 0; dest < inst->numDestRegs(); ++dest)
+                renameTable[inst->destRegIdx(dest)] = inst;
+
+            // Add to queue to be dispatched.
+            dispatch.push_back(inst);
+
+            d2i[0].size++;
+            ++dispatchSize;
+        }
+    }
+
+    assert(dispatch.size() < 64);
+
+    for (int i = 0; i < instsToDispatch->size; ++i) {
+        assert(!dispatch.empty());
+        // Get instruction from front of time buffer
+        DynInstPtr inst = dispatch.front();
+        dispatch.pop_front();
+
+        // The entry has left the dispatch queue whether or not it was
+        // squashed, so release its slot first.  Skipping this for squashed
+        // insts leaked slots until the front end was never read again.
+        --dispatchSize;
+
+        if (inst->isSquashed())
+            continue;
+
+        ++numInsts;
+        instList.push_back(inst);
+
+        DPRINTF(BE, "Dispatching instruction [sn:%lli] PC:%#x\n",
+                inst->seqNum, inst->readPC());
+
+        addToIQ(inst);
+
+        if (inst->isMemRef()) {
+            addToLSQ(inst);
+        }
+
+        if (inst->isNonSpeculative()) {
+            inst->setCanCommit();
+        }
+
+        // Exact mode: stop right after the inst that fills the IQ/LSQ/ROB.
+        if (exactFullStall) {
+            bool stall = false;
+            if (IQ.isFull()) {
+                DPRINTF(BE, "IQ is full!\n");
+                stall = true;
+            } else if (LSQ.isFull()) {
+                DPRINTF(BE, "LSQ is full!\n");
+                stall = true;
+            } else if (isFull()) {
+                DPRINTF(BE, "ROB is full!\n");
+                stall = true;
+                ROB_fcount++;
+            }
+            if (stall) {
+                instsToDispatch->size-= i+1;  // keep only the unprocessed insts
+                dispatchStall();
+                return;
+            }
+        }
+    }
+
+    // Check if IQ or LSQ is full once the whole group has been handled.
+    // Check here if we don't care about exact stall conditions.
+    bool stall = false;
+    if (IQ.isFull()) {
+        DPRINTF(BE, "IQ is full!\n");
+        stall = true;
+    } else if (LSQ.isFull()) {
+        DPRINTF(BE, "LSQ is full!\n");
+        stall = true;
+    } else if (isFull()) {
+        DPRINTF(BE, "ROB is full!\n");
+        stall = true;
+        ROB_fcount++;
+    }
+    if (stall) {
+        d2i.advance();
+        dispatchStall();
+        return;
+    }
+}
+// Marks dispatch as Blocked; telling the front end to stall is not written yet.
+template <class Impl>
+void
+BackEnd<Impl>::dispatchStall()
+{
+    dispatchStatus = Blocked;
+    if (!cpu->decoupledFrontEnd) {
+        // Tell front end to stall here through a timebuffer, or just tell
+        // it directly.
+    }
+}
+// Resumes dispatch, and dispatches at once, when IQ, LSQ and ROB have room.
+template <class Impl>
+void
+BackEnd<Impl>::checkDispatchStatus()
+{
+    assert(dispatchStatus == Blocked);
+    if (IQ.isFull() || LSQ.isFull() || isFull())
+        return;
+    DPRINTF(BE, "Dispatch no longer blocked\n");
+    dispatchStatus = Running;
+    dispatchInsts();
+}
+// Asks the IQ to schedule its ready instructions for this cycle.
+template <class Impl>
+void
+BackEnd<Impl>::scheduleReadyInsts()
+{
+    // Tell IQ to put any ready instructions into the instruction list.
+    // Probably want to have a list of DynInstPtrs returned here.  Then I
+    // can choose to either put them into a time buffer to simulate
+    // IQ scheduling time, or hand them directly off to the next stage.
+    // Do you ever want to directly hand it off to the next stage?
+    DPRINTF(BE, "Trying to schedule ready instructions\n");
+    IQ.scheduleReadyInsts();
+}
+// Executes this cycle's issued insts; may squash on mispredict/violation/block.
+template <class Impl>
+void
+BackEnd<Impl>::executeInsts()
+{
+    int insts_to_execute = instsToExecute->size;
+
+    issued_ops[0]+= insts_to_execute;
+    n_issued_dist[insts_to_execute]++;
+
+    DPRINTF(BE, "Trying to execute %i instructions\n", insts_to_execute);
+
+    fetchRedirect[0] = false;
+
+    while (insts_to_execute > 0) {
+        // Get ready instruction from the IQ (or queue coming out of IQ)
+        // Execute the ready instruction.
+        // Wakeup any dependents if it's done.
+        DynInstPtr inst = IQ.getReadyInst();
+
+        DPRINTF(BE, "Executing inst [sn:%lli] PC: %#x\n",
+                inst->seqNum, inst->readPC());
+
+        ++funcExeInst;
+
+        // Check if the instruction is squashed; if so then skip it
+        // and don't count it towards the FU usage.
+        if (inst->isSquashed()) {
+            DPRINTF(BE, "Execute: Instruction was squashed.\n");
+
+            // Not sure how to handle this plus the method of sending # of
+            // instructions to use.  Probably will just have to count it
+            // towards the bandwidth usage, but not the FU usage.
+            --insts_to_execute;
+
+            // Consider this instruction executed so that commit can go
+            // ahead and retire the instruction.
+            inst->setExecuted();
+
+            // Not sure if I should set this here or just let commit try to
+            // commit any squashed instructions.  I like the latter a bit more.
+            inst->setCanCommit();
+
+//            ++iewExecSquashedInsts;
+
+            continue;
+        }
+        // NOTE: fault is assigned below but never read in this function.
+        Fault fault = NoFault;
+
+        // Execute instruction.
+        // Note that if the instruction faults, it will be handled
+        // at the commit stage.
+        if (inst->isMemRef() &&
+            (!inst->isDataPrefetch() && !inst->isInstPrefetch())) {
+            DPRINTF(BE, "Execute: Initiating access for memory "
+                    "reference.\n");
+
+            // Tell the LDSTQ to execute this instruction (if it is a load).
+            if (inst->isLoad()) {
+                // Loads will mark themselves as executed, and their writeback
+                // event adds the instruction to the queue to commit
+                fault = LSQ.executeLoad(inst);
+
+//                ++iewExecLoadInsts;
+            } else if (inst->isStore()) {
+                LSQ.executeStore(inst);
+
+//                ++iewExecStoreInsts;
+
+                if (!(inst->req->flags & LOCKED)) {
+                    inst->setExecuted();
+
+                    instToCommit(inst);
+                }
+                // Store conditionals will mark themselves as executed, and
+                // their writeback event will add the instruction to the queue
+                // to commit.
+            } else {
+                panic("Unexpected memory type!\n");
+            }
+
+        } else {
+            inst->execute();
+
+//            ++iewExecutedInsts;
+
+            inst->setExecuted();
+
+            instToCommit(inst);
+        }
+
+        updateExeInstStats(inst);
+
+        // Probably should have some sort of function for this.
+        // More general question of how to handle squashes?  Have some sort of
+        // squash unit that controls it?  Probably...
+        // Check if branch was correct.  This check happens after the
+        // instruction is added to the queue because even if the branch
+        // is mispredicted, the branch instruction itself is still valid.
+        // Only handle this if there hasn't already been something that
+        // redirects fetch in this group of instructions.
+
+        // This probably needs to prioritize the redirects if a different
+        // scheduler is used.  Currently the scheduler schedules the oldest
+        // instruction first, so the branch resolution order will be correct.
+        unsigned tid = inst->threadNumber;
+
+        if (!fetchRedirect[tid]) {
+
+            if (inst->mispredicted()) {
+                fetchRedirect[tid] = true;
+
+                DPRINTF(BE, "Execute: Branch mispredict detected.\n");
+                DPRINTF(BE, "Execute: Redirecting fetch to PC: %#x.\n",
+                        inst->nextPC);
+
+                // If incorrect, then signal the ROB that it must be squashed.
+                squashDueToBranch(inst);
+
+                if (inst->predTaken()) {
+//                    predictedTakenIncorrect++;
+                } else {
+//                    predictedNotTakenIncorrect++;
+                }
+            } else if (LSQ.violation()) {
+                fetchRedirect[tid] = true;
+
+                // Get the DynInst that caused the violation.  Note that this
+                // clears the violation signal.
+                DynInstPtr violator;
+                violator = LSQ.getMemDepViolator();
+
+                DPRINTF(BE, "LDSTQ detected a violation.  Violator PC: "
+                        "%#x, inst PC: %#x.  Addr is: %#x.\n",
+                        violator->readPC(), inst->readPC(), inst->physEffAddr);
+
+                // Tell the instruction queue that a violation has occured.
+//                IQ.violation(inst, violator);
+
+                // Squash.
+//                squashDueToMemOrder(inst,tid);
+                squashDueToBranch(inst);
+
+//                ++memOrderViolationEvents;
+            } else if (LSQ.loadBlocked()) {
+                fetchRedirect[tid] = true;
+
+                DPRINTF(BE, "Load operation couldn't execute because the "
+                        "memory system is blocked.  PC: %#x [sn:%lli]\n",
+                        inst->readPC(), inst->seqNum);
+
+                squashDueToMemBlocked(inst);
+            }
+        }
+
+//        instList.pop_front();
+
+        --insts_to_execute;
+        // NOTE(review): commitInst() also bumps thread->numInsts -- confirm.
+        // keep an instruction count
+        thread->numInst++;
+        thread->numInsts++;
+    }
+
+    assert(insts_to_execute >= 0);
+}
+// Queues a finished inst for writeback in the first slot with a free port.
+template<class Impl>
+void
+BackEnd<Impl>::instToCommit(DynInstPtr &inst)
+{
+    const int ports_per_cycle = wbWidth;
+
+    DPRINTF(BE, "Sending instructions to commit [sn:%lli] PC %#x.\n",
+            inst->seqNum, inst->readPC());
+
+    // Starting at the current writeback slot, step past every slot whose
+    // write ports are already taken.  Every instruction is naively assumed
+    // to take one cycle; otherwise the slot would have to be chosen from
+    // the instruction's latency.  Running out of slots trips the assert.
+    for (;;) {
+        if (numInstsToWB[wbCycle].size < ports_per_cycle)
+            break;
+
+        ++wbCycle;
+        assert(wbCycle < 5);
+    }
+
+    // Add finished instruction to queue to commit.
+    writeback.push_back(inst);
+    numInstsToWB[wbCycle].size++;
+
+    // An inst that had to be pushed to a later slot than slot 0 is
+    // counted as penalized.
+    if (wbCycle != 0)
+        wb_penalized[0]++;
+}
+// Writes back up to wbWidth queued insts, waking dependents of executed ones.
+template <class Impl>
+void
+BackEnd<Impl>::writebackInsts()
+{
+    // Open question from the original author: without a guarantee that
+    // commit can always accept every inst written back, it is unclear how
+    // to stop an instruction from waking its dependents more than once.
+    const int max_writebacks = wbWidth;
+    int num_written = 0;
+    int num_consumers = 0;
+
+    InstListIt cur = writeback.begin();
+    const InstListIt last = writeback.end();
+
+    while (num_written < max_writebacks && cur != last) {
+        DynInstPtr inst = *cur;
+
+        // Some instructions will be sent to commit without having
+        // executed because they need commit to handle them.
+        // E.g. Uncached loads have not actually executed when they
+        // are first sent to commit.  Instead commit must tell the LSQ
+        // when it's ready to execute the uncached load.
+        if (!inst->isSquashed()) {
+            DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n",
+                    inst->seqNum, inst->readPC());
+
+            inst->setCanCommit();
+            inst->setCompleted();
+
+            // Dependents are only woken for insts that have executed.
+            const int woken = inst->isExecuted() ? IQ.wakeDependents(inst) : 0;
+            if (woken) {
+                producer_inst[0]++;
+                num_consumers += woken;
+            }
+        }
+
+        // Squashed or not, the entry leaves the queue and is counted.
+        cur = writeback.erase(cur);
+        ++num_written;
+    }
+
+    LSQ.writebackStores();
+    consumer_inst[0]+= num_consumers;
+    writeback_count[0]+= num_written;
+}
+// Tries to commit the inst at the head of the ROB; returns true if it retired.
+template <class Impl>
+bool
+BackEnd<Impl>::commitInst(int inst_num)
+{
+    // Read instruction from the head of the ROB
+    DynInstPtr inst = instList.front();
+
+    // Make sure instruction is valid
+    assert(inst);
+
+    if (!inst->readyToCommit())
+        return false;
+
+    DPRINTF(BE, "Trying to commit instruction [sn:%lli] PC:%#x\n",
+            inst->seqNum, inst->readPC());
+
+    // If the instruction is not executed yet, then it is a non-speculative
+    // or store inst.  Signal backwards that it should be executed.
+    if (!inst->isExecuted()) {
+        // Keep this number correct.  We have not yet actually executed
+        // and committed this instruction.
+//        thread->funcExeInst--;
+
+        if (inst->isNonSpeculative()) {
+#if !FULL_SYSTEM
+            // Hack to make sure syscalls aren't executed until all stores
+            // write back their data.  This direct communication shouldn't
+            // be used for anything other than this.
+            if (inst_num > 0 || LSQ.hasStoresToWB()) {  // >0: not first commit this cycle
+                DPRINTF(BE, "Waiting for all stores to writeback.\n");
+                return false;
+            }
+#endif
+
+            DPRINTF(BE, "Encountered a store or non-speculative "
+                    "instruction at the head of the ROB, PC %#x.\n",
+                    inst->readPC());
+
+            // Send back the non-speculative instruction's sequence number.
+            toIEW->nonSpecSeqNum = inst->seqNum;
+
+            // Change the instruction so it won't try to commit again until
+            // it is executed.
+            inst->clearCanCommit();
+
+//            ++commitNonSpecStalls;
+
+            return false;
+        } else if (inst->isLoad()) {
+            DPRINTF(BE, "[sn:%lli]: Uncached load, PC %#x.\n",
+                    inst->seqNum, inst->readPC());
+
+            // Send back the non-speculative instruction's sequence
+            // number.  Maybe just tell the lsq to re-execute the load.
+            toIEW->nonSpecSeqNum = inst->seqNum;
+            toIEW->uncached = true;
+            toIEW->lqIdx = inst->lqIdx;
+
+            inst->clearCanCommit();
+
+            return false;
+        } else {
+            panic("Trying to commit un-executed instruction "
+                  "of unknown type!\n");
+        }
+    }
+
+    // Now check if it's one of the special trap or barrier or
+    // serializing instructions.
+    if (inst->isThreadSync())
+    {
+        // Not handled for now.
+        panic("Barrier instructions are not handled yet.\n");
+    }
+
+    // Check if the instruction caused a fault.  If so, trap.
+    Fault inst_fault = inst->getFault();
+
+    if (inst_fault != NoFault) {
+        if (!inst->isNop()) {
+#if FULL_SYSTEM
+            DPRINTF(BE, "Inst [sn:%lli] PC %#x has a fault\n",
+                    inst->seqNum, inst->readPC());
+
+//            assert(!thread->inSyscall);
+
+//            thread->inSyscall = true;
+
+            // Consider holding onto the trap and waiting until the trap event
+            // happens for this to be executed.
+            inst_fault->invoke(thread->getXCProxy());
+
+            // Exit state update mode to avoid accidental updating.
+//            thread->inSyscall = false;
+
+//            commitStatus = TrapPending;
+
+            // Generate trap squash event.
+//            generateTrapEvent();
+
+            return false;
+#else // !FULL_SYSTEM
+            panic("fault (%d) detected @ PC %08p", inst_fault,
+                  inst->PC);
+#endif // FULL_SYSTEM
+        }
+    }
+
+    if (inst->isControl()) {
+//        ++commitCommittedBranches;
+    }
+    // Record inst in the thread's rename table for each destination, counting them.
+    int freed_regs = 0;
+
+    for (int i = 0; i < inst->numDestRegs(); ++i) {
+        DPRINTF(BE, "Commit rename map setting register %i to [sn:%lli]\n",
+                (int)inst->destRegIdx(i), inst->seqNum);
+        thread->renameTable[inst->destRegIdx(i)] = inst;
+        ++freed_regs;
+    }
+
+    if (inst->traceData) {
+        inst->traceData->finalize();
+        inst->traceData = NULL;
+    }
+
+    inst->clearDependents();
+
+    frontEnd->addFreeRegs(freed_regs);
+
+    instList.pop_front();
+
+    --numInsts;
+    cpu->numInst++;
+    thread->numInsts++;
+    ++thread->funcExeInst;
+    thread->PC = inst->readNextPC();
+    updateComInstStats(inst);
+
+    // Write the done sequence number here.
+    toIEW->doneSeqNum = inst->seqNum;
+
+    return true;
+}
+// Commits up to commitWidth insts from the ROB head and samples the count.
+template <class Impl>
+void
+BackEnd<Impl>::commitInsts()
+{
+    int commit_width = commitWidth ? commitWidth : width;
+
+    // inst_num counts successful commits only; it is also passed to
+    // commitInst() as the number of insts already committed this cycle.
+    int inst_num = 0;
+    while (!instList.empty() && inst_num < commit_width) {
+        // squash() erases squashed insts from instList, so this is fatal.
+        if (instList.front()->isSquashed())
+            panic("No squashed insts should still be on the list!");
+
+        // Count the inst only once it has committed; incrementing up front
+        // recorded a failed attempt as a committed instruction.
+        if (!commitInst(inst_num))
+            break;
+        ++inst_num;
+    }
+    n_committed_dist.sample(inst_num);
+}
+// Squashes every inst younger than sn in the IQ, LSQ, dispatch queue and ROB.
+template <class Impl>
+void
+BackEnd<Impl>::squash(const InstSeqNum &sn)
+{
+    IQ.squash(sn);
+    LSQ.squash(sn);
+
+    int freed_regs = 0;
+
+    // Walk the dispatch queue from the back.  The iterator is only stepped
+    // back after checking it is not begin(), so it never moves before it.
+    InstListIt insts_it = dispatch.end();
+    while (insts_it != dispatch.begin()) {
+        --insts_it;
+        if ((*insts_it)->seqNum <= sn)
+            break;
+
+        DPRINTF(BE, "Squashing instruction PC %#x, [sn:%lli].\n",
+                (*insts_it)->readPC(),
+                (*insts_it)->seqNum);
+
+        // Mark the instruction as squashed, and ready to commit so that
+        // it can drain out of the pipeline.
+        (*insts_it)->setSquashed();
+        (*insts_it)->setCanCommit();
+
+        // Point each destination back at its previous producer.
+        for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) {
+            renameTable[(*insts_it)->destRegIdx(i)] =
+                (*insts_it)->getPrevDestInst(i);
+            ++freed_regs;
+        }
+
+        (*insts_it)->clearDependents();
+    }
+
+    // Same for the ROB, except that squashed entries are erased.  The tail
+    // is looked up only after the empty() check.
+    while (!instList.empty()) {
+        insts_it = instList.end();
+        --insts_it;
+        if ((*insts_it)->seqNum <= sn)
+            break;
+
+        DPRINTF(BE, "Squashing instruction PC %#x, [sn:%lli].\n",
+                (*insts_it)->readPC(),
+                (*insts_it)->seqNum);
+
+        (*insts_it)->setSquashed();
+        (*insts_it)->setCanCommit();
+
+        for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) {
+            renameTable[(*insts_it)->destRegIdx(i)] =
+                (*insts_it)->getPrevDestInst(i);
+            ++freed_regs;
+        }
+
+        (*insts_it)->clearDependents();
+        instList.erase(insts_it);
+        --numInsts;
+    }
+
+    frontEnd->addFreeRegs(freed_regs);
+}
+// Sets the xcSquash flag; no squash work is done here.
+template <class Impl>
+void
+BackEnd<Impl>::squashFromXC()
+{
+    xcSquash = true;  // NOTE(review): tick() calls this while set; nothing here clears it
+}
+// Squashes everything younger than inst and redirects the front end.
+template <class Impl>
+void
+BackEnd<Impl>::squashDueToBranch(DynInstPtr &inst)
+{
+    const InstSeqNum &branch_sn = inst->seqNum;
+    squash(branch_sn);
+    frontEnd->squash(branch_sn, inst->readNextPC(), true,
+                     inst->mispredicted());
+}
+
+template <class Impl>
+void
+BackEnd<Impl>::squashDueToMemBlocked(DynInstPtr &inst)
+{
+    DPRINTF(BE, "Memory blocked, squashing load and younger insts, "
+            "PC: %#x [sn:%lli].\n", inst->readPC(), inst->seqNum);
+
+    squash(inst->seqNum - 1);  // sn - 1 so that inst itself is squashed too
+    frontEnd->squash(inst->seqNum - 1, inst->readPC());
+}
+
+template <class Impl>
+void
+BackEnd<Impl>::fetchFault(Fault &fault)  // no-op: fault is ignored here
+{
+}
+
+template <class Impl>
+void
+BackEnd<Impl>::updateExeInstStats(DynInstPtr &inst)
+{
+    int thread_number = inst->threadNumber;
+
+    // Count inst in the per-thread exe_* counters.  When TARGET_ALPHA is
+    // defined, data prefetches go to exe_swp and everything else goes to
+    // exe_inst; otherwise every instruction goes to exe_inst.
+#ifdef TARGET_ALPHA
+    if (inst->isDataPrefetch())
+        exe_swp[thread_number]++;
+    else
+        exe_inst[thread_number]++;
+#else
+    exe_inst[thread_number]++;
+#endif
+
+    //
+    //  Control instructions are additionally counted in exe_branches.
+    //
+    if (inst->isControl())
+        exe_branches[thread_number]++;
+
+    //
+    //  Memory references are counted in exe_refs; loads also in exe_loads.
+    //
+    if (inst->isMemRef()) {
+        exe_refs[thread_number]++;
+
+        if (inst->isLoad())
+            exe_loads[thread_number]++;
+    }
+}
+
+template <class Impl>
+void
+BackEnd<Impl>::updateComInstStats(DynInstPtr &inst)
+{
+    unsigned thread = inst->threadNumber;
+
+    // Count inst in the per-thread stat_com_* counters.  When TARGET_ALPHA
+    // is defined, data prefetches go to stat_com_swp and everything else
+    // goes to stat_com_inst; otherwise every instruction is a stat_com_inst.
+#ifdef TARGET_ALPHA
+    if (inst->isDataPrefetch()) {
+        stat_com_swp[thread]++;
+    } else {
+        stat_com_inst[thread]++;
+    }
+#else
+    stat_com_inst[thread]++;
+#endif
+
+    //
+    //  Control instructions are additionally counted in stat_com_branches.
+    //
+    if (inst->isControl())
+        stat_com_branches[thread]++;
+
+    //
+    //  Memory references go to stat_com_refs; loads also to stat_com_loads.
+    //
+    if (inst->isMemRef()) {
+        stat_com_refs[thread]++;
+
+        if (inst->isLoad()) {
+            stat_com_loads[thread]++;
+        }
+    }
+
+    if (inst->isMemBarrier()) {  // memory barriers have their own counter
+        stat_com_membars[thread]++;
+    }
+}
+
+template <class Impl>
+void
+BackEnd<Impl>::dumpInsts()
+{
+    int num = 0;        // running index; not reset between the three lists
+    int valid_num = 0;  // running count of insts reported with a Count line
+
+    InstListIt inst_list_it = instList.begin();
+
+    cprintf("Inst list size: %i\n", instList.size());
+
+    while (inst_list_it != instList.end())
+    {
+        cprintf("Instruction:%i\n",
+                num);
+        if (!(*inst_list_it)->isSquashed()) {
+            if (!(*inst_list_it)->isIssued()) {
+                ++valid_num;
+                cprintf("Count:%i\n", valid_num);
+            } else if ((*inst_list_it)->isMemRef() &&
+                       !(*inst_list_it)->memOpDone) {
+                // Issued memory references whose memOpDone flag is still
+                // clear also count towards the total instructions.
+                ++valid_num;
+                cprintf("Count:%i\n", valid_num);
+            }
+        }
+
+        cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
+                "Issued:%i\nSquashed:%i\n",
+                (*inst_list_it)->readPC(),
+                (*inst_list_it)->seqNum,
+                (*inst_list_it)->threadNumber,
+                (*inst_list_it)->isIssued(),
+                (*inst_list_it)->isSquashed());
+
+        if ((*inst_list_it)->isMemRef()) {
+            cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
+        }
+
+        cprintf("\n");
+
+        inst_list_it++;
+        ++num;
+    }
+
+    cprintf("Dispatch list size: %i\n", dispatch.size());
+
+    inst_list_it = dispatch.begin();
+
+    while (inst_list_it != dispatch.end())
+    {
+        cprintf("Instruction:%i\n",
+                num);
+        if (!(*inst_list_it)->isSquashed()) {
+            if (!(*inst_list_it)->isIssued()) {
+                ++valid_num;
+                cprintf("Count:%i\n", valid_num);
+            } else if ((*inst_list_it)->isMemRef() &&
+                       !(*inst_list_it)->memOpDone) {
+                // Issued memory references whose memOpDone flag is still
+                // clear also count towards the total instructions.
+                ++valid_num;
+                cprintf("Count:%i\n", valid_num);
+            }
+        }
+
+        cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
+                "Issued:%i\nSquashed:%i\n",
+                (*inst_list_it)->readPC(),
+                (*inst_list_it)->seqNum,
+                (*inst_list_it)->threadNumber,
+                (*inst_list_it)->isIssued(),
+                (*inst_list_it)->isSquashed());
+
+        if ((*inst_list_it)->isMemRef()) {
+            cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
+        }
+
+        cprintf("\n");
+
+        inst_list_it++;
+        ++num;
+    }
+
+    cprintf("Writeback list size: %i\n", writeback.size());
+
+    inst_list_it = writeback.begin();
+
+    while (inst_list_it != writeback.end())
+    {
+        cprintf("Instruction:%i\n",
+                num);
+        if (!(*inst_list_it)->isSquashed()) {
+            if (!(*inst_list_it)->isIssued()) {
+                ++valid_num;
+                cprintf("Count:%i\n", valid_num);
+            } else if ((*inst_list_it)->isMemRef() &&
+                       !(*inst_list_it)->memOpDone) {
+                // Issued memory references whose memOpDone flag is still
+                // clear also count towards the total instructions.
+                ++valid_num;
+                cprintf("Count:%i\n", valid_num);
+            }
+        }
+
+        cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
+                "Issued:%i\nSquashed:%i\n",
+                (*inst_list_it)->readPC(),
+                (*inst_list_it)->seqNum,
+                (*inst_list_it)->threadNumber,
+                (*inst_list_it)->isIssued(),
+                (*inst_list_it)->isSquashed());
+
+        if ((*inst_list_it)->isMemRef()) {
+            cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
+        }
+
+        cprintf("\n");
+
+        inst_list_it++;
+        ++num;
+    }
+}
diff --git a/cpu/ozone/cpu.cc b/cpu/ozone/cpu.cc
index cbeca9d3b..d2ea0164c 100644
--- a/cpu/ozone/cpu.cc
+++ b/cpu/ozone/cpu.cc
@@ -26,8 +26,9 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include "cpu/ooo_cpu/ooo_cpu_impl.hh"
-#include "cpu/ooo_cpu/ooo_dyn_inst.hh"
-#include "cpu/ooo_cpu/ooo_impl.hh"
+#include "cpu/ozone/cpu_impl.hh"
+#include "cpu/ozone/ozone_impl.hh"
+#include "cpu/ozone/simple_impl.hh"
 
-template class OoOCPU<OoOImpl>;
+template class OzoneCPU<SimpleImpl>;
+template class OzoneCPU<OzoneImpl>;
diff --git a/cpu/ozone/cpu.hh b/cpu/ozone/cpu.hh
index f5d84d656..200ced265 100644
--- a/cpu/ozone/cpu.hh
+++ b/cpu/ozone/cpu.hh
@@ -26,15 +26,19 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#ifndef __CPU_OOO_CPU_OOO_CPU_HH__
-#define __CPU_OOO_CPU_OOO_CPU_HH__
+#ifndef __CPU_OZONE_CPU_HH__
+#define __CPU_OZONE_CPU_HH__
+
+#include <set>
 
 #include "base/statistics.hh"
+#include "base/timebuf.hh"
 #include "config/full_system.hh"
 #include "cpu/base.hh"
 #include "cpu/exec_context.hh"
-#include "encumbered/cpu/full/fu_pool.hh"
-#include "cpu/ooo_cpu/ea_list.hh"
+#include "cpu/inst_seq.hh"
+#include "cpu/ozone/rename_table.hh"
+#include "cpu/ozone/thread_state.hh"
 #include "cpu/pc_event.hh"
 #include "cpu/static_inst.hh"
 #include "mem/mem_interface.hh"
@@ -42,16 +46,19 @@
 
 // forward declarations
 #if FULL_SYSTEM
-class Processor;
+#include "arch/alpha/tlb.hh"
+
 class AlphaITB;
 class AlphaDTB;
 class PhysicalMemory;
+class MemoryController;
 
 class RemoteGDB;
 class GDBListener;
 
 #else
 
+class PageTable;
 class Process;
 
 #endif // FULL_SYSTEM
@@ -72,23 +79,180 @@ namespace Trace {
  */
 
 template <class Impl>
-class OoOCPU : public BaseCPU
+class OzoneCPU : public BaseCPU
 {
   private:
+    typedef typename Impl::FrontEnd FrontEnd;
+    typedef typename Impl::BackEnd BackEnd;
+    // DynInst/DynInstPtr below are likewise taken from the Impl class.
     typedef typename Impl::DynInst DynInst;
     typedef typename Impl::DynInstPtr DynInstPtr;
 
+    typedef TheISA::MiscReg MiscReg;
+
+  public:
+    class OzoneXC : public ExecContext {
+      public:
+        OzoneCPU<Impl> *cpu;
+
+        OzoneThreadState<Impl> *thread;
+
+        BaseCPU *getCpuPtr();
+
+        void setCpuId(int id);
+
+        int readCpuId() { return thread->cpuId; }
+
+        FunctionalMemory *getMemPtr() { return thread->mem; }
+
+#if FULL_SYSTEM
+        System *getSystemPtr() { return cpu->system; }
+
+        PhysicalMemory *getPhysMemPtr() { return cpu->physmem; }
+
+        AlphaITB *getITBPtr() { return cpu->itb; }
+
+        AlphaDTB * getDTBPtr() { return cpu->dtb; }
+#else
+        Process *getProcessPtr() { return thread->process; }
+#endif
+
+        Status status() const { return thread->_status; }
+
+        void setStatus(Status new_status);
+
+        /// Set the status to Active.  Optional delay indicates number of
+        /// cycles to wait before beginning execution.
+        void activate(int delay = 1);
+
+        /// Set the status to Suspended.
+        void suspend();
+
+        /// Set the status to Unallocated.
+        void deallocate();
+
+        /// Set the status to Halted.
+        void halt();
+
+#if FULL_SYSTEM
+        void dumpFuncProfile();
+#endif
+
+        void takeOverFrom(ExecContext *old_context);
+
+        void regStats(const std::string &name);
+
+        void serialize(std::ostream &os);
+        void unserialize(Checkpoint *cp, const std::string &section);
+
+#if FULL_SYSTEM
+        Event *getQuiesceEvent();
+
+        Tick readLastActivate();
+        Tick readLastSuspend();
+
+        void profileClear();
+        void profileSample();
+#endif
+
+        int getThreadNum();
+
+        // Also somewhat obnoxious.  Really only used for the TLB fault.
+        TheISA::MachInst getInst();
+
+        void copyArchRegs(ExecContext *xc);
+
+        void clearArchRegs();
+
+        uint64_t readIntReg(int reg_idx);
+
+        float readFloatRegSingle(int reg_idx);
+
+        double readFloatRegDouble(int reg_idx);
+
+        uint64_t readFloatRegInt(int reg_idx);
+
+        void setIntReg(int reg_idx, uint64_t val);
+
+        void setFloatRegSingle(int reg_idx, float val);
+
+        void setFloatRegDouble(int reg_idx, double val);
+
+        void setFloatRegInt(int reg_idx, uint64_t val);
+
+        uint64_t readPC() { return thread->PC; }
+        void setPC(Addr val);
+
+        uint64_t readNextPC() { return thread->nextPC; }
+        void setNextPC(Addr val);
+
+      public:
+        // ISA stuff:
+        MiscReg readMiscReg(int misc_reg);
+
+        MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault);
+
+        Fault setMiscReg(int misc_reg, const MiscReg &val);
+
+        Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val);
+
+        unsigned readStCondFailures()
+        { return thread->storeCondFailures; }
+
+        void setStCondFailures(unsigned sc_failures)
+        { thread->storeCondFailures = sc_failures; }
+
+#if FULL_SYSTEM
+        bool inPalMode() { return cpu->inPalMode(); }
+#endif
+
+        bool misspeculating() { return false; }
+
+#if !FULL_SYSTEM
+        TheISA::IntReg getSyscallArg(int i)
+        { return thread->renameTable[TheISA::ArgumentReg0 + i]->readIntResult(); }
+
+        // used to shift args for indirect syscall; sets argument i to val
+        void setSyscallArg(int i, TheISA::IntReg val)
+        { thread->renameTable[TheISA::ArgumentReg0 + i]->setIntResult(val); }
+
+        void setSyscallReturn(SyscallReturn return_value)
+        { cpu->setSyscallReturn(return_value, thread->tid); }
+
+        Counter readFuncExeInst() { return thread->funcExeInst; }
+
+        void setFuncExeInst(Counter new_val)
+        { thread->funcExeInst = new_val; }
+#endif
+    };
+
+    // execution context proxy
+    OzoneXC xcProxy;
+
+    typedef OzoneThreadState<Impl> ImplState;
+
+  private:
+    OzoneThreadState<Impl> thread;
+/*
+    // Squash event for when the XC needs to squash all inflight instructions.
+    struct XCSquashEvent : public Event
+    {
+        void process();
+        const char *description();
+    };
+*/
   public:
     // main simulation loop (one cycle)
     void tick();
 
+    std::set<InstSeqNum> snList;
   private:
     struct TickEvent : public Event
     {
-        OoOCPU *cpu;
+        OzoneCPU *cpu;
         int width;
 
-        TickEvent(OoOCPU *c, int w);
+        TickEvent(OzoneCPU *c, int w);
         void process();
         const char *description();
     };
@@ -122,16 +286,14 @@ class OoOCPU : public BaseCPU
     enum Status {
         Running,
         Idle,
-        IcacheMiss,
-        IcacheMissComplete,
-        DcacheMissStall,
         SwitchedOut
     };
 
-  private:
     Status _status;
 
   public:
+    bool checkInterrupts;
+
     void post_interrupt(int int_num, int index);
 
     void zero_fill_64(Addr addr) {
@@ -142,33 +304,24 @@ class OoOCPU : public BaseCPU
         }
     };
 
-    struct Params : public BaseCPU::Params
-    {
-        MemInterface *icache_interface;
-        MemInterface *dcache_interface;
-        int width;
-#if FULL_SYSTEM
-        AlphaITB *itb;
-        AlphaDTB *dtb;
-        FunctionalMemory *mem;
-#else
-        Process *process;
-#endif
-        int issueWidth;
-    };
+    typedef typename Impl::Params Params;
 
-    OoOCPU(Params *params);
+    OzoneCPU(Params *params);
 
-    virtual ~OoOCPU();
+    virtual ~OzoneCPU();
 
     void init();
 
-  private:
-    void copyFromXC();
-
   public:
-    // execution context
-    ExecContext *xc;
+    BaseCPU *getCpuPtr() { return this; }
+
+    void setCpuId(int id) { cpuId = id; }
+
+    int readCpuId() { return cpuId; }
+
+//    FunctionalMemory *getMemPtr() { return mem; }
+
+    int cpuId;
 
     void switchOut();
     void takeOverFrom(BaseCPU *oldCPU);
@@ -177,6 +330,16 @@ class OoOCPU : public BaseCPU
     Addr dbg_vtophys(Addr addr);
 
     bool interval_stats;
+
+    AlphaITB *itb;
+    AlphaDTB *dtb;
+    System *system;
+
+    // the following two fields are redundant, since we can always
+    // look them up through the system pointer, but we'll leave them
+    // here for now for convenience
+    MemoryController *memctrl;
+    PhysicalMemory *physmem;
 #endif
 
     // L1 instruction cache
@@ -185,54 +348,18 @@ class OoOCPU : public BaseCPU
     // L1 data cache
     MemInterface *dcacheInterface;
 
-    FuncUnitPool *fuPool;
-
-    // Refcounted pointer to the one memory request.
-    MemReqPtr cacheMemReq;
-
-    class ICacheCompletionEvent : public Event
-    {
-      private:
-        OoOCPU *cpu;
-
-      public:
-        ICacheCompletionEvent(OoOCPU *_cpu);
-
-        virtual void process();
-        virtual const char *description();
-    };
-
-    // Will need to create a cache completion event upon any memory miss.
-    ICacheCompletionEvent iCacheCompletionEvent;
-
-    class DCacheCompletionEvent;
-
-    typedef typename
-    std::list<DCacheCompletionEvent>::iterator DCacheCompEventIt;
-
-    class DCacheCompletionEvent : public Event
-    {
-      private:
-        OoOCPU *cpu;
-        DynInstPtr inst;
-        DCacheCompEventIt dcceIt;
-
-      public:
-        DCacheCompletionEvent(OoOCPU *_cpu, DynInstPtr &_inst,
-                              DCacheCompEventIt &_dcceIt);
-
-        virtual void process();
-        virtual const char *description();
-    };
-
-    friend class DCacheCompletionEvent;
+#if !FULL_SYSTEM
+    PageTable *pTable;
+#endif
 
-  protected:
-    std::list<DCacheCompletionEvent> dCacheCompList;
-    DCacheCompEventIt dcceIt;
+    FrontEnd *frontEnd;
 
+    BackEnd *backEnd;
   private:
     Status status() const { return _status; }
+    void setStatus(Status new_status) { _status = new_status; }
+
+    // Not sure what an activate() call on the CPU's proxy XC would mean...
 
     virtual void activateContext(int thread_num, int delay);
     virtual void suspendContext(int thread_num);
@@ -244,17 +371,19 @@ class OoOCPU : public BaseCPU
     virtual void resetStats();
 
     // number of simulated instructions
+  public:
     Counter numInst;
     Counter startNumInst;
-    Stats::Scalar<> numInsts;
+//    Stats::Scalar<> numInsts;
 
     virtual Counter totalInstructions() const
     {
         return numInst - startNumInst;
     }
 
+  private:
     // number of simulated memory references
-    Stats::Scalar<> numMemRefs;
+//    Stats::Scalar<> numMemRefs;
 
     // number of simulated loads
     Counter numLoad;
@@ -263,27 +392,15 @@ class OoOCPU : public BaseCPU
     // number of idle cycles
     Stats::Average<> notIdleFraction;
     Stats::Formula idleFraction;
-
-    // number of cycles stalled for I-cache misses
-    Stats::Scalar<> icacheStallCycles;
-    Counter lastIcacheStall;
-
-    // number of cycles stalled for D-cache misses
-    Stats::Scalar<> dcacheStallCycles;
-    Counter lastDcacheStall;
-
-    void processICacheCompletion();
-
   public:
 
     virtual void serialize(std::ostream &os);
     virtual void unserialize(Checkpoint *cp, const std::string &section);
 
+
 #if FULL_SYSTEM
     bool validInstAddr(Addr addr) { return true; }
     bool validDataAddr(Addr addr) { return true; }
-    int getInstAsid() { return xc->regs.instAsid(); }
-    int getDataAsid() { return xc->regs.dataAsid(); }
 
     Fault translateInstReq(MemReqPtr &req)
     {
@@ -302,13 +419,13 @@ class OoOCPU : public BaseCPU
 
 #else
     bool validInstAddr(Addr addr)
-    { return xc->validInstAddr(addr); }
+    { return true; }
 
     bool validDataAddr(Addr addr)
-    { return xc->validDataAddr(addr); }
+    { return true; }
 
-    int getInstAsid() { return xc->asid; }
-    int getDataAsid() { return xc->asid; }
+    int getInstAsid() { return thread.asid; }
+    int getDataAsid() { return thread.asid; }
 
     Fault dummyTranslation(MemReqPtr &req)
     {
@@ -321,27 +438,38 @@ class OoOCPU : public BaseCPU
         req->paddr = req->paddr | (Addr)req->asid << sizeof(Addr) * 8 - 16;
         return NoFault;
     }
+
+    /** Translates instruction request in syscall emulation mode. */
     Fault translateInstReq(MemReqPtr &req)
     {
         return dummyTranslation(req);
     }
+
+    /** Translates data read request in syscall emulation mode. */
     Fault translateDataReadReq(MemReqPtr &req)
     {
         return dummyTranslation(req);
     }
+
+    /** Translates data write request in syscall emulation mode. */
     Fault translateDataWriteReq(MemReqPtr &req)
     {
         return dummyTranslation(req);
     }
-
 #endif
-
+    /** CPU read function, forwards read to LSQ. */
     template <class T>
-    Fault read(Addr addr, T &data, unsigned flags, DynInstPtr inst);
+    Fault read(MemReqPtr &req, T &data, int load_idx)
+    {
+        return backEnd->read(req, data, load_idx);
+    }
 
+    /** CPU write function, forwards write to LSQ. */
     template <class T>
-    Fault write(T data, Addr addr, unsigned flags,
-                uint64_t *res, DynInstPtr inst);
+    Fault write(MemReqPtr &req, T &data, int store_idx)
+    {
+        return backEnd->write(req, data, store_idx);
+    }
 
     void prefetch(Addr addr, unsigned flags)
     {
@@ -357,270 +485,38 @@ class OoOCPU : public BaseCPU
 
     Fault copy(Addr dest);
 
-  private:
-    bool executeInst(DynInstPtr &inst);
-
-    void renameInst(DynInstPtr &inst);
-
-    void addInst(DynInstPtr &inst);
-
-    void commitHeadInst();
-
-    bool getOneInst();
-
-    Fault fetchCacheLine();
-
-    InstSeqNum getAndIncrementInstSeq();
-
-    bool ambigMemAddr;
-
-  private:
     InstSeqNum globalSeqNum;
 
-    DynInstPtr renameTable[TheISA::TotalNumRegs];
-    DynInstPtr commitTable[TheISA::TotalNumRegs];
-
-    // Might need a table of the shadow registers as well.
-#if FULL_SYSTEM
-    DynInstPtr palShadowTable[TheISA::NumIntRegs];
-#endif
-
-  public:
-    // The register accessor methods provide the index of the
-    // instruction's operand (e.g., 0 or 1), not the architectural
-    // register index, to simplify the implementation of register
-    // renaming.  We find the architectural register index by indexing
-    // into the instruction's own operand index table.  Note that a
-    // raw pointer to the StaticInst is provided instead of a
-    // ref-counted StaticInstPtr to redice overhead.  This is fine as
-    // long as these methods don't copy the pointer into any long-term
-    // storage (which is pretty hard to imagine they would have reason
-    // to do).
-
-    // In the OoO case these shouldn't read from the XC but rather from the
-    // rename table of DynInsts.  Also these likely shouldn't be called very
-    // often, other than when adding things into the xc during say a syscall.
-
-    uint64_t readIntReg(StaticInst *si, int idx)
-    {
-        return xc->readIntReg(si->srcRegIdx(idx));
-    }
-
-    float readFloatRegSingle(StaticInst *si, int idx)
-    {
-        int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag;
-        return xc->readFloatRegSingle(reg_idx);
-    }
-
-    double readFloatRegDouble(StaticInst *si, int idx)
-    {
-        int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag;
-        return xc->readFloatRegDouble(reg_idx);
-    }
-
-    uint64_t readFloatRegInt(StaticInst *si, int idx)
-    {
-        int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag;
-        return xc->readFloatRegInt(reg_idx);
-    }
-
-    void setIntReg(StaticInst *si, int idx, uint64_t val)
-    {
-        xc->setIntReg(si->destRegIdx(idx), val);
-    }
-
-    void setFloatRegSingle(StaticInst *si, int idx, float val)
-    {
-        int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag;
-        xc->setFloatRegSingle(reg_idx, val);
-    }
-
-    void setFloatRegDouble(StaticInst *si, int idx, double val)
-    {
-        int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag;
-        xc->setFloatRegDouble(reg_idx, val);
-    }
-
-    void setFloatRegInt(StaticInst *si, int idx, uint64_t val)
-    {
-        int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag;
-        xc->setFloatRegInt(reg_idx, val);
-    }
-
-    uint64_t readPC() { return PC; }
-    void setNextPC(Addr val) { nextPC = val; }
-
-  private:
-    Addr PC;
-    Addr nextPC;
-
-    unsigned issueWidth;
-
-    bool fetchRedirExcp;
-    bool fetchRedirBranch;
-
-    /** Mask to get a cache block's address. */
-    Addr cacheBlkMask;
-
-    unsigned cacheBlkSize;
-
-    Addr cacheBlkPC;
-
-    /** The cache line being fetched. */
-    uint8_t *cacheData;
-
-  protected:
-    bool cacheBlkValid;
-
-  private:
-
-    // Align an address (typically a PC) to the start of an I-cache block.
-    // We fold in the PISA 64- to 32-bit conversion here as well.
-    Addr icacheBlockAlignPC(Addr addr)
-    {
-        addr = TheISA::realPCToFetchPC(addr);
-        return (addr & ~(cacheBlkMask));
-    }
-
-    unsigned instSize;
-
-    // ROB tracking stuff.
-    DynInstPtr robHeadPtr;
-    DynInstPtr robTailPtr;
-    unsigned robSize;
-    unsigned robInsts;
-
-    // List of outstanding EA instructions.
-  protected:
-    EAList eaList;
-
   public:
-    void branchToTarget(Addr val)
-    {
-        if (!fetchRedirExcp) {
-            fetchRedirBranch = true;
-            PC = val;
-        }
-    }
+    void squashFromXC();
 
-    // ISA stuff:
-    uint64_t readUniq() { return xc->readUniq(); }
-    void setUniq(uint64_t val) { xc->setUniq(val); }
-
-    uint64_t readFpcr() { return xc->readFpcr(); }
-    void setFpcr(uint64_t val) { xc->setFpcr(val); }
+    // @todo: This can be a useful debug function.  Implement it.
+    void dumpInsts() { frontEnd->dumpInsts(); }
 
 #if FULL_SYSTEM
-    uint64_t readIpr(int idx, Fault &fault) { return xc->readIpr(idx, fault); }
-    Fault setIpr(int idx, uint64_t val) { return xc->setIpr(idx, val); }
-    Fault hwrei() { return xc->hwrei(); }
-    int readIntrFlag() { return xc->readIntrFlag(); }
-    void setIntrFlag(int val) { xc->setIntrFlag(val); }
-    bool inPalMode() { return xc->inPalMode(); }
-    void trap(Fault fault) { fault->invoke(xc); }
-    bool simPalCheck(int palFunc) { return xc->simPalCheck(palFunc); }
+    Fault hwrei();
+    int readIntrFlag() { return thread.regs.intrflag; }
+    void setIntrFlag(int val) { thread.regs.intrflag = val; }
+    bool inPalMode() { return AlphaISA::PcPAL(thread.PC); }
+    bool inPalMode(Addr pc) { return AlphaISA::PcPAL(pc); }
+    bool simPalCheck(int palFunc);
 #else
-    void syscall() { xc->syscall(); }
-#endif
-
-    ExecContext *xcBase() { return xc; }
-};
-
-
-// precise architected memory state accessor macros
-template <class Impl>
-template <class T>
-Fault
-OoOCPU<Impl>::read(Addr addr, T &data, unsigned flags, DynInstPtr inst)
-{
-    MemReqPtr readReq = new MemReq();
-    readReq->xc = xc;
-    readReq->asid = 0;
-    readReq->data = new uint8_t[64];
-
-    readReq->reset(addr, sizeof(T), flags);
-
-    // translate to physical address - This might be an ISA impl call
-    Fault fault = translateDataReadReq(readReq);
-
-    // do functional access
-    if (fault == NoFault)
-        fault = xc->mem->read(readReq, data);
-#if 0
-    if (traceData) {
-        traceData->setAddr(addr);
-        if (fault == NoFault)
-            traceData->setData(data);
-    }
-#endif
-
-    // if we have a cache, do cache access too
-    if (fault == NoFault && dcacheInterface) {
-        readReq->cmd = Read;
-        readReq->completionEvent = NULL;
-        readReq->time = curTick;
-        /*MemAccessResult result = */dcacheInterface->access(readReq);
-
-        if (dcacheInterface->doEvents()) {
-            readReq->completionEvent = new DCacheCompletionEvent(this, inst,
-                                                                 dcceIt);
-        }
-    }
-
-    if (!dcacheInterface && (readReq->flags & UNCACHEABLE))
-        recordEvent("Uncached Read");
-
-    return fault;
-}
-
-template <class Impl>
-template <class T>
-Fault
-OoOCPU<Impl>::write(T data, Addr addr, unsigned flags,
-                    uint64_t *res, DynInstPtr inst)
-{
-    MemReqPtr writeReq = new MemReq();
-    writeReq->xc = xc;
-    writeReq->asid = 0;
-    writeReq->data = new uint8_t[64];
-
-#if 0
-    if (traceData) {
-        traceData->setAddr(addr);
-        traceData->setData(data);
-    }
+    void syscall();
+    void setSyscallReturn(SyscallReturn return_value, int tid);
 #endif
 
-    writeReq->reset(addr, sizeof(T), flags);
-
-    // translate to physical address
-    Fault fault = translateDataWriteReq(writeReq);
-
-    // do functional access
-    if (fault == NoFault)
-        fault = xc->write(writeReq, data);
-
-    if (fault == NoFault && dcacheInterface) {
-        writeReq->cmd = Write;
-        memcpy(writeReq->data,(uint8_t *)&data,writeReq->size);
-        writeReq->completionEvent = NULL;
-        writeReq->time = curTick;
-        /*MemAccessResult result = */dcacheInterface->access(writeReq);
-
-        if (dcacheInterface->doEvents()) {
-            writeReq->completionEvent = new DCacheCompletionEvent(this, inst,
-                                                                  dcceIt);
-        }
-    }
+    ExecContext *xcBase() { return &xcProxy; }
 
-    if (res && (fault == NoFault))
-        *res = writeReq->result;
-
-    if (!dcacheInterface && (writeReq->flags & UNCACHEABLE))
-        recordEvent("Uncached Write");
-
-    return fault;
-}
+    bool decoupledFrontEnd;
+    struct CommStruct {
+        InstSeqNum doneSeqNum;
+        InstSeqNum nonSpecSeqNum;
+        bool uncached;
+        unsigned lqIdx;
 
+        bool stall;
+    };
+    TimeBuffer<CommStruct> comm;
+};
 
-#endif // __CPU_OOO_CPU_OOO_CPU_HH__
+#endif // __CPU_OZONE_CPU_HH__
diff --git a/cpu/ozone/cpu_builder.cc b/cpu/ozone/cpu_builder.cc
new file mode 100644
index 000000000..0146dd1bd
--- /dev/null
+++ b/cpu/ozone/cpu_builder.cc
@@ -0,0 +1,818 @@
+
+#include <string>
+
+#include "cpu/inst_seq.hh"
+#include "cpu/ozone/cpu.hh"
+#include "cpu/ozone/ozone_impl.hh"
+#include "cpu/ozone/simple_impl.hh"
+#include "cpu/ozone/simple_params.hh"
+#include "mem/cache/base_cache.hh"
+#include "sim/builder.hh"
+#include "sim/process.hh"
+#include "sim/sim_object.hh"
+
+class DerivOzoneCPU : public OzoneCPU<OzoneImpl> // Concrete name for OzoneCPU<OzoneImpl>; the builder macros in this file refer to it.
+{
+  public:
+    DerivOzoneCPU(SimpleParams *p) // Passes the parameter struct straight through to the base-class constructor.
+        : OzoneCPU<OzoneImpl>(p)
+    { }
+};
+
+class SimpleOzoneCPU : public OzoneCPU<SimpleImpl> // Concrete name for OzoneCPU<SimpleImpl>; the builder macros in this file refer to it.
+{
+  public:
+    SimpleOzoneCPU(SimpleParams *p) // Passes the parameter struct straight through to the base-class constructor.
+        : OzoneCPU<SimpleImpl>(p)
+    { }
+};
+
+
+////////////////////////////////////////////////////////////////////////
+//
+//  DerivOzoneCPU Simulation Object
+//
+
+BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivOzoneCPU) // Declares the configuration parameters that CREATE_SIM_OBJECT(DerivOzoneCPU) reads.
+
+    Param<int> clock;
+    Param<int> numThreads; // The create function tests isValid() on this before using it.
+
+#if FULL_SYSTEM
+SimObjectParam<System *> system;
+Param<int> cpu_id;
+SimObjectParam<AlphaITB *> itb;
+SimObjectParam<AlphaDTB *> dtb;
+#else
+SimObjectVectorParam<Process *> workload;
+//SimObjectParam<PageTable *> page_table;
+#endif // FULL_SYSTEM
+
+SimObjectParam<FunctionalMemory *> mem;
+
+Param<Counter> max_insts_any_thread;
+Param<Counter> max_insts_all_threads;
+Param<Counter> max_loads_any_thread;
+Param<Counter> max_loads_all_threads;
+
+SimObjectParam<BaseCache *> icache;
+SimObjectParam<BaseCache *> dcache;
+
+Param<unsigned> cachePorts;
+Param<unsigned> width;
+Param<unsigned> frontEndWidth;
+Param<unsigned> backEndWidth;
+Param<unsigned> backEndSquashLatency;
+Param<unsigned> backEndLatency;
+Param<unsigned> maxInstBufferSize;
+Param<unsigned> numPhysicalRegs; // NOTE(review): never read by the create function, which sums numPhysIntRegs and numPhysFloatRegs instead.
+
+Param<unsigned> decodeToFetchDelay;
+Param<unsigned> renameToFetchDelay;
+Param<unsigned> iewToFetchDelay;
+Param<unsigned> commitToFetchDelay;
+Param<unsigned> fetchWidth;
+
+Param<unsigned> renameToDecodeDelay;
+Param<unsigned> iewToDecodeDelay;
+Param<unsigned> commitToDecodeDelay;
+Param<unsigned> fetchToDecodeDelay;
+Param<unsigned> decodeWidth;
+
+Param<unsigned> iewToRenameDelay;
+Param<unsigned> commitToRenameDelay;
+Param<unsigned> decodeToRenameDelay;
+Param<unsigned> renameWidth;
+
+Param<unsigned> commitToIEWDelay;
+Param<unsigned> renameToIEWDelay;
+Param<unsigned> issueToExecuteDelay;
+Param<unsigned> issueWidth;
+Param<unsigned> executeWidth;
+Param<unsigned> executeIntWidth;
+Param<unsigned> executeFloatWidth;
+Param<unsigned> executeBranchWidth;
+Param<unsigned> executeMemoryWidth;
+
+Param<unsigned> iewToCommitDelay;
+Param<unsigned> renameToROBDelay;
+Param<unsigned> commitWidth;
+Param<unsigned> squashWidth;
+
+Param<unsigned> localPredictorSize;
+Param<unsigned> localCtrBits;
+Param<unsigned> localHistoryTableSize;
+Param<unsigned> localHistoryBits;
+Param<unsigned> globalPredictorSize;
+Param<unsigned> globalCtrBits;
+Param<unsigned> globalHistoryBits;
+Param<unsigned> choicePredictorSize;
+Param<unsigned> choiceCtrBits;
+
+Param<unsigned> BTBEntries;
+Param<unsigned> BTBTagSize;
+
+Param<unsigned> RASSize;
+
+Param<unsigned> LQEntries;
+Param<unsigned> SQEntries;
+Param<unsigned> LFSTSize;
+Param<unsigned> SSITSize;
+
+Param<unsigned> numPhysIntRegs;
+Param<unsigned> numPhysFloatRegs;
+Param<unsigned> numIQEntries;
+Param<unsigned> numROBEntries;
+
+Param<bool> decoupledFrontEnd;
+Param<int> dispatchWidth;
+Param<int> wbWidth;
+
+Param<unsigned> smtNumFetchingThreads;
+Param<std::string>   smtFetchPolicy;
+Param<std::string>   smtLSQPolicy;
+Param<unsigned> smtLSQThreshold;
+Param<std::string>   smtIQPolicy;
+Param<unsigned> smtIQThreshold; // NOTE(review): not copied into params by the create function in this file.
+Param<std::string>   smtROBPolicy;
+Param<unsigned> smtROBThreshold;
+Param<std::string>   smtCommitPolicy;
+
+Param<unsigned> instShiftAmt; // NOTE(review): the create function ignores this and stores the constant 2.
+
+Param<bool> defer_registration;
+
+Param<bool> function_trace;
+Param<Tick> function_trace_start;
+
+END_DECLARE_SIM_OBJECT_PARAMS(DerivOzoneCPU)
+
+BEGIN_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU) // Gives each declared parameter its description and, in the _DFLT form, a default value.
+    // Adjacent string literals are concatenated verbatim, so every split description ends its first piece with a space.
+    INIT_PARAM(clock, "clock speed"),
+    INIT_PARAM(numThreads, "number of HW thread contexts"),
+
+#if FULL_SYSTEM
+    INIT_PARAM(system, "System object"),
+    INIT_PARAM(cpu_id, "processor ID"),
+    INIT_PARAM(itb, "Instruction translation buffer"),
+    INIT_PARAM(dtb, "Data translation buffer"),
+#else
+    INIT_PARAM(workload, "Processes to run"),
+//    INIT_PARAM(page_table, "Page table"),
+#endif // FULL_SYSTEM
+
+    INIT_PARAM_DFLT(mem, "Memory", NULL),
+
+    INIT_PARAM_DFLT(max_insts_any_thread,
+                    "Terminate when any thread reaches this inst count",
+                    0),
+    INIT_PARAM_DFLT(max_insts_all_threads,
+                    "Terminate when all threads have reached "
+                    "this inst count",
+                    0),
+    INIT_PARAM_DFLT(max_loads_any_thread,
+                    "Terminate when any thread reaches this load count",
+                    0),
+    INIT_PARAM_DFLT(max_loads_all_threads,
+                    "Terminate when all threads have reached this load "
+                    "count",
+                    0),
+
+    INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL),
+    INIT_PARAM_DFLT(dcache, "L1 data cache", NULL),
+
+    INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200),
+    INIT_PARAM_DFLT(width, "Width", 1),
+    INIT_PARAM_DFLT(frontEndWidth, "Front end width", 1),
+    INIT_PARAM_DFLT(backEndWidth, "Back end width", 1),
+    INIT_PARAM_DFLT(backEndSquashLatency, "Back end squash latency", 1),
+    INIT_PARAM_DFLT(backEndLatency, "Back end latency", 1),
+    INIT_PARAM_DFLT(maxInstBufferSize, "Maximum instruction buffer size", 16),
+    INIT_PARAM(numPhysicalRegs, "Number of physical registers"),
+
+    INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"),
+    INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"),
+    INIT_PARAM(iewToFetchDelay, "Issue/Execute/Writeback to fetch "
+               "delay"),
+    INIT_PARAM(commitToFetchDelay, "Commit to fetch delay"),
+    INIT_PARAM(fetchWidth, "Fetch width"),
+    INIT_PARAM(renameToDecodeDelay, "Rename to decode delay"),
+    INIT_PARAM(iewToDecodeDelay, "Issue/Execute/Writeback to decode "
+               "delay"),
+    INIT_PARAM(commitToDecodeDelay, "Commit to decode delay"),
+    INIT_PARAM(fetchToDecodeDelay, "Fetch to decode delay"),
+    INIT_PARAM(decodeWidth, "Decode width"),
+
+    INIT_PARAM(iewToRenameDelay, "Issue/Execute/Writeback to rename "
+               "delay"),
+    INIT_PARAM(commitToRenameDelay, "Commit to rename delay"),
+    INIT_PARAM(decodeToRenameDelay, "Decode to rename delay"),
+    INIT_PARAM(renameWidth, "Rename width"),
+
+    INIT_PARAM(commitToIEWDelay, "Commit to "
+               "Issue/Execute/Writeback delay"),
+    INIT_PARAM(renameToIEWDelay, "Rename to "
+               "Issue/Execute/Writeback delay"),
+    INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal "
+               "to the IEW stage)"),
+    INIT_PARAM(issueWidth, "Issue width"),
+    INIT_PARAM(executeWidth, "Execute width"),
+    INIT_PARAM(executeIntWidth, "Integer execute width"),
+    INIT_PARAM(executeFloatWidth, "Floating point execute width"),
+    INIT_PARAM(executeBranchWidth, "Branch execute width"),
+    INIT_PARAM(executeMemoryWidth, "Memory execute width"),
+
+    INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit "
+               "delay"),
+    INIT_PARAM(renameToROBDelay, "Rename to reorder buffer delay"),
+    INIT_PARAM(commitWidth, "Commit width"),
+    INIT_PARAM(squashWidth, "Squash width"),
+
+    INIT_PARAM(localPredictorSize, "Size of local predictor"),
+    INIT_PARAM(localCtrBits, "Bits per counter"),
+    INIT_PARAM(localHistoryTableSize, "Size of local history table"),
+    INIT_PARAM(localHistoryBits, "Bits for the local history"),
+    INIT_PARAM(globalPredictorSize, "Size of global predictor"),
+    INIT_PARAM(globalCtrBits, "Bits per counter"),
+    INIT_PARAM(globalHistoryBits, "Bits of history"),
+    INIT_PARAM(choicePredictorSize, "Size of choice predictor"),
+    INIT_PARAM(choiceCtrBits, "Bits of choice counters"),
+
+    INIT_PARAM(BTBEntries, "Number of BTB entries"),
+    INIT_PARAM(BTBTagSize, "Size of the BTB tags, in bits"),
+
+    INIT_PARAM(RASSize, "RAS size"),
+
+    INIT_PARAM(LQEntries, "Number of load queue entries"),
+    INIT_PARAM(SQEntries, "Number of store queue entries"),
+    INIT_PARAM(LFSTSize, "Last fetched store table size"),
+    INIT_PARAM(SSITSize, "Store set ID table size"),
+
+    INIT_PARAM(numPhysIntRegs, "Number of physical integer registers"),
+    INIT_PARAM(numPhysFloatRegs, "Number of physical floating point "
+               "registers"),
+    INIT_PARAM(numIQEntries, "Number of instruction queue entries"),
+    INIT_PARAM(numROBEntries, "Number of reorder buffer entries"),
+
+    INIT_PARAM_DFLT(decoupledFrontEnd, "Decoupled front end", true),
+    INIT_PARAM_DFLT(dispatchWidth, "Dispatch width", 0),
+    INIT_PARAM_DFLT(wbWidth, "Writeback width", 0),
+
+    INIT_PARAM_DFLT(smtNumFetchingThreads, "SMT Number of Fetching Threads", 1),
+    INIT_PARAM_DFLT(smtFetchPolicy, "SMT Fetch Policy", "SingleThread"),
+    INIT_PARAM_DFLT(smtLSQPolicy,   "SMT LSQ Sharing Policy",    "Partitioned"),
+    INIT_PARAM_DFLT(smtLSQThreshold,"SMT LSQ Threshold", 100),
+    INIT_PARAM_DFLT(smtIQPolicy,    "SMT IQ Policy",    "Partitioned"),
+    INIT_PARAM_DFLT(smtIQThreshold, "SMT IQ Threshold", 100),
+    INIT_PARAM_DFLT(smtROBPolicy,   "SMT ROB Sharing Policy", "Partitioned"),
+    INIT_PARAM_DFLT(smtROBThreshold,"SMT ROB Threshold", 100),
+    INIT_PARAM_DFLT(smtCommitPolicy,"SMT Commit Fetch Policy", "RoundRobin"),
+
+    INIT_PARAM(instShiftAmt, "Number of bits to shift instructions by"),
+    INIT_PARAM(defer_registration, "defer system registration (for sampling)"),
+
+    INIT_PARAM(function_trace, "Enable function trace"),
+    INIT_PARAM(function_trace_start, "Cycle to start function trace")
+
+END_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU)
+
+CREATE_SIM_OBJECT(DerivOzoneCPU) // Copies the declared parameters into a SimpleParams and returns a new DerivOzoneCPU built from it.
+{
+    DerivOzoneCPU *cpu;
+
+#if FULL_SYSTEM
+    // Full-system only supports a single thread for the moment.
+    int actual_num_threads = 1;
+#else
+    // In non-full-system mode, we infer the number of threads from
+    // the workload if it's not explicitly specified.
+    int actual_num_threads =
+        numThreads.isValid() ? numThreads : workload.size();
+
+    if (workload.size() == 0) { // An empty workload list is reported through fatal().
+        fatal("Must specify at least one workload!");
+    }
+
+#endif
+
+    SimpleParams *params = new SimpleParams; // Passed to the CPU constructor below; not deleted in this function.
+
+    params->clock = clock;
+
+    params->name = getInstanceName();
+    params->numberOfThreads = actual_num_threads;
+
+#if FULL_SYSTEM
+    params->system = system;
+    params->cpu_id = cpu_id;
+    params->itb = itb;
+    params->dtb = dtb;
+#else
+    params->workload = workload;
+//    params->pTable = page_table;
+#endif // FULL_SYSTEM
+
+    params->mem = mem;
+
+    params->max_insts_any_thread = max_insts_any_thread;
+    params->max_insts_all_threads = max_insts_all_threads;
+    params->max_loads_any_thread = max_loads_any_thread;
+    params->max_loads_all_threads = max_loads_all_threads;
+
+    //
+    // Caches
+    //
+    params->icacheInterface = icache ? icache->getInterface() : NULL; // NULL when no icache object was supplied.
+    params->dcacheInterface = dcache ? dcache->getInterface() : NULL; // NULL when no dcache object was supplied.
+    params->cachePorts = cachePorts;
+
+    params->width = width;
+    params->frontEndWidth = frontEndWidth;
+    params->backEndWidth = backEndWidth;
+    params->backEndSquashLatency = backEndSquashLatency;
+    params->backEndLatency = backEndLatency;
+    params->maxInstBufferSize = maxInstBufferSize;
+    params->numPhysicalRegs = numPhysIntRegs + numPhysFloatRegs; // NOTE(review): the declared numPhysicalRegs parameter is not read; confirm that is intended.
+
+    params->decodeToFetchDelay = decodeToFetchDelay;
+    params->renameToFetchDelay = renameToFetchDelay;
+    params->iewToFetchDelay = iewToFetchDelay;
+    params->commitToFetchDelay = commitToFetchDelay;
+    params->fetchWidth = fetchWidth;
+
+    params->renameToDecodeDelay = renameToDecodeDelay;
+    params->iewToDecodeDelay = iewToDecodeDelay;
+    params->commitToDecodeDelay = commitToDecodeDelay;
+    params->fetchToDecodeDelay = fetchToDecodeDelay;
+    params->decodeWidth = decodeWidth;
+
+    params->iewToRenameDelay = iewToRenameDelay;
+    params->commitToRenameDelay = commitToRenameDelay;
+    params->decodeToRenameDelay = decodeToRenameDelay;
+    params->renameWidth = renameWidth;
+
+    params->commitToIEWDelay = commitToIEWDelay;
+    params->renameToIEWDelay = renameToIEWDelay;
+    params->issueToExecuteDelay = issueToExecuteDelay;
+    params->issueWidth = issueWidth;
+    params->executeWidth = executeWidth;
+    params->executeIntWidth = executeIntWidth;
+    params->executeFloatWidth = executeFloatWidth;
+    params->executeBranchWidth = executeBranchWidth;
+    params->executeMemoryWidth = executeMemoryWidth;
+
+    params->iewToCommitDelay = iewToCommitDelay;
+    params->renameToROBDelay = renameToROBDelay;
+    params->commitWidth = commitWidth;
+    params->squashWidth = squashWidth;
+
+
+    params->localPredictorSize = localPredictorSize;
+    params->localCtrBits = localCtrBits;
+    params->localHistoryTableSize = localHistoryTableSize;
+    params->localHistoryBits = localHistoryBits;
+    params->globalPredictorSize = globalPredictorSize;
+    params->globalCtrBits = globalCtrBits;
+    params->globalHistoryBits = globalHistoryBits;
+    params->choicePredictorSize = choicePredictorSize;
+    params->choiceCtrBits = choiceCtrBits;
+
+    params->BTBEntries = BTBEntries;
+    params->BTBTagSize = BTBTagSize;
+
+    params->RASSize = RASSize;
+
+    params->LQEntries = LQEntries;
+    params->SQEntries = SQEntries;
+
+    params->SSITSize = SSITSize;
+    params->LFSTSize = LFSTSize;
+
+    params->numPhysIntRegs = numPhysIntRegs;
+    params->numPhysFloatRegs = numPhysFloatRegs;
+    params->numIQEntries = numIQEntries;
+    params->numROBEntries = numROBEntries;
+
+    params->decoupledFrontEnd = decoupledFrontEnd;
+    params->dispatchWidth = dispatchWidth;
+    params->wbWidth = wbWidth;
+
+    params->smtNumFetchingThreads = smtNumFetchingThreads;
+    params->smtFetchPolicy = smtFetchPolicy;
+    params->smtIQPolicy    = smtIQPolicy; // NOTE(review): the declared smtIQThreshold is never copied; confirm whether SimpleParams expects it.
+    params->smtLSQPolicy    = smtLSQPolicy;
+    params->smtLSQThreshold = smtLSQThreshold;
+    params->smtROBPolicy   = smtROBPolicy;
+    params->smtROBThreshold = smtROBThreshold;
+    params->smtCommitPolicy = smtCommitPolicy;
+
+    params->instShiftAmt = 2; // NOTE(review): hard-coded; the declared instShiftAmt parameter is ignored. Confirm that is intended.
+
+    params->deferRegistration = defer_registration;
+
+    params->functionTrace = function_trace;
+    params->functionTraceStart = function_trace_start;
+
+    cpu = new DerivOzoneCPU(params);
+
+    return cpu;
+}
+
+REGISTER_SIM_OBJECT("DerivOzoneCPU", DerivOzoneCPU) // Associates the string name "DerivOzoneCPU" with the builder defined above.
+
+
+
+////////////////////////////////////////////////////////////////////////
+//
+//  SimpleOzoneCPU Simulation Object
+//
+
+BEGIN_DECLARE_SIM_OBJECT_PARAMS(SimpleOzoneCPU) // Declares the configuration parameters that CREATE_SIM_OBJECT(SimpleOzoneCPU) reads.
+
+    Param<int> clock;
+    Param<int> numThreads; // The create function tests isValid() on this before using it.
+
+#if FULL_SYSTEM
+SimObjectParam<System *> system;
+Param<int> cpu_id;
+SimObjectParam<AlphaITB *> itb;
+SimObjectParam<AlphaDTB *> dtb;
+#else
+SimObjectVectorParam<Process *> workload;
+//SimObjectParam<PageTable *> page_table;
+#endif // FULL_SYSTEM
+
+SimObjectParam<FunctionalMemory *> mem;
+
+Param<Counter> max_insts_any_thread;
+Param<Counter> max_insts_all_threads;
+Param<Counter> max_loads_any_thread;
+Param<Counter> max_loads_all_threads;
+
+SimObjectParam<BaseCache *> icache;
+SimObjectParam<BaseCache *> dcache;
+
+Param<unsigned> cachePorts;
+Param<unsigned> width;
+Param<unsigned> frontEndWidth;
+Param<unsigned> backEndWidth;
+Param<unsigned> backEndSquashLatency;
+Param<unsigned> backEndLatency;
+Param<unsigned> maxInstBufferSize;
+Param<unsigned> numPhysicalRegs; // NOTE(review): never read by the create function, which sums numPhysIntRegs and numPhysFloatRegs instead.
+
+Param<unsigned> decodeToFetchDelay;
+Param<unsigned> renameToFetchDelay;
+Param<unsigned> iewToFetchDelay;
+Param<unsigned> commitToFetchDelay;
+Param<unsigned> fetchWidth;
+
+Param<unsigned> renameToDecodeDelay;
+Param<unsigned> iewToDecodeDelay;
+Param<unsigned> commitToDecodeDelay;
+Param<unsigned> fetchToDecodeDelay;
+Param<unsigned> decodeWidth;
+
+Param<unsigned> iewToRenameDelay;
+Param<unsigned> commitToRenameDelay;
+Param<unsigned> decodeToRenameDelay;
+Param<unsigned> renameWidth;
+
+Param<unsigned> commitToIEWDelay;
+Param<unsigned> renameToIEWDelay;
+Param<unsigned> issueToExecuteDelay;
+Param<unsigned> issueWidth;
+Param<unsigned> executeWidth;
+Param<unsigned> executeIntWidth;
+Param<unsigned> executeFloatWidth;
+Param<unsigned> executeBranchWidth;
+Param<unsigned> executeMemoryWidth;
+
+Param<unsigned> iewToCommitDelay;
+Param<unsigned> renameToROBDelay;
+Param<unsigned> commitWidth;
+Param<unsigned> squashWidth;
+
+Param<unsigned> localPredictorSize;
+Param<unsigned> localCtrBits;
+Param<unsigned> localHistoryTableSize;
+Param<unsigned> localHistoryBits;
+Param<unsigned> globalPredictorSize;
+Param<unsigned> globalCtrBits;
+Param<unsigned> globalHistoryBits;
+Param<unsigned> choicePredictorSize;
+Param<unsigned> choiceCtrBits;
+
+Param<unsigned> BTBEntries;
+Param<unsigned> BTBTagSize;
+
+Param<unsigned> RASSize;
+
+Param<unsigned> LQEntries;
+Param<unsigned> SQEntries;
+Param<unsigned> LFSTSize;
+Param<unsigned> SSITSize;
+
+Param<unsigned> numPhysIntRegs;
+Param<unsigned> numPhysFloatRegs;
+Param<unsigned> numIQEntries;
+Param<unsigned> numROBEntries;
+
+Param<bool> decoupledFrontEnd;
+Param<int> dispatchWidth;
+Param<int> wbWidth;
+
+Param<unsigned> smtNumFetchingThreads;
+Param<std::string>   smtFetchPolicy;
+Param<std::string>   smtLSQPolicy;
+Param<unsigned> smtLSQThreshold;
+Param<std::string>   smtIQPolicy;
+Param<unsigned> smtIQThreshold; // NOTE(review): not copied into params by the create function in this file.
+Param<std::string>   smtROBPolicy;
+Param<unsigned> smtROBThreshold;
+Param<std::string>   smtCommitPolicy;
+
+Param<unsigned> instShiftAmt; // NOTE(review): the create function ignores this and stores the constant 2.
+
+Param<bool> defer_registration;
+
+Param<bool> function_trace;
+Param<Tick> function_trace_start;
+
+END_DECLARE_SIM_OBJECT_PARAMS(SimpleOzoneCPU)
+
+BEGIN_INIT_SIM_OBJECT_PARAMS(SimpleOzoneCPU) // Gives each declared parameter its description and, in the _DFLT form, a default value.
+    // Adjacent string literals are concatenated verbatim, so every split description ends its first piece with a space.
+    INIT_PARAM(clock, "clock speed"),
+    INIT_PARAM(numThreads, "number of HW thread contexts"),
+
+#if FULL_SYSTEM
+    INIT_PARAM(system, "System object"),
+    INIT_PARAM(cpu_id, "processor ID"),
+    INIT_PARAM(itb, "Instruction translation buffer"),
+    INIT_PARAM(dtb, "Data translation buffer"),
+#else
+    INIT_PARAM(workload, "Processes to run"),
+//    INIT_PARAM(page_table, "Page table"),
+#endif // FULL_SYSTEM
+
+    INIT_PARAM_DFLT(mem, "Memory", NULL),
+
+    INIT_PARAM_DFLT(max_insts_any_thread,
+                    "Terminate when any thread reaches this inst count",
+                    0),
+    INIT_PARAM_DFLT(max_insts_all_threads,
+                    "Terminate when all threads have reached "
+                    "this inst count",
+                    0),
+    INIT_PARAM_DFLT(max_loads_any_thread,
+                    "Terminate when any thread reaches this load count",
+                    0),
+    INIT_PARAM_DFLT(max_loads_all_threads,
+                    "Terminate when all threads have reached this load "
+                    "count",
+                    0),
+
+    INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL),
+    INIT_PARAM_DFLT(dcache, "L1 data cache", NULL),
+
+    INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200),
+    INIT_PARAM_DFLT(width, "Width", 1),
+    INIT_PARAM_DFLT(frontEndWidth, "Front end width", 1),
+    INIT_PARAM_DFLT(backEndWidth, "Back end width", 1),
+    INIT_PARAM_DFLT(backEndSquashLatency, "Back end squash latency", 1),
+    INIT_PARAM_DFLT(backEndLatency, "Back end latency", 1),
+    INIT_PARAM_DFLT(maxInstBufferSize, "Maximum instruction buffer size", 16),
+    INIT_PARAM(numPhysicalRegs, "Number of physical registers"),
+
+    INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"),
+    INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"),
+    INIT_PARAM(iewToFetchDelay, "Issue/Execute/Writeback to fetch "
+               "delay"),
+    INIT_PARAM(commitToFetchDelay, "Commit to fetch delay"),
+    INIT_PARAM(fetchWidth, "Fetch width"),
+    INIT_PARAM(renameToDecodeDelay, "Rename to decode delay"),
+    INIT_PARAM(iewToDecodeDelay, "Issue/Execute/Writeback to decode "
+               "delay"),
+    INIT_PARAM(commitToDecodeDelay, "Commit to decode delay"),
+    INIT_PARAM(fetchToDecodeDelay, "Fetch to decode delay"),
+    INIT_PARAM(decodeWidth, "Decode width"),
+
+    INIT_PARAM(iewToRenameDelay, "Issue/Execute/Writeback to rename "
+               "delay"),
+    INIT_PARAM(commitToRenameDelay, "Commit to rename delay"),
+    INIT_PARAM(decodeToRenameDelay, "Decode to rename delay"),
+    INIT_PARAM(renameWidth, "Rename width"),
+
+    INIT_PARAM(commitToIEWDelay, "Commit to "
+               "Issue/Execute/Writeback delay"),
+    INIT_PARAM(renameToIEWDelay, "Rename to "
+               "Issue/Execute/Writeback delay"),
+    INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal "
+               "to the IEW stage)"),
+    INIT_PARAM(issueWidth, "Issue width"),
+    INIT_PARAM(executeWidth, "Execute width"),
+    INIT_PARAM(executeIntWidth, "Integer execute width"),
+    INIT_PARAM(executeFloatWidth, "Floating point execute width"),
+    INIT_PARAM(executeBranchWidth, "Branch execute width"),
+    INIT_PARAM(executeMemoryWidth, "Memory execute width"),
+
+    INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit "
+               "delay"),
+    INIT_PARAM(renameToROBDelay, "Rename to reorder buffer delay"),
+    INIT_PARAM(commitWidth, "Commit width"),
+    INIT_PARAM(squashWidth, "Squash width"),
+
+    INIT_PARAM(localPredictorSize, "Size of local predictor"),
+    INIT_PARAM(localCtrBits, "Bits per counter"),
+    INIT_PARAM(localHistoryTableSize, "Size of local history table"),
+    INIT_PARAM(localHistoryBits, "Bits for the local history"),
+    INIT_PARAM(globalPredictorSize, "Size of global predictor"),
+    INIT_PARAM(globalCtrBits, "Bits per counter"),
+    INIT_PARAM(globalHistoryBits, "Bits of history"),
+    INIT_PARAM(choicePredictorSize, "Size of choice predictor"),
+    INIT_PARAM(choiceCtrBits, "Bits of choice counters"),
+
+    INIT_PARAM(BTBEntries, "Number of BTB entries"),
+    INIT_PARAM(BTBTagSize, "Size of the BTB tags, in bits"),
+
+    INIT_PARAM(RASSize, "RAS size"),
+
+    INIT_PARAM(LQEntries, "Number of load queue entries"),
+    INIT_PARAM(SQEntries, "Number of store queue entries"),
+    INIT_PARAM(LFSTSize, "Last fetched store table size"),
+    INIT_PARAM(SSITSize, "Store set ID table size"),
+
+    INIT_PARAM(numPhysIntRegs, "Number of physical integer registers"),
+    INIT_PARAM(numPhysFloatRegs, "Number of physical floating point "
+               "registers"),
+    INIT_PARAM(numIQEntries, "Number of instruction queue entries"),
+    INIT_PARAM(numROBEntries, "Number of reorder buffer entries"),
+
+    INIT_PARAM_DFLT(decoupledFrontEnd, "Decoupled front end", true),
+    INIT_PARAM_DFLT(dispatchWidth, "Dispatch width", 0),
+    INIT_PARAM_DFLT(wbWidth, "Writeback width", 0),
+
+    INIT_PARAM_DFLT(smtNumFetchingThreads, "SMT Number of Fetching Threads", 1),
+    INIT_PARAM_DFLT(smtFetchPolicy, "SMT Fetch Policy", "SingleThread"),
+    INIT_PARAM_DFLT(smtLSQPolicy,   "SMT LSQ Sharing Policy",    "Partitioned"),
+    INIT_PARAM_DFLT(smtLSQThreshold,"SMT LSQ Threshold", 100),
+    INIT_PARAM_DFLT(smtIQPolicy,    "SMT IQ Policy",    "Partitioned"),
+    INIT_PARAM_DFLT(smtIQThreshold, "SMT IQ Threshold", 100),
+    INIT_PARAM_DFLT(smtROBPolicy,   "SMT ROB Sharing Policy", "Partitioned"),
+    INIT_PARAM_DFLT(smtROBThreshold,"SMT ROB Threshold", 100),
+    INIT_PARAM_DFLT(smtCommitPolicy,"SMT Commit Fetch Policy", "RoundRobin"),
+
+    INIT_PARAM(instShiftAmt, "Number of bits to shift instructions by"),
+    INIT_PARAM(defer_registration, "defer system registration (for sampling)"),
+
+    INIT_PARAM(function_trace, "Enable function trace"),
+    INIT_PARAM(function_trace_start, "Cycle to start function trace")
+
+END_INIT_SIM_OBJECT_PARAMS(SimpleOzoneCPU)
+
+CREATE_SIM_OBJECT(SimpleOzoneCPU) // Copies the declared parameters into a SimpleParams and returns a new SimpleOzoneCPU built from it.
+{
+    SimpleOzoneCPU *cpu;
+
+#if FULL_SYSTEM
+    // Full-system only supports a single thread for the moment.
+    int actual_num_threads = 1;
+#else
+    // In non-full-system mode, we infer the number of threads from
+    // the workload if it's not explicitly specified.
+    int actual_num_threads =
+        numThreads.isValid() ? numThreads : workload.size();
+
+    if (workload.size() == 0) { // An empty workload list is reported through fatal().
+        fatal("Must specify at least one workload!");
+    }
+
+#endif
+
+    SimpleParams *params = new SimpleParams; // Passed to the CPU constructor below; not deleted in this function.
+
+    params->clock = clock;
+
+    params->name = getInstanceName();
+    params->numberOfThreads = actual_num_threads;
+
+#if FULL_SYSTEM
+    params->system = system;
+    params->cpu_id = cpu_id;
+    params->itb = itb;
+    params->dtb = dtb;
+#else
+    params->workload = workload;
+//    params->pTable = page_table;
+#endif // FULL_SYSTEM
+
+    params->mem = mem;
+
+    params->max_insts_any_thread = max_insts_any_thread;
+    params->max_insts_all_threads = max_insts_all_threads;
+    params->max_loads_any_thread = max_loads_any_thread;
+    params->max_loads_all_threads = max_loads_all_threads;
+
+    //
+    // Caches
+    //
+    params->icacheInterface = icache ? icache->getInterface() : NULL; // NULL when no icache object was supplied.
+    params->dcacheInterface = dcache ? dcache->getInterface() : NULL; // NULL when no dcache object was supplied.
+    params->cachePorts = cachePorts;
+
+    params->width = width;
+    params->frontEndWidth = frontEndWidth;
+    params->backEndWidth = backEndWidth;
+    params->backEndSquashLatency = backEndSquashLatency;
+    params->backEndLatency = backEndLatency;
+    params->maxInstBufferSize = maxInstBufferSize;
+    params->numPhysicalRegs = numPhysIntRegs + numPhysFloatRegs; // NOTE(review): the declared numPhysicalRegs parameter is not read; confirm that is intended.
+
+    params->decodeToFetchDelay = decodeToFetchDelay;
+    params->renameToFetchDelay = renameToFetchDelay;
+    params->iewToFetchDelay = iewToFetchDelay;
+    params->commitToFetchDelay = commitToFetchDelay;
+    params->fetchWidth = fetchWidth;
+
+    params->renameToDecodeDelay = renameToDecodeDelay;
+    params->iewToDecodeDelay = iewToDecodeDelay;
+    params->commitToDecodeDelay = commitToDecodeDelay;
+    params->fetchToDecodeDelay = fetchToDecodeDelay;
+    params->decodeWidth = decodeWidth;
+
+    params->iewToRenameDelay = iewToRenameDelay;
+    params->commitToRenameDelay = commitToRenameDelay;
+    params->decodeToRenameDelay = decodeToRenameDelay;
+    params->renameWidth = renameWidth;
+
+    params->commitToIEWDelay = commitToIEWDelay;
+    params->renameToIEWDelay = renameToIEWDelay;
+    params->issueToExecuteDelay = issueToExecuteDelay;
+    params->issueWidth = issueWidth;
+    params->executeWidth = executeWidth;
+    params->executeIntWidth = executeIntWidth;
+    params->executeFloatWidth = executeFloatWidth;
+    params->executeBranchWidth = executeBranchWidth;
+    params->executeMemoryWidth = executeMemoryWidth;
+
+    params->iewToCommitDelay = iewToCommitDelay;
+    params->renameToROBDelay = renameToROBDelay;
+    params->commitWidth = commitWidth;
+    params->squashWidth = squashWidth;
+
+
+    params->localPredictorSize = localPredictorSize;
+    params->localCtrBits = localCtrBits;
+    params->localHistoryTableSize = localHistoryTableSize;
+    params->localHistoryBits = localHistoryBits;
+    params->globalPredictorSize = globalPredictorSize;
+    params->globalCtrBits = globalCtrBits;
+    params->globalHistoryBits = globalHistoryBits;
+    params->choicePredictorSize = choicePredictorSize;
+    params->choiceCtrBits = choiceCtrBits;
+
+    params->BTBEntries = BTBEntries;
+    params->BTBTagSize = BTBTagSize;
+
+    params->RASSize = RASSize;
+
+    params->LQEntries = LQEntries;
+    params->SQEntries = SQEntries;
+
+    params->SSITSize = SSITSize;
+    params->LFSTSize = LFSTSize;
+
+    params->numPhysIntRegs = numPhysIntRegs;
+    params->numPhysFloatRegs = numPhysFloatRegs;
+    params->numIQEntries = numIQEntries;
+    params->numROBEntries = numROBEntries;
+
+    params->decoupledFrontEnd = decoupledFrontEnd;
+    params->dispatchWidth = dispatchWidth;
+    params->wbWidth = wbWidth;
+
+    params->smtNumFetchingThreads = smtNumFetchingThreads;
+    params->smtFetchPolicy = smtFetchPolicy;
+    params->smtIQPolicy    = smtIQPolicy; // NOTE(review): the declared smtIQThreshold is never copied; confirm whether SimpleParams expects it.
+    params->smtLSQPolicy    = smtLSQPolicy;
+    params->smtLSQThreshold = smtLSQThreshold;
+    params->smtROBPolicy   = smtROBPolicy;
+    params->smtROBThreshold = smtROBThreshold;
+    params->smtCommitPolicy = smtCommitPolicy;
+
+    params->instShiftAmt = 2; // NOTE(review): hard-coded; the declared instShiftAmt parameter is ignored. Confirm that is intended.
+
+    params->deferRegistration = defer_registration;
+
+    params->functionTrace = function_trace;
+    params->functionTraceStart = function_trace_start;
+
+    cpu = new SimpleOzoneCPU(params);
+
+    return cpu;
+}
+
+REGISTER_SIM_OBJECT("SimpleOzoneCPU", SimpleOzoneCPU) // Associates the string name "SimpleOzoneCPU" with the builder defined above.
+
diff --git a/cpu/ozone/cpu_impl.hh b/cpu/ozone/cpu_impl.hh
index e7ed3cfe0..36ec30b2c 100644
--- a/cpu/ozone/cpu_impl.hh
+++ b/cpu/ozone/cpu_impl.hh
@@ -26,23 +26,1137 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#ifndef __CPU_OOO_CPU_OOO_IMPL_HH__
-#define __CPU_OOO_CPU_OOO_IMPL_HH__
+#include <cstdio>
+#include <cstdlib>
 
-#include "arch/isa_traits.hh"
+#include "arch/isa_traits.hh" // For MachInst
+#include "base/trace.hh"
+#include "config/full_system.hh"
+#include "cpu/base.hh"
+#include "cpu/exec_context.hh"
+#include "cpu/exetrace.hh"
+#include "cpu/ozone/cpu.hh"
+#include "cpu/quiesce_event.hh"
+#include "cpu/static_inst.hh"
+#include "mem/base_mem.hh"
+#include "mem/mem_interface.hh"
+#include "sim/sim_object.hh"
+#include "sim/stats.hh"
+
+#if FULL_SYSTEM
+#include "arch/faults.hh"
+#include "arch/alpha/osfpal.hh"
+#include "arch/alpha/tlb.hh"
+#include "arch/vtophys.hh"
+#include "base/callback.hh"
+#include "base/remote_gdb.hh"
+#include "cpu/profile.hh"
+#include "kern/kernel_stats.hh"
+#include "mem/functional/memory_control.hh"
+#include "mem/functional/physical.hh"
+#include "sim/faults.hh"
+#include "sim/sim_events.hh"
+#include "sim/sim_exit.hh"
+#include "sim/system.hh"
+#else // !FULL_SYSTEM
+#include "mem/functional/functional.hh"
+#include "sim/process.hh"
+#endif // FULL_SYSTEM
+
+using namespace TheISA;
+
+template <class Impl>
+template<typename T>
+void
+OzoneCPU<Impl>::trace_data(T data) {
+    // Forward the value to the trace record only when one is attached.
+    if (traceData)
+        traceData->setData(data);
+}
+
+template <class Impl>
+OzoneCPU<Impl>::TickEvent::TickEvent(OzoneCPU *c, int w)
+    : Event(&mainEventQueue, CPU_Tick_Pri), cpu(c), width(w)
+{   // Only records the owning CPU and the width; nothing is scheduled here.
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::TickEvent::process()
+{
+    cpu->tick();    // Each firing of the event runs one OzoneCPU::tick().
+}
+
+template <class Impl>
+const char *
+OzoneCPU<Impl>::TickEvent::description()
+{   // Returns a fixed label identifying this event type.
+    return "OzoneCPU tick event";
+}
+/*
+template <class Impl>
+OzoneCPU<Impl>::ICacheCompletionEvent::ICacheCompletionEvent(OzoneCPU *_cpu)
+    : Event(&mainEventQueue),
+      cpu(_cpu)
+{
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::ICacheCompletionEvent::process()
+{
+    cpu->processICacheCompletion();
+}
+
+template <class Impl>
+const char *
+OzoneCPU<Impl>::ICacheCompletionEvent::description()
+{
+    return "OzoneCPU I-cache completion event";
+}
+
+template <class Impl>
+OzoneCPU<Impl>::DCacheCompletionEvent::
+DCacheCompletionEvent(OzoneCPU *_cpu,
+                      DynInstPtr &_inst,
+                      DCacheCompEventIt &_dcceIt)
+    : Event(&mainEventQueue),
+      cpu(_cpu),
+      inst(_inst),
+      dcceIt(_dcceIt)
+{
+    this->setFlags(Event::AutoDelete);
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::DCacheCompletionEvent::process()
+{
+    inst->setCompleted();
+
+    // Maybe remove the EA from the list of addrs?
+    cpu->eaList.clearAddr(inst->seqNum, inst->getEA());
+    cpu->dCacheCompList.erase(this->dcceIt);
+}
+
+template <class Impl>
+const char *
+OzoneCPU<Impl>::DCacheCompletionEvent::description()
+{
+    return "OzoneCPU D-cache completion event";
+}
+*/
+template <class Impl>
+OzoneCPU<Impl>::OzoneCPU(Params *p)
+#if FULL_SYSTEM
+    : BaseCPU(p), thread(this, 0, p->mem), tickEvent(this, p->width),
+#else
+    : BaseCPU(p), thread(this, 0, p->workload[0], 0), tickEvent(this, p->width),
+#endif
+      comm(5, 5)  // NOTE(review): meaning of the two 5s depends on comm's type
+{
+    frontEnd = new FrontEnd(p);
+    backEnd = new BackEnd(p);
+    // The CPU starts out Idle; the single thread is marked Suspended below.
+    _status = Idle;
+    thread.xcProxy = &xcProxy;
+
+    thread.inSyscall = false;
+
+    xcProxy.cpu = this;
+    xcProxy.thread = &thread;
+
+    thread.setStatus(ExecContext::Suspended);
+#if FULL_SYSTEM
+//    xc = new ExecContext(this, 0, p->system, p->itb, p->dtb, p->mem);
+
+    /***** All thread state stuff *****/
+    thread.cpu = this;
+    thread.tid = 0;
+    thread.mem = p->mem;
+
+    thread.quiesceEvent = new EndQuiesceEvent(&xcProxy);
+
+    system = p->system;
+    itb = p->itb;
+    dtb = p->dtb;
+    memctrl = p->system->memctrl;
+    physmem = p->system->physmem;
+
+    if (p->profile) {
+        thread.profile = new FunctionProfile(p->system->kernelSymtab);
+        Callback *cb =
+            new MakeCallback<OzoneXC,
+            &OzoneXC::dumpFuncProfile>(&xcProxy);
+        registerExitCallback(cb);  // dumpFuncProfile() is invoked via this callback
+    }
+
+    // let's fill with a dummy node for now so we don't get a segfault
+    // on the first cycle when there's no node available.
+    static ProfileNode dummyNode;
+    thread.profileNode = &dummyNode;
+    thread.profilePC = 3;  // NOTE(review): magic value, rationale not visible here
+
+#else
+//    xc = new ExecContext(this, /* thread_num */ 0, p->workload[0], /* asid */ 0);
+    thread.cpu = this;
+    thread.tid = 0;
+    thread.process = p->workload[0];
+//    thread.mem = thread.process->getMemory();
+    thread.asid = 0;
+#endif // !FULL_SYSTEM
+/*
+    icacheInterface = p->icache_interface;
+    dcacheInterface = p->dcache_interface;
+
+    cacheMemReq = new MemReq();
+    cacheMemReq->xc = xc;
+    cacheMemReq->asid = 0;
+    cacheMemReq->data = new uint8_t[64];
+*/
+    numInst = 0;
+    startNumInst = 0;
+/*    numLoad = 0;
+    startNumLoad = 0;
+    lastIcacheStall = 0;
+    lastDcacheStall = 0;
+
+    issueWidth = p->issueWidth;
+*/
+    execContexts.push_back(&xcProxy);  // the proxy is the context added here
+
+    frontEnd->setCPU(this);
+    backEnd->setCPU(this);
+
+    frontEnd->setXC(&xcProxy);
+    backEnd->setXC(&xcProxy);
+
+    frontEnd->setThreadState(&thread);
+    backEnd->setThreadState(&thread);
+
+    frontEnd->setCommBuffer(&comm);  // both ends share the same comm buffer
+    backEnd->setCommBuffer(&comm);
+
+    frontEnd->setBackEnd(backEnd);
+    backEnd->setFrontEnd(frontEnd);
+
+    decoupledFrontEnd = p->decoupledFrontEnd;
+
+    globalSeqNum = 1;
+
+    checkInterrupts = false;
+/*
+    fetchRedirBranch = true;
+    fetchRedirExcp = true;
+
+    // Need to initialize the rename maps, and the head and tail pointers.
+    robHeadPtr = new DynInst(this);
+    robTailPtr = new DynInst(this);
+
+    robHeadPtr->setNextInst(robTailPtr);
+//    robHeadPtr->setPrevInst(NULL);
+//    robTailPtr->setNextInst(NULL);
+    robTailPtr->setPrevInst(robHeadPtr);
+
+    robHeadPtr->setCompleted();
+    robTailPtr->setCompleted();
+
+    for (int i = 0; i < ISA::TotalNumRegs; ++i) {
+        renameTable[i] = new DynInst(this);
+        commitTable[i] = new DynInst(this);
+
+        renameTable[i]->setCompleted();
+        commitTable[i]->setCompleted();
+    }
+
+#if FULL_SYSTEM
+    for (int i = 0; i < ISA::NumIntRegs; ++i) {
+        palShadowTable[i] = new DynInst(this);
+        palShadowTable[i]->setCompleted();
+    }
+#endif
+
+    // Size of cache block.
+    cacheBlkSize = icacheInterface ? icacheInterface->getBlockSize() : 64;
+
+    // Create mask to get rid of offset bits.
+    cacheBlkMask = (cacheBlkSize - 1);
+
+    // Get the size of an instruction.
+    instSize = sizeof(MachInst);
+
+    // Create space to store a cache line.
+    cacheData = new uint8_t[cacheBlkSize];
+
+    cacheBlkValid = false;
+*/
+    for (int i = 0; i < TheISA::TotalNumRegs; ++i) {
+        thread.renameTable[i] = new DynInst(this);
+        thread.renameTable[i]->setCompleted();  // placeholder starts completed
+    }
+    // Both pipeline halves start from a copy of the thread's rename table.
+    frontEnd->renameTable.copyFrom(thread.renameTable);
+    backEnd->renameTable.copyFrom(thread.renameTable);
+
+#if !FULL_SYSTEM
+    pTable = p->pTable;
+#endif
+
+    DPRINTF(OzoneCPU, "OzoneCPU: Created Ozone cpu object.\n");
+}
+
+template <class Impl>
+OzoneCPU<Impl>::~OzoneCPU()
+{   // NOTE(review): frontEnd/backEnd new'd in the ctor are not released here.
+}
+/*
+template <class Impl>
+void
+OzoneCPU<Impl>::copyFromXC()
+{
+    for (int i = 0; i < TheISA::TotalNumRegs; ++i) {
+        if (i < TheISA::NumIntRegs) {
+            renameTable[i]->setIntResult(xc->readIntReg(i));
+        } else if (i < TheISA::NumFloatRegs) {
+            renameTable[i]->setDoubleResult(xc->readFloatRegDouble(i));
+        }
+    }
+
+    DPRINTF(OzoneCPU, "Func Exe inst is: %i\n", xc->func_exe_inst);
+    backEnd->funcExeInst = xc->func_exe_inst;
+//    PC = xc->readPC();
+//    nextPC = xc->regs.npc;
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::copyToXC()
+{
+    for (int i = 0; i < TheISA::TotalNumRegs; ++i) {
+        if (i < TheISA::NumIntRegs) {
+            xc->setIntReg(i, renameTable[i]->readIntResult());
+        } else if (i < TheISA::NumFloatRegs) {
+            xc->setFloatRegDouble(i, renameTable[i]->readDoubleResult());
+        }
+    }
+
+    this->xc->regs.miscRegs.fpcr = this->regFile.miscRegs[tid].fpcr;
+    this->xc->regs.miscRegs.uniq = this->regFile.miscRegs[tid].uniq;
+    this->xc->regs.miscRegs.lock_flag = this->regFile.miscRegs[tid].lock_flag;
+    this->xc->regs.miscRegs.lock_addr = this->regFile.miscRegs[tid].lock_addr;
+
+    xc->func_exe_inst = backEnd->funcExeInst;
+    xc->regs.pc = PC;
+    xc->regs.npc = nextPC;
+}
+*/
+template <class Impl>
+void
+OzoneCPU<Impl>::switchOut()
+{
+    _status = SwitchedOut;
+    if (tickEvent.scheduled())  // squash only a tick that is actually pending
+        tickEvent.squash();
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
+{
+    BaseCPU::takeOverFrom(oldCPU);
+
+    assert(!tickEvent.scheduled());
+
+    // Start ticking as soon as one of this CPU's ExecContexts turns out
+    // to be active; the _status check keeps us from scheduling twice.
+    for (int idx = 0; idx < execContexts.size(); ++idx) {
+        const bool ctx_active =
+            execContexts[idx]->status() == ExecContext::Active;
+        if (ctx_active && _status != Running) {
+            _status = Running;
+            tickEvent.schedule(curTick);
+        }
+    }
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::activateContext(int thread_num, int delay)
+{
+    // Eventually change this in SMT.
+    assert(thread_num == 0);
+//    assert(xcProxy);
+    // Only an Idle CPU may be activated; 'delay' is passed to the tick scheduler.
+    assert(_status == Idle);
+    notIdleFraction++;
+    scheduleTickEvent(delay);
+    _status = Running;
+    thread._status = ExecContext::Active;  // post_interrupt() reads this field
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::suspendContext(int thread_num)
+{   // Stops ticking and marks both the CPU and its thread as not running.
+    assert(thread_num == 0);    // Eventually change this in SMT.
+    assert(_status == Running);
+    notIdleFraction--;
+    unscheduleTickEvent();
+    _status = Idle;
+    // post_interrupt() only re-activates a thread marked Suspended, so
+    // record the suspension on the thread as well as on the CPU.
+    thread._status = ExecContext::Suspended;
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::deallocateContext(int thread_num)
+{
+    // For now deallocation is handled exactly like suspension.
+    suspendContext(thread_num);
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::haltContext(int thread_num)
+{
+    // For now halting is handled exactly like suspension.
+    suspendContext(thread_num);
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::regStats()
+{
+    using namespace Stats;
+
+    BaseCPU::regStats();
+    // Name and describe the per-thread and idle-time statistics.
+    thread.numInsts
+        .name(name() + ".num_insts")
+        .desc("Number of instructions executed")
+        ;
+
+    thread.numMemRefs
+        .name(name() + ".num_refs")
+        .desc("Number of memory references")
+        ;
+
+    notIdleFraction
+        .name(name() + ".not_idle_fraction")
+        .desc("Percentage of non-idle cycles")  // NOTE(review): looks like a 0..1 fraction
+        ;
+
+    idleFraction
+        .name(name() + ".idle_fraction")
+        .desc("Percentage of idle cycles")
+        ;
+    // idleFraction is derived: it is defined as 1.0 minus notIdleFraction.
+    idleFraction = constant(1.0) - notIdleFraction;
+
+    frontEnd->regStats();
+    backEnd->regStats();
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::resetStats()
+{
+    startNumInst = numInst;              // re-base the instruction count
+    notIdleFraction = (_status != Idle); // restart from the current CPU state
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::init()
+{
+    BaseCPU::init();
+/*
+    copyFromXC();
+
+    // Also copy over PC/nextPC.  This isn't normally copied in "copyFromXC()"
+    // so that the XC doesn't mess up the PC when returning from a syscall.
+    PC = xc->readPC();
+    nextPC = xc->regs.npc;
+*/
+    // Mark this as in syscall so it won't need to squash
+    thread.inSyscall = true;
+#if FULL_SYSTEM
+    for (int i = 0; i < execContexts.size(); ++i) {
+        ExecContext *xc = execContexts[i];
+
+        // initialize CPU, including PC
+        TheISA::initCPU(xc, xc->readCpuId());
+    }
+#endif
+    frontEnd->renameTable.copyFrom(thread.renameTable);  // re-sync both ends
+    backEnd->renameTable.copyFrom(thread.renameTable);
+
+    thread.inSyscall = false;  // XC register writes squash again from here on
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::serialize(std::ostream &os)
+{
+    // At this point, all DCacheCompEvents should be processed.
+
+    BaseCPU::serialize(os);
+    SERIALIZE_ENUM(_status);
+    nameOut(os, csprintf("%s.xc", name()));  // same suffix unserialize() reads
+    xcProxy.serialize(os);  // NOTE(review): OzoneXC::serialize() is an empty stub
+    nameOut(os, csprintf("%s.tickEvent", name()));
+    tickEvent.serialize(os);
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::unserialize(Checkpoint *cp, const std::string &section)
+{   // Restores state in the same order serialize() writes it.
+    BaseCPU::unserialize(cp, section);
+    UNSERIALIZE_ENUM(_status);
+    xcProxy.unserialize(cp, csprintf("%s.xc", section));  // currently a no-op stub
+    tickEvent.unserialize(cp, csprintf("%s.tickEvent", section));
+}
+
+template <class Impl>
+Fault
+OzoneCPU<Impl>::copySrcTranslate(Addr src)
+{
+    panic("Copy not implemented!\n");
+    return NoFault;  // presumably unreachable after panic(); present for the return type
+#if 0  // Everything below is compiled out.
+    static bool no_warn = true;
+    int blk_size = (dcacheInterface) ? dcacheInterface->getBlockSize() : 64;
+    // Only support block sizes of 64 atm.
+    assert(blk_size == 64);
+    int offset = src & (blk_size - 1);
+
+    // Make sure block doesn't span page
+    if (no_warn &&
+        (src & TheISA::PageMask) != ((src + blk_size) & TheISA::PageMask) &&
+        (src >> 40) != 0xfffffc) {
+        warn("Copied block source spans pages %x.", src);
+        no_warn = false;
+    }
+
+    memReq->reset(src & ~(blk_size - 1), blk_size);
+
+    // translate to physical address
+    Fault fault = xc->translateDataReadReq(memReq);
+
+    assert(fault != Alignment_Fault);
+
+    if (fault == NoFault) {
+        xc->copySrcAddr = src;
+        xc->copySrcPhysAddr = memReq->paddr + offset;
+    } else {
+        xc->copySrcAddr = 0;
+        xc->copySrcPhysAddr = 0;
+    }
+    return fault;
+#endif
+}
+
+template <class Impl>
+Fault
+OzoneCPU<Impl>::copy(Addr dest)
+{
+    panic("Copy not implemented!\n");
+    return NoFault;  // presumably unreachable after panic(); present for the return type
+#if 0  // Everything below is compiled out.
+    static bool no_warn = true;
+    int blk_size = (dcacheInterface) ? dcacheInterface->getBlockSize() : 64;
+    // Only support block sizes of 64 atm.
+    assert(blk_size == 64);
+    uint8_t data[blk_size];
+    //assert(xc->copySrcAddr);
+    int offset = dest & (blk_size - 1);
+
+    // Make sure block doesn't span page
+    if (no_warn &&
+        (dest & TheISA::PageMask) != ((dest + blk_size) & TheISA::PageMask) &&
+        (dest >> 40) != 0xfffffc) {
+        no_warn = false;
+        warn("Copied block destination spans pages %x. ", dest);
+    }
+
+    memReq->reset(dest & ~(blk_size -1), blk_size);
+    // translate to physical address
+    Fault fault = xc->translateDataWriteReq(memReq);
+
+    assert(fault != Alignment_Fault);
+
+    if (fault == NoFault) {
+        Addr dest_addr = memReq->paddr + offset;
+        // Need to read straight from memory since we have more than 8 bytes.
+        memReq->paddr = xc->copySrcPhysAddr;
+        xc->mem->read(memReq, data);
+        memReq->paddr = dest_addr;
+        xc->mem->write(memReq, data);
+        if (dcacheInterface) {
+            memReq->cmd = Copy;
+            memReq->completionEvent = NULL;
+            memReq->paddr = xc->copySrcPhysAddr;
+            memReq->dest = dest_addr;
+            memReq->size = 64;
+            memReq->time = curTick;
+            dcacheInterface->access(memReq);
+        }
+    }
+    return fault;
+#endif
+}
+
+#if FULL_SYSTEM
+template <class Impl>
+Addr
+OzoneCPU<Impl>::dbg_vtophys(Addr addr)
+{
+    return vtophys(&xcProxy, addr);  // translate using the proxy exec context
+}
+#endif // FULL_SYSTEM
+/*
+template <class Impl>
+void
+OzoneCPU<Impl>::processICacheCompletion()
+{
+    switch (status()) {
+      case IcacheMiss:
+        DPRINTF(OzoneCPU, "OzoneCPU: Finished Icache miss.\n");
+
+        icacheStallCycles += curTick - lastIcacheStall;
+        _status = IcacheMissComplete;
+        cacheBlkValid = true;
+//	scheduleTickEvent(1);
+        break;
+      case SwitchedOut:
+        // If this CPU has been switched out due to sampling/warm-up,
+        // ignore any further status changes (e.g., due to cache
+        // misses outstanding at the time of the switch).
+        return;
+      default:
+        panic("OzoneCPU::processICacheCompletion: bad state");
+        break;
+    }
+}
+*/
+#if FULL_SYSTEM
+template <class Impl>
+void
+OzoneCPU<Impl>::post_interrupt(int int_num, int index)
+{
+    BaseCPU::post_interrupt(int_num, index);
+    // Wake-up happens only if the thread's status field reads Suspended.
+    if (thread._status == ExecContext::Suspended) {
+        DPRINTF(IPI,"Suspended Processor awoke\n");
+//	thread.activate();
+        // Hack for now.  Otherwise might have to go through the xcProxy, or
+        // I need to figure out what's the right thing to call.
+        activateContext(thread.tid, 1);  // delay argument of 1
+    }
+}
+#endif // FULL_SYSTEM
+
+/* start simulation, program loaded, processor precise state initialized */
+template <class Impl>
+void
+OzoneCPU<Impl>::tick()
+{
+    DPRINTF(OzoneCPU, "\n\nOzoneCPU: Ticking cpu.\n");
+    // Re-zero the int and FP zero registers in the rename table every cycle.
+    thread.renameTable[ZeroReg]->setIntResult(0);
+    thread.renameTable[ZeroReg+TheISA::FP_Base_DepTag]->
+        setDoubleResult(0.0);
+
+    // General code flow:
+    // Check for any interrupts.  Handle them if I do have one.
+    // Check if I have a need to fetch a new cache block.  Either a bit could be
+    // set by functions indicating that I need to fetch a new block, or I could
+    // hang onto the last PC of the last cache block I fetched and compare the
+    // current PC to that.  Setting a bit seems nicer but may be more error
+    // prone.
+    // Scan through the IQ to figure out if there's anything I can issue/execute
+    // Might need something close to the FU Pools to tell what instructions
+    // I can issue.  How to handle loads and stores vs other insts?
+    // Extremely slow way: find first inst that can possibly issue; if it's a
+    // load or a store, then iterate through load/store queue.
+    // If I can't find instructions to execute and I've got room in the IQ
+    // (which is just a counter), then grab a few instructions out of the cache
+    // line buffer until I either run out or can execute up until my limit.
+
+    numCycles++;
+
+    traceData = NULL;
+
+//    Fault fault = NoFault;
+
+#if 0 // FULL_SYSTEM
+    if (checkInterrupts && check_interrupts() && !inPalMode() &&
+        status() != IcacheMissComplete) {
+        int ipl = 0;
+        int summary = 0;
+        checkInterrupts = false;
+
+        if (readMiscReg(IPR_SIRR)) {
+            for (int i = INTLEVEL_SOFTWARE_MIN;
+                 i < INTLEVEL_SOFTWARE_MAX; i++) {
+                if (readMiscReg(IPR_SIRR) & (ULL(1) << i)) {
+                    // See table 4-19 of 21164 hardware reference
+                    ipl = (i - INTLEVEL_SOFTWARE_MIN) + 1;
+                    summary |= (ULL(1) << i);
+                }
+            }
+        }
+
+        // Is this method so that if the interrupts are switched over from
+        // another CPU they'll still be handled?
+//	uint64_t interrupts = cpuXC->cpu->intr_status();
+        uint64_t interrupts = intr_status();
+        for (int i = INTLEVEL_EXTERNAL_MIN;
+            i < INTLEVEL_EXTERNAL_MAX; i++) {
+            if (interrupts & (ULL(1) << i)) {
+                // See table 4-19 of 21164 hardware reference
+                ipl = i;
+                summary |= (ULL(1) << i);
+            }
+        }
+
+        if (readMiscReg(IPR_ASTRR))
+            panic("asynchronous traps not implemented\n");
+
+        if (ipl && ipl > readMiscReg(IPR_IPLR)) {
+            setMiscReg(IPR_ISR, summary);
+            setMiscReg(IPR_INTID, ipl);
+
+            Fault(new InterruptFault)->invoke(xc);
+
+            DPRINTF(Flow, "Interrupt! IPLR=%d ipl=%d summary=%x\n",
+                    readMiscReg(IPR_IPLR), ipl, summary);
+        }
+    }
+#endif
+
+    // Make call to ISA to ensure 0 register semantics...actually because the
+    // DynInsts will generally be the register file, this should only have to
+    // happen when the xc is actually written to (during a syscall or something)
+    // maintain $r0 semantics
+//    assert(renameTable[ZeroReg]->readIntResult() == 0);
+#ifdef TARGET_ALPHA
+//    assert(renameTable[ZeroReg]->readDoubleResult() == 0);
+#endif // TARGET_ALPHA
+
+    comm.advance();
+    frontEnd->tick();
+    backEnd->tick();
+
+    // Do this here?  For now the front end will control the PC.
+//    PC = nextPC;
+
+    // check for instruction-count-based events
+    comInstEventQueue[0]->serviceEvents(numInst);
+    // Re-arm only while Running so a suspend during this tick sticks.
+    if (!tickEvent.scheduled() && _status == Running)
+        tickEvent.schedule(curTick + 1);
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::squashFromXC()
+{
+    thread.inSyscall = true;  // XC setters skip squashing while this is set
+    backEnd->squashFromXC();  // NOTE(review): inSyscall is not cleared here
+}
+
+#if !FULL_SYSTEM
+template <class Impl>
+void
+OzoneCPU<Impl>::syscall()
+{
+    // Not sure this copy is needed, depending on how the XC proxy is made.
+    thread.renameTable.copyFrom(backEnd->renameTable);
+
+    thread.inSyscall = true;  // XC register writes must not squash meanwhile
+
+    thread.funcExeInst++;     // bumped only for the duration of the call
+
+    DPRINTF(OzoneCPU, "FuncExeInst: %i\n", thread.funcExeInst);
+
+    thread.process->syscall(&xcProxy);
+
+    thread.funcExeInst--;
+
+    thread.inSyscall = false;
+    // Push the thread's rename table back out to both pipeline ends.
+    frontEnd->renameTable.copyFrom(thread.renameTable);
+    backEnd->renameTable.copyFrom(thread.renameTable);
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::setSyscallReturn(SyscallReturn return_value, int tid)
+{
+    // Alpha syscall convention: a3 (r19) carries the success/failure
+    // flag, while v0 carries the return value or the error number.
+    // NOTE(review): tid is unused; the single 'thread' is always updated.
+    if (!return_value.successful()) {
+        // failure: flag is set to (IntReg) -1, v0 gets the negated value
+        thread.renameTable[SyscallSuccessReg]->setIntResult((IntReg) -1);
+        thread.renameTable[ReturnValueReg]->setIntResult(-return_value.value());
+    } else {
+        // success: flag is cleared, v0 gets the value unchanged
+        thread.renameTable[SyscallSuccessReg]->setIntResult(0);
+        thread.renameTable[ReturnValueReg]->setIntResult(return_value.value());
+    }
+}
+#else
+template <class Impl>
+Fault
+OzoneCPU<Impl>::hwrei()
+{
+    // Need to move this to ISA code
+    // May also need to make this per thread
+    if (!inPalMode())
+        return new UnimplementedOpcodeFault;  // rejected outside PAL mode
+
+    thread.setNextPC(thread.readMiscReg(AlphaISA::IPR_EXC_ADDR));
+
+    // Not sure how to make a similar check in the Ozone model
+//    if (!misspeculating()) {
+        kernelStats->hwrei();
+
+        checkInterrupts = true;  // set unconditionally while the check is disabled
+//    }
+
+    // FIXME: XXX check for interrupts? XXX
+    return NoFault;
+}
+
+template <class Impl>
+bool
+OzoneCPU<Impl>::simPalCheck(int palFunc)
+{
+    // Need to move this to ISA code
+    // May also need to make this per thread
+    this->kernelStats->callpal(palFunc, &xcProxy);
+    // Returns false only when bpt/bugchk hits system->breakpoint().
+    switch (palFunc) {
+      case PAL::halt:
+        haltContext(thread.tid);
+        if (--System::numSystemsRunning == 0)
+            new SimExitEvent("all cpus halted");  // NOTE(review): result not kept
+        break;
+
+      case PAL::bpt:
+      case PAL::bugchk:
+        if (system->breakpoint())
+            return false;
+        break;
+    }
+
+    return true;
+}
+#endif
+
+template <class Impl>
+BaseCPU *
+OzoneCPU<Impl>::OzoneXC::getCpuPtr()
+{
+    return cpu;  // the CPU this proxy was attached to in the constructor
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::OzoneXC::setCpuId(int id)
+{   // The id is stored on both the CPU and the thread state.
+    cpu->cpuId = id;
+    thread->cpuId = id;
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::OzoneXC::setStatus(Status new_status)
+{
+//    cpu->_status = new_status;
+    thread->_status = new_status;  // only the thread status changes, not the CPU's
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::OzoneXC::activate(int delay)
+{
+    cpu->activateContext(thread->tid, delay);  // all work is done by the CPU
+}
+
+/// Suspend this context by forwarding to the CPU's suspendContext().
+template <class Impl>
+void
+OzoneCPU<Impl>::OzoneXC::suspend()
+{
+    cpu->suspendContext(thread->tid);
+}
+
+/// Deallocate this context by forwarding to the CPU's deallocateContext().
+template <class Impl>
+void
+OzoneCPU<Impl>::OzoneXC::deallocate()
+{
+    cpu->deallocateContext(thread->tid);
+}
+
+/// Halt this context by forwarding to the CPU's haltContext().
+template <class Impl>
+void
+OzoneCPU<Impl>::OzoneXC::halt()
+{
+    cpu->haltContext(thread->tid);
+}
+
+#if FULL_SYSTEM
+template <class Impl>
+void
+OzoneCPU<Impl>::OzoneXC::dumpFuncProfile()
+{ }  // Empty body: the exit callback registered in the ctor dumps nothing.
+#endif
+
+template <class Impl>
+void
+OzoneCPU<Impl>::OzoneXC::takeOverFrom(ExecContext *old_context)
+{ }  // Empty body: no state is taken from old_context.
+
+template <class Impl>
+void
+OzoneCPU<Impl>::OzoneXC::regStats(const std::string &name)
+{ }  // Empty body: the proxy registers no statistics of its own.
+
+template <class Impl>
+void
+OzoneCPU<Impl>::OzoneXC::serialize(std::ostream &os)
+{ }  // Empty body: nothing is written to the checkpoint stream.
+
+template <class Impl>
+void
+OzoneCPU<Impl>::OzoneXC::unserialize(Checkpoint *cp, const std::string &section)
+{ }  // Empty body: nothing is read back from the checkpoint.
+
+#if FULL_SYSTEM
+template <class Impl>
+Event *
+OzoneCPU<Impl>::OzoneXC::getQuiesceEvent()
+{
+    return thread->quiesceEvent;  // allocated in the FULL_SYSTEM constructor path
+}
+
+template <class Impl>
+Tick
+OzoneCPU<Impl>::OzoneXC::readLastActivate()
+{
+    return thread->lastActivate;  // plain read of the thread-state field
+}
+
+template <class Impl>
+Tick
+OzoneCPU<Impl>::OzoneXC::readLastSuspend()
+{
+    return thread->lastSuspend;  // plain read of the thread-state field
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::OzoneXC::profileClear()
+{
+    if (thread->profile)  // the profile is only created when p->profile was set
+        thread->profile->clear();
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::OzoneXC::profileSample()
+{
+    if (thread->profile)  // the profile is only created when p->profile was set
+        thread->profile->sample(thread->profileNode, thread->profilePC);
+}
+#endif
+
+template <class Impl>
+int
+OzoneCPU<Impl>::OzoneXC::getThreadNum()
+{
+    return thread->tid;  // the constructor sets tid to 0
+}
+
+// Returns thread->inst. Somewhat obnoxious; really only used for the TLB fault.
+template <class Impl>
+TheISA::MachInst
+OzoneCPU<Impl>::OzoneXC::getInst()
+{
+    return thread->inst;
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::OzoneXC::copyArchRegs(ExecContext *xc)
+{
+    // Take over the PCs first and point the front end at them.
+    thread->PC = xc->readPC();
+    thread->nextPC = xc->readNextPC();
+    cpu->frontEnd->setPC(thread->PC);
+    cpu->frontEnd->setNextPC(thread->nextPC);
+
+    // Rename-table slots below FP_Base_DepTag hold integer registers;
+    // the NumFloatRegs slots from FP_Base_DepTag on hold FP registers.
+    // Any slot past those is left untouched.
+    const int fp_end = TheISA::FP_Base_DepTag + TheISA::NumFloatRegs;
+    for (int slot = 0; slot < TheISA::TotalNumRegs; ++slot) {
+        if (slot < TheISA::FP_Base_DepTag) {
+            thread->renameTable[slot]->setIntResult(xc->readIntReg(slot));
+        } else if (slot < fp_end) {
+            thread->renameTable[slot]->setDoubleResult(
+                xc->readFloatRegDouble(slot - TheISA::FP_Base_DepTag));
+        }
+    }
+#if !FULL_SYSTEM
+    thread->funcExeInst = xc->readFuncExeInst();
+#endif
+    // Finally bring the miscellaneous registers across.
+    thread->regs.miscRegs.copyMiscRegs(xc);
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::OzoneXC::clearArchRegs()
+{
+    panic("Unimplemented!");  // not supported: any call panics
+}
 
 template <class Impl>
-class OoOCPU;
+uint64_t
+OzoneCPU<Impl>::OzoneXC::readIntReg(int reg_idx)
+{
+    return thread->renameTable[reg_idx]->readIntResult();  // read via rename table
+}
+
+template <class Impl>
+float
+OzoneCPU<Impl>::OzoneXC::readFloatRegSingle(int reg_idx)
+{   // reg_idx is FP-relative; FP slots start at FP_Base_DepTag in the table.
+    return thread->renameTable[reg_idx + FP_Base_DepTag]->readFloatResult();
+}
+
+template <class Impl>
+double
+OzoneCPU<Impl>::OzoneXC::readFloatRegDouble(int reg_idx)
+{   // reg_idx is FP-relative; FP slots start at FP_Base_DepTag in the table.
+    return thread->renameTable[reg_idx + FP_Base_DepTag]->readDoubleResult();
+}
+
+template <class Impl>
+uint64_t
+OzoneCPU<Impl>::OzoneXC::readFloatRegInt(int reg_idx)
+{   // Integer view of an FP register; FP slots start at FP_Base_DepTag.
+    return thread->renameTable[reg_idx + FP_Base_DepTag]->readIntResult();
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::OzoneXC::setIntReg(int reg_idx, uint64_t val)
+{
+    thread->renameTable[reg_idx]->setIntResult(val);
+    // Outside a syscall, a write through the XC asks the CPU to squash.
+    if (!thread->inSyscall) {
+        cpu->squashFromXC();
+    }
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::OzoneXC::setFloatRegSingle(int reg_idx, float val)
+{
+    panic("Unimplemented!");  // not supported: any call panics
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::OzoneXC::setFloatRegDouble(int reg_idx, double val)
+{   // reg_idx is FP-relative; FP slots start at FP_Base_DepTag in the table.
+    thread->renameTable[reg_idx + FP_Base_DepTag]->setDoubleResult(val);
+    // Outside a syscall, a write through the XC asks the CPU to squash.
+    if (!thread->inSyscall) {
+        cpu->squashFromXC();
+    }
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::OzoneXC::setFloatRegInt(int reg_idx, uint64_t val)
+{
+    panic("Unimplemented!");  // not supported: any call panics
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::OzoneXC::setPC(Addr val)
+{
+    thread->PC = val;
+    cpu->frontEnd->setPC(val);  // keep the front end's PC in step
+    // Outside a syscall, a write through the XC asks the CPU to squash.
+    if (!thread->inSyscall) {
+        cpu->squashFromXC();
+    }
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::OzoneXC::setNextPC(Addr val)
+{
+    thread->nextPC = val;
+    cpu->frontEnd->setNextPC(val);  // keep the front end's next PC in step
+    // Outside a syscall, a write through the XC asks the CPU to squash.
+    if (!thread->inSyscall) {
+        cpu->squashFromXC();
+    }
+}
+
+template <class Impl>
+TheISA::MiscReg
+OzoneCPU<Impl>::OzoneXC::readMiscReg(int misc_reg)
+{
+    return thread->regs.miscRegs.readReg(misc_reg);  // delegates to miscRegs
+}
+
+template <class Impl>
+TheISA::MiscReg
+OzoneCPU<Impl>::OzoneXC::readMiscRegWithEffect(int misc_reg, Fault &fault)
+{   // Delegates to miscRegs, passing this proxy and the caller's fault reference.
+    return thread->regs.miscRegs.readRegWithEffect(misc_reg,
+                                                   fault, this);
+}
+
+template <class Impl>
+Fault
+OzoneCPU<Impl>::OzoneXC::setMiscReg(int misc_reg, const MiscReg &val)
+{
+    // Write the register first; a squash is requested unless in a syscall.
+    Fault ret_fault = thread->regs.miscRegs.setReg(misc_reg, val);
+
+    if (!thread->inSyscall) {
+        cpu->squashFromXC();
+    }
+
+    return ret_fault;  // the fault from the write is returned either way
+}
 
 template <class Impl>
-class OoODynInst;
+Fault
+OzoneCPU<Impl>::OzoneXC::setMiscRegWithEffect(int misc_reg, const MiscReg &val)
+{
+    // Write the register first; a squash is requested unless in a syscall.
+    Fault ret_fault = thread->regs.miscRegs.setRegWithEffect(misc_reg, val,
+                                                             this);
 
-struct OoOImpl {
-    typedef AlphaISA ISA;
-    typedef OoOCPU<OoOImpl> OoOCPU;
-    typedef OoOCPU FullCPU;
-    typedef OoODynInst<OoOImpl> DynInst;
-    typedef RefCountingPtr<DynInst> DynInstPtr;
-};
+    if (!thread->inSyscall) {
+        cpu->squashFromXC();
+    }
 
-#endif // __CPU_OOO_CPU_OOO_IMPL_HH__
+    return ret_fault;  // the fault from the write is returned either way
+}
diff --git a/cpu/ozone/dyn_inst.cc b/cpu/ozone/dyn_inst.cc
new file mode 100644
index 000000000..3bf8b03ca
--- /dev/null
+++ b/cpu/ozone/dyn_inst.cc
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cpu/ozone/dyn_inst_impl.hh"
+#include "cpu/ozone/ozone_impl.hh"
+#include "cpu/ozone/simple_impl.hh"
+
+template class OzoneDynInst<OzoneImpl>;   // explicit instantiation for OzoneImpl
+template class OzoneDynInst<SimpleImpl>;  // explicit instantiation for SimpleImpl
+
diff --git a/cpu/ozone/dyn_inst.hh b/cpu/ozone/dyn_inst.hh
new file mode 100644
index 000000000..4382af0fd
--- /dev/null
+++ b/cpu/ozone/dyn_inst.hh
@@ -0,0 +1,261 @@
+/*
+ * Copyright (c) 2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_OZONE_DYN_INST_HH__
+#define __CPU_OZONE_DYN_INST_HH__
+
+#include "arch/isa_traits.hh"
+#include "config/full_system.hh"
+#include "cpu/base_dyn_inst.hh"
+#include "cpu/ozone/cpu.hh"   // MUST include this
+#include "cpu/inst_seq.hh"
+#include "cpu/ozone/simple_impl.hh" // Would be nice to not have to include this
+#include "cpu/ozone/ozone_impl.hh"
+
+#include <list>
+#include <vector>
+
+template <class Impl>
+class OzoneDynInst : public BaseDynInst<Impl>
+{
+  public:
+    // Ozone dynamic inst; source values are read from producer insts.
+    typedef typename Impl::FullCPU FullCPU;
+
+    typedef typename FullCPU::ImplState ImplState;
+
+    // Typedef for DynInstPtr.  This is really just a RefCountingPtr<OoODynInst>.
+    typedef typename Impl::DynInstPtr DynInstPtr;
+
+//    typedef typename Impl::BranchPred::BPredInfo BPredInfo;
+
+    typedef TheISA::ExtMachInst ExtMachInst;
+    typedef TheISA::MachInst MachInst;
+    typedef TheISA::MiscReg MiscReg;
+    typedef typename std::list<DynInstPtr>::iterator ListIt;
+
+    // Note that this is duplicated from the BaseDynInst class; I'm simply not
+    // sure the enum would carry through so I could use it in array
+    // declarations in this class.
+    enum {
+        MaxInstSrcRegs = TheISA::MaxInstSrcRegs,
+        MaxInstDestRegs = TheISA::MaxInstDestRegs
+    };
+
+    OzoneDynInst(FullCPU *cpu);  // all-zero fields; starts out completed
+
+    OzoneDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC,
+                 InstSeqNum seq_num, FullCPU *cpu);
+
+    OzoneDynInst(StaticInstPtr inst);  // wraps an existing static inst
+
+    ~OzoneDynInst();  // drops producer, prev-dest and dependent links
+
+    void setSrcInst(DynInstPtr &newSrcInst, int regIdx)
+    { srcInsts[regIdx] = newSrcInst; }
+
+    bool srcInstReady(int regIdx);  // producer of source regIdx completed?
+
+    void setPrevDestInst(DynInstPtr &oldDestInst, int regIdx)
+    { prevDestInst[regIdx] = oldDestInst; }
+
+    DynInstPtr &getPrevDestInst(int regIdx)
+    { return prevDestInst[regIdx]; }
+
+    void addDependent(DynInstPtr &dependent_inst);  // appends to dependents
+
+    std::vector<DynInstPtr> &getDependents() { return dependents; }
+
+    void wakeDependents();  // markSrcRegReady() on every dependent
+
+//    void setBPredInfo(const BPredInfo &bp_info) { bpInfo = bp_info; }
+
+//    BPredInfo &getBPredInfo() { return bpInfo; }
+
+//    OzoneXC *thread;
+
+  private:
+    void initInstPtrs();  // nulls srcInsts, clears iqItValid
+
+    std::vector<DynInstPtr> dependents;  // filled by addDependent()
+
+    /** The instruction that produces the value of the source registers.  These
+     *  may be NULL if the value has already been read from the source
+     *  instruction.
+     */
+    DynInstPtr srcInsts[MaxInstSrcRegs];
+
+    /**
+     *  Previous rename instruction for each destination register.
+     */
+    DynInstPtr prevDestInst[MaxInstDestRegs];
+
+//    BPredInfo bpInfo;
+
+  public:
+
+    Fault initiateAcc();
+
+    Fault completeAcc();
+/*
+    template <class T>
+    Fault read(Addr addr, T &data, unsigned flags);
+
+    template <class T>
+    Fault write(T data, Addr addr, unsigned flags, uint64_t *res);
+*/
+    // The register accessor methods provide the index of the
+    // instruction's operand (e.g., 0 or 1), not the architectural
+    // register index, to simplify the implementation of register
+    // renaming.  We find the architectural register index by indexing
+    // into the instruction's own operand index table.  Note that a
+    // raw pointer to the StaticInst is provided instead of a
+    // ref-counted StaticInstPtr to reduce overhead.  This is fine as
+    // long as these methods don't copy the pointer into any long-term
+    // storage (which is pretty hard to imagine they would have reason
+    // to do).
+
+    uint64_t readIntReg(const StaticInst *si, int idx)
+    {
+        return srcInsts[idx]->readIntResult();
+    }
+
+    float readFloatRegSingle(const StaticInst *si, int idx)
+    {
+        return srcInsts[idx]->readFloatResult();
+    }
+
+    double readFloatRegDouble(const StaticInst *si, int idx)
+    {
+        return srcInsts[idx]->readDoubleResult();
+    }
+
+    uint64_t readFloatRegInt(const StaticInst *si, int idx)
+    {
+        return srcInsts[idx]->readIntResult();
+    }
+
+    /** @todo: Make results into arrays so they can handle multiple dest
+     *  registers.
+     */
+    void setIntReg(const StaticInst *si, int idx, uint64_t val)
+    {
+        this->instResult.integer = val;
+    }
+
+    void setFloatRegSingle(const StaticInst *si, int idx, float val)
+    {
+        this->instResult.fp = val;
+    }
+
+    void setFloatRegDouble(const StaticInst *si, int idx, double val)
+    {
+        this->instResult.dbl = val;
+    }
+
+    void setFloatRegInt(const StaticInst *si, int idx, uint64_t val)
+    {
+        this->instResult.integer = val;
+    }
+
+    void setIntResult(uint64_t result) { this->instResult.integer = result; }
+    void setDoubleResult(double result) { this->instResult.dbl = result; }
+
+    bool srcsReady();    // every source producer has completed
+    bool eaSrcsReady();  // same check, skipping source operand 0
+
+    Fault execute();
+
+    Fault executeEAComp()
+    { return NoFault; }
+
+    Fault executeMemAcc()
+    { return this->staticInst->memAccInst()->execute(this, this->traceData); }
+
+    void clearDependents();  // clears dependents, srcInsts, prevDestInst
+
+  public:
+    // ISA stuff
+    MiscReg readMiscReg(int misc_reg);
+
+    MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault);
+
+    Fault setMiscReg(int misc_reg, const MiscReg &val);
+
+    Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val);
+
+#if FULL_SYSTEM
+    Fault hwrei();
+    int readIntrFlag();
+    void setIntrFlag(int val);
+    bool inPalMode();
+    void trap(Fault fault);
+    bool simPalCheck(int palFunc);
+#else
+    void syscall();
+#endif
+
+    ListIt iqIt;  // iterator into a std::list<DynInstPtr>
+    bool iqItValid;  // set to false by initInstPtrs()
+};
+
+/*
+template<class Impl>
+template<class T>
+inline Fault
+OzoneDynInst<Impl>::read(Addr addr, T &data, unsigned flags)
+{
+    Fault fault = this->cpu->read(addr, data, flags, this);
+
+    if (this->traceData) {
+        this->traceData->setAddr(addr);
+        this->traceData->setData(data);
+    }
+
+    return fault;
+}
+
+template<class Impl>
+template<class T>
+inline Fault
+OzoneDynInst<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res)
+{
+    Fault fault = this->cpu->write(data, addr, flags, res, this);
+
+    this->storeSize = sizeof(T);
+    this->storeData = data;
+
+    if (this->traceData) {
+        this->traceData->setAddr(addr);
+        this->traceData->setData(data);
+    }
+
+    return fault;
+}
+*/
+#endif // __CPU_OZONE_DYN_INST_HH__
diff --git a/cpu/ozone/dyn_inst_impl.hh b/cpu/ozone/dyn_inst_impl.hh
new file mode 100644
index 000000000..2d86ced62
--- /dev/null
+++ b/cpu/ozone/dyn_inst_impl.hh
@@ -0,0 +1,286 @@
+/*
+ * Copyright (c) 2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "arch/faults.hh"
+#include "arch/isa_traits.hh"
+#include "config/full_system.hh"
+#include "cpu/ozone/dyn_inst.hh"
+#include "kern/kernel_stats.hh"
+
+using namespace TheISA;
+
+template <class Impl>
+OzoneDynInst<Impl>::OzoneDynInst(FullCPU *cpu)
+    : BaseDynInst<Impl>(0, 0, 0, 0, cpu)
+{
+    this->setCompleted(); // all-zero inst fields: completed from the start
+
+    initInstPtrs();
+}
+
+template <class Impl>
+OzoneDynInst<Impl>::OzoneDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC,
+                                 InstSeqNum seq_num, FullCPU *cpu)
+    : BaseDynInst<Impl>(inst, PC, Pred_PC, seq_num, cpu)
+{
+    initInstPtrs(); // start with no source producers and iqItValid false
+}
+
+template <class Impl>
+OzoneDynInst<Impl>::OzoneDynInst(StaticInstPtr inst)
+    : BaseDynInst<Impl>(inst)
+{
+    initInstPtrs(); // start with no source producers and iqItValid false
+}
+
+template <class Impl>
+OzoneDynInst<Impl>::~OzoneDynInst()
+{
+    DPRINTF(BE, "[sn:%lli] destructor called\n", this->seqNum);
+
+    // Drop the links to the producers of the source operands.
+    for (int src = 0; src < this->numSrcRegs(); ++src)
+        srcInsts[src] = NULL;
+
+    // Drop the links to the previous producers of the destinations.
+    for (int dest = 0; dest < this->numDestRegs(); ++dest)
+        prevDestInst[dest] = NULL;
+    dependents.clear();
+}
+
+template <class Impl>
+Fault
+OzoneDynInst<Impl>::execute()
+{
+    // @todo: Setting inSyscall here is a roundabout way of keeping squashes
+    // from happening while the instruction uses the XC during execution
+    // (e.g. instructions with side effects that use the XC).  Fix this.
+    const bool saved_in_syscall = this->thread->inSyscall;
+    this->thread->inSyscall = true;
+
+    this->fault = this->staticInst->execute(this, this->traceData);
+
+    // Put the flag back to whatever it was on entry.
+    this->thread->inSyscall = saved_in_syscall;
+    return this->fault;
+}
+
+template <class Impl>
+Fault
+OzoneDynInst<Impl>::initiateAcc()
+{
+    // @todo: Setting inSyscall here is a roundabout way of keeping squashes
+    // from happening while the instruction uses the XC during execution
+    // (e.g. instructions with side effects that use the XC).  Fix this.
+    const bool saved_in_syscall = this->thread->inSyscall;
+    this->thread->inSyscall = true;
+
+    this->fault = this->staticInst->initiateAcc(this, this->traceData);
+
+    // Put the flag back to whatever it was on entry.
+    this->thread->inSyscall = saved_in_syscall;
+    return this->fault;
+}
+
+template <class Impl>
+Fault
+OzoneDynInst<Impl>::completeAcc()
+{
+    // Only loads and stores have an access to complete.
+    if (!this->isLoad() && !this->isStore())
+        panic("Unknown type!");
+
+    // Loads complete from the request's data buffer, stores from the
+    // request's result field.
+    uint8_t *acc_data = this->isLoad() ? this->req->data
+                                       : (uint8_t*)&this->req->result;
+
+    this->fault = this->staticInst->completeAcc(acc_data, this,
+                                                this->traceData);
+
+    return this->fault;
+}
+
+template <class Impl>
+bool
+OzoneDynInst<Impl>::srcInstReady(int regIdx)
+{
+    return srcInsts[regIdx]->isCompleted(); // producer must be non-NULL
+}
+
+template <class Impl>
+void
+OzoneDynInst<Impl>::addDependent(DynInstPtr &dependent_inst)
+{
+    dependents.push_back(dependent_inst); // notified by wakeDependents()
+}
+
+template <class Impl>
+void
+OzoneDynInst<Impl>::wakeDependents()
+{
+    // Unsigned index avoids a signed/unsigned comparison with size().
+    for (size_t i = 0; i < dependents.size(); ++i)
+        dependents[i]->markSrcRegReady();
+}
+
+template <class Impl>
+void
+OzoneDynInst<Impl>::initInstPtrs()
+{
+    // iqIt does not refer to anything yet; no source producers are known.
+    iqItValid = false;
+    for (int src = 0; src < MaxInstSrcRegs; ++src)
+        srcInsts[src] = NULL;
+}
+
+template <class Impl>
+bool
+OzoneDynInst<Impl>::srcsReady()
+{
+    // Ready only when the producer of every source operand has completed.
+    bool all_completed = true;
+    for (int src = 0; all_completed && src < this->numSrcRegs(); ++src)
+        all_completed = srcInsts[src]->isCompleted();
+
+    return all_completed;
+}
+
+template <class Impl>
+bool
+OzoneDynInst<Impl>::eaSrcsReady()
+{
+    // Same check as srcsReady(), except source operand 0 is not examined.
+    bool all_completed = true;
+    for (int src = 1; all_completed && src < this->numSrcRegs(); ++src)
+        all_completed = srcInsts[src]->isCompleted();
+
+    return all_completed;
+}
+
+template <class Impl>
+void
+OzoneDynInst<Impl>::clearDependents()
+{
+    // Forget the instructions waiting on this one first...
+    dependents.clear();
+    // ...then the producers of its sources and previous destinations.
+    for (int src = 0; src < this->numSrcRegs(); ++src)
+        srcInsts[src] = NULL;
+    for (int dest = 0; dest < this->numDestRegs(); ++dest)
+        prevDestInst[dest] = NULL;
+}
+template <class Impl>
+MiscReg
+OzoneDynInst<Impl>::readMiscReg(int misc_reg)
+{
+    return this->thread->readMiscReg(misc_reg); // forwarded to the thread
+}
+
+template <class Impl>
+MiscReg
+OzoneDynInst<Impl>::readMiscRegWithEffect(int misc_reg, Fault &fault)
+{
+    return this->thread->readMiscRegWithEffect(misc_reg, fault); // forwarded
+}
+
+template <class Impl>
+Fault
+OzoneDynInst<Impl>::setMiscReg(int misc_reg, const MiscReg &val)
+{
+    return this->thread->setMiscReg(misc_reg, val); // forwarded to the thread
+}
+
+template <class Impl>
+Fault
+OzoneDynInst<Impl>::setMiscRegWithEffect(int misc_reg, const MiscReg &val)
+{
+    return this->thread->setMiscRegWithEffect(misc_reg, val); // forwarded
+}
+
+#if FULL_SYSTEM
+
+template <class Impl>
+Fault
+OzoneDynInst<Impl>::hwrei()
+{
+    if (!this->cpu->inPalMode(this->readPC()))
+        return new AlphaISA::UnimplementedOpcodeFault; // PC not in PAL mode
+    // The next PC is the value read from the EXC_ADDR IPR.
+    this->setNextPC(this->thread->readMiscReg(AlphaISA::IPR_EXC_ADDR));
+
+    this->cpu->kernelStats->hwrei();
+
+    this->cpu->checkInterrupts = true;
+
+    // FIXME: XXX check for interrupts? XXX
+    return NoFault;
+}
+
+template <class Impl>
+int
+OzoneDynInst<Impl>::readIntrFlag()
+{
+    return this->cpu->readIntrFlag(); // forwarded to the CPU
+}
+
+template <class Impl>
+void
+OzoneDynInst<Impl>::setIntrFlag(int val)
+{
+    this->cpu->setIntrFlag(val); // forwarded to the CPU
+}
+
+template <class Impl>
+bool
+OzoneDynInst<Impl>::inPalMode()
+{
+    return this->cpu->inPalMode(); // CPU's no-argument check, not readPC()
+}
+
+template <class Impl>
+void
+OzoneDynInst<Impl>::trap(Fault fault)
+{
+    fault->invoke(this->thread->getXCProxy()); // invoked on the thread's XC
+}
+
+template <class Impl>
+bool
+OzoneDynInst<Impl>::simPalCheck(int palFunc)
+{
+    return this->cpu->simPalCheck(palFunc); // forwarded to the CPU
+}
+#else
+template <class Impl>
+void
+OzoneDynInst<Impl>::syscall()
+{
+    this->cpu->syscall(); // forwarded to the CPU (non-FULL_SYSTEM builds)
+}
+#endif
diff --git a/cpu/ozone/front_end.cc b/cpu/ozone/front_end.cc
new file mode 100644
index 000000000..a974d43cb
--- /dev/null
+++ b/cpu/ozone/front_end.cc
@@ -0,0 +1,7 @@
+
+#include "cpu/ozone/front_end_impl.hh"
+#include "cpu/ozone/ozone_impl.hh"
+#include "cpu/ozone/simple_impl.hh"
+
+template class FrontEnd<OzoneImpl>;   // explicit instantiations of the
+template class FrontEnd<SimpleImpl>;  // templates in front_end_impl.hh
diff --git a/cpu/ozone/front_end.hh b/cpu/ozone/front_end.hh
new file mode 100644
index 000000000..5e257b506
--- /dev/null
+++ b/cpu/ozone/front_end.hh
@@ -0,0 +1,242 @@
+
+#ifndef __CPU_OZONE_FRONT_END_HH__
+#define __CPU_OZONE_FRONT_END_HH__
+
+#include <deque>
+
+//#include "cpu/ozone/cpu.hh"
+#include "cpu/inst_seq.hh"
+#include "cpu/o3/bpred_unit.hh"
+#include "cpu/ozone/rename_table.hh"
+//#include "cpu/ozone/thread_state.hh"
+#include "mem/mem_req.hh"
+#include "sim/eventq.hh"
+#include "sim/stats.hh"
+
+class ExecContext;
+class MemInterface;
+template <class>
+class OzoneThreadState;
+class PageTable;
+template <class>
+class TimeBuffer;
+
+template <class Impl>
+class FrontEnd
+{
+  public:
+    typedef typename Impl::Params Params;
+    typedef typename Impl::DynInst DynInst;
+    typedef typename Impl::DynInstPtr DynInstPtr;
+    typedef typename Impl::FullCPU FullCPU;
+    typedef typename Impl::BackEnd BackEnd;
+
+    typedef typename Impl::FullCPU::OzoneXC OzoneXC;
+    typedef typename Impl::FullCPU::CommStruct CommStruct;
+
+    FrontEnd(Params *params);
+
+    std::string name() const; // cpu->name() + ".frontend"
+
+    void setCPU(FullCPU *cpu_ptr)
+    { cpu = cpu_ptr; }
+
+    void setBackEnd(BackEnd *back_end_ptr)
+    { backEnd = back_end_ptr; }
+
+    void setCommBuffer(TimeBuffer<CommStruct> *_comm);
+
+    void setXC(ExecContext *xc_ptr);
+
+    void setThreadState(OzoneThreadState<Impl> *thread_ptr)
+    { thread = thread_ptr; }
+
+    void regStats();
+
+    void tick(); // one front-end cycle
+    Fault fetchCacheLine();
+    void processInst(DynInstPtr &inst);
+    void squash(const InstSeqNum &squash_num, const Addr &next_PC,
+                const bool is_branch = false, const bool branch_taken = false);
+    DynInstPtr getInst();
+
+    void processCacheCompletion();
+
+    void addFreeRegs(int num_freed);
+
+    bool isEmpty() { return instBuffer.empty(); }
+
+  private:
+    bool updateStatus();
+
+    void checkBE();
+    DynInstPtr getInstFromCacheline();
+    void renameInst(DynInstPtr &inst);
+    // Returns true if we need to stop the front end this cycle
+    bool processBarriers(DynInstPtr &inst);
+
+    void handleFault(Fault &fault);
+
+    // Align an address (typically a PC) to the start of an I-cache block.
+    // We fold in the PISA 64- to 32-bit conversion here as well.
+    Addr icacheBlockAlignPC(Addr addr)
+    {
+        addr = TheISA::realPCToFetchPC(addr);
+        return (addr & ~(cacheBlkMask));
+    }
+
+    InstSeqNum getAndIncrementInstSeq()
+    { return cpu->globalSeqNum++; } // post-increment: returns the old value
+
+  public:
+    FullCPU *cpu; // set by setCPU()
+
+    BackEnd *backEnd; // set by setBackEnd()
+
+    ExecContext *xc; // set by setXC()
+
+    OzoneThreadState<Impl> *thread; // set by setThreadState()
+
+    enum Status {
+        Running,
+        Idle,
+        IcacheMissStall,
+        IcacheMissComplete,
+        SerializeBlocked,
+        SerializeComplete,
+        RenameBlocked,
+        BEBlocked
+    };
+
+    Status status; // starts as Idle (see constructor)
+
+  private:
+    TimeBuffer<CommStruct> *comm; // set by setCommBuffer()
+    typename TimeBuffer<CommStruct>::wire fromCommit; // comm wire at -1
+
+    typedef typename Impl::BranchPred BranchPred;
+
+    // Typedef for semi-opaque type that holds any information the branch
+    // predictor needs to update itself.  Only two fields are used outside of
+    // branch predictor, nextPC and isTaken.
+//    typedef typename BranchPred::BPredInfo BPredInfo;
+
+    BranchPred branchPred;
+
+    class ICacheCompletionEvent : public Event
+    {
+      private:
+        FrontEnd *frontEnd;
+
+      public:
+        ICacheCompletionEvent(FrontEnd *_fe);
+
+        virtual void process();
+        virtual const char *description();
+    };
+
+    ICacheCompletionEvent cacheCompletionEvent;
+
+    MemInterface *icacheInterface; // may be NULL (constructor checks it)
+
+#if !FULL_SYSTEM
+    PageTable *pTable;
+#endif
+
+    MemReqPtr memReq; // reset and reused by each fetchCacheLine() call
+
+    /** Mask to get a cache block's address. */
+    Addr cacheBlkMask;
+
+    unsigned cacheBlkSize; // icache block size; 64 when there is no icache
+
+    Addr cacheBlkPC;
+
+    /** The cache line being fetched. */
+    uint8_t *cacheData;
+
+    bool fetchCacheLineNextCycle; // tick() must call fetchCacheLine()
+
+    bool cacheBlkValid; // cleared on a miss, set once the miss completes
+
+  public:
+    RenameTable<Impl> renameTable;
+
+  private:
+    Addr PC;
+    Addr nextPC;
+
+  public:
+    void setPC(Addr val) { PC = val; }
+    void setNextPC(Addr val) { nextPC = val; }
+
+    void dumpInsts();
+
+  private:
+    typedef typename std::deque<DynInstPtr> InstBuff;
+    typedef typename InstBuff::iterator InstBuffIt;
+
+    InstBuff instBuffer; // tick() appends processed insts here
+
+    int instBufferSize; // incremented with every push onto instBuffer
+
+    int maxInstBufferSize; // tick() stops filling at this size
+
+    int width; // max insts processed per tick()
+
+    int freeRegs; // tick() goes RenameBlocked when this is <= 0
+
+    int numPhysRegs;
+
+    bool serializeNext; // ends tick()'s processing loop when set
+
+    DynInstPtr barrierInst; // non-null => SerializeBlocked after a miss
+
+    // number of idle cycles
+/*
+    Stats::Average<> notIdleFraction;
+    Stats::Formula idleFraction;
+*/
+    // @todo: Consider making these vectors and tracking on a per thread basis.
+    /** Stat for total number of cycles stalled due to an icache miss. */
+    Stats::Scalar<> icacheStallCycles;
+    /** Stat for total number of fetched instructions. */
+    Stats::Scalar<> fetchedInsts;
+    Stats::Scalar<> fetchedBranches;
+    /** Stat for total number of predicted branches. */
+    Stats::Scalar<> predictedBranches;
+    /** Stat for total number of cycles spent fetching. */
+    Stats::Scalar<> fetchCycles;
+
+    Stats::Scalar<> fetchIdleCycles;
+    /** Stat for total number of cycles spent squashing. */
+    Stats::Scalar<> fetchSquashCycles;
+    /** Stat for total number of cycles spent blocked due to other stages in
+     * the pipeline.
+     */
+    Stats::Scalar<> fetchBlockedCycles;
+    /** Stat for total number of fetched cache lines. */
+    Stats::Scalar<> fetchedCacheLines;
+    /** Distribution of number of instructions fetched each cycle. */
+    Stats::Distribution<> fetchNisnDist;
+//    Stats::Vector<> qfull_iq_occupancy;
+//    Stats::VectorDistribution<> qfull_iq_occ_dist_;
+    Stats::Formula idleRate;
+    Stats::Formula branchRate;
+    Stats::Formula fetchRate;
+    Stats::Scalar<> IFQCount;	// cumulative IFQ occupancy
+    Stats::Formula IFQOccupancy;
+    Stats::Formula IFQLatency;
+    Stats::Scalar<> IFQFcount; // cumulative IFQ full count
+    Stats::Formula IFQFullRate;
+
+    Stats::Scalar<> dispatchCountStat;
+    Stats::Scalar<> dispatchedSerializing;
+    Stats::Scalar<> dispatchedTempSerializing;
+    Stats::Scalar<> dispatchSerializeStallCycles;
+    Stats::Formula dispatchRate;
+    Stats::Formula regIntFull;
+    Stats::Formula regFpFull;
+};
+
+#endif // __CPU_OZONE_FRONT_END_HH__
diff --git a/cpu/ozone/front_end_impl.hh b/cpu/ozone/front_end_impl.hh
new file mode 100644
index 000000000..0136d0ef0
--- /dev/null
+++ b/cpu/ozone/front_end_impl.hh
@@ -0,0 +1,798 @@
+
+#include "arch/isa_traits.hh"
+#include "base/statistics.hh"
+#include "cpu/exec_context.hh"
+#include "cpu/exetrace.hh"
+#include "cpu/ozone/front_end.hh"
+#include "mem/mem_interface.hh"
+#include "sim/byte_swap.hh"
+
+using namespace TheISA;
+
+template <class Impl>
+FrontEnd<Impl>::FrontEnd(Params *params)
+    : branchPred(params),
+      cacheCompletionEvent(this),
+      icacheInterface(params->icacheInterface),
+      instBufferSize(0),
+      maxInstBufferSize(params->maxInstBufferSize),
+      width(params->frontEndWidth),
+      freeRegs(params->numPhysicalRegs),
+      numPhysRegs(params->numPhysicalRegs),
+      serializeNext(false)
+{
+    status = Idle;
+    // cpu, backEnd, xc, thread and comm are set later through the setters.
+
+    // Size of cache block; falls back to 64 bytes without an icache.
+    cacheBlkSize = icacheInterface ? icacheInterface->getBlockSize() : 64;
+
+    assert(isPowerOf2(cacheBlkSize));
+
+    // Setup Memory Request.  fetchCacheLine() requests cacheBlkSize bytes,
+    // so the data buffer must hold a whole block rather than a fixed 64.
+    memReq = new MemReq();
+    memReq->asid = 0;
+    memReq->data = new uint8_t[cacheBlkSize];
+
+    // Create mask to get rid of offset bits.
+    cacheBlkMask = (cacheBlkSize - 1);
+
+    // Create space to store a cache line.
+    cacheData = new uint8_t[cacheBlkSize];
+
+    fetchCacheLineNextCycle = true;
+
+    cacheBlkValid = false;
+
+#if !FULL_SYSTEM
+    pTable = params->pTable;
+#endif
+}
+
+template <class Impl>
+std::string
+FrontEnd<Impl>::name() const
+{
+    return cpu->name() + ".frontend"; // cpu must have been set by setCPU()
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::setCommBuffer(TimeBuffer<CommStruct> *_comm)
+{
+    // @todo: The wire offset of -1 is hardcoded; make it a latency parameter.
+    fromCommit = _comm->getWire(-1);
+    comm = _comm;
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::setXC(ExecContext *xc_ptr)
+{
+    xc = xc_ptr;
+    memReq->xc = xc; // the fetch request carries the same context
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::regStats()
+{ // Names and describes the front-end stats; uses cpu, so call setCPU() first.
+    icacheStallCycles
+        .name(name() + ".icacheStallCycles")
+        .desc("Number of cycles fetch is stalled on an Icache miss")
+        .prereq(icacheStallCycles);
+
+    fetchedInsts
+        .name(name() + ".fetchedInsts")
+        .desc("Number of instructions fetch has processed")
+        .prereq(fetchedInsts);
+
+    fetchedBranches
+        .name(name() + ".fetchedBranches")
+        .desc("Number of fetched branches")
+        .prereq(fetchedBranches);
+
+    predictedBranches
+        .name(name() + ".predictedBranches")
+        .desc("Number of branches that fetch has predicted taken")
+        .prereq(predictedBranches);
+
+    fetchCycles
+        .name(name() + ".fetchCycles")
+        .desc("Number of cycles fetch has run and was not squashing or"
+              " blocked")
+        .prereq(fetchCycles);
+
+    fetchIdleCycles
+        .name(name() + ".fetchIdleCycles")
+        .desc("Number of cycles fetch was idle")
+        .prereq(fetchIdleCycles);
+
+    fetchSquashCycles
+        .name(name() + ".fetchSquashCycles")
+        .desc("Number of cycles fetch has spent squashing")
+        .prereq(fetchSquashCycles);
+
+    fetchBlockedCycles
+        .name(name() + ".fetchBlockedCycles")
+        .desc("Number of cycles fetch has spent blocked")
+        .prereq(fetchBlockedCycles);
+
+    fetchedCacheLines
+        .name(name() + ".fetchedCacheLines")
+        .desc("Number of cache lines fetched")
+        .prereq(fetchedCacheLines);
+
+    fetchNisnDist
+        .init(/* base value */ 0,
+              /* last value */ width,
+              /* bucket size */ 1)
+        .name(name() + ".rateDist")
+        .desc("Number of instructions fetched each cycle (Total)")
+        .flags(Stats::pdf);
+
+    idleRate
+        .name(name() + ".idleRate")
+        .desc("Percent of cycles fetch was idle")
+        .prereq(idleRate);
+    idleRate = fetchIdleCycles * 100 / cpu->numCycles; // scaled to a percent
+
+    branchRate
+        .name(name() + ".branchRate")
+        .desc("Number of branch fetches per cycle")
+        .flags(Stats::total);
+    branchRate = fetchedBranches / cpu->numCycles;
+
+    fetchRate
+        .name(name() + ".rate")
+        .desc("Number of inst fetches per cycle")
+        .flags(Stats::total);
+    fetchRate = fetchedInsts / cpu->numCycles;
+
+    IFQCount
+        .name(name() + ".IFQ:count")
+        .desc("cumulative IFQ occupancy")
+        ;
+
+    IFQFcount
+        .name(name() + ".IFQ:fullCount")
+        .desc("cumulative IFQ full count")
+        .flags(Stats::total)
+        ;
+
+    IFQOccupancy
+        .name(name() + ".IFQ:occupancy")
+        .desc("avg IFQ occupancy (inst's)")
+        ;
+    IFQOccupancy = IFQCount / cpu->numCycles;
+
+    IFQLatency
+        .name(name() + ".IFQ:latency")
+        .desc("avg IFQ occupant latency (cycle's)")
+        .flags(Stats::total)
+        ;
+
+    IFQFullRate
+        .name(name() + ".IFQ:fullRate")
+        .desc("fraction of time (cycles) IFQ was full")
+        .flags(Stats::total);
+        ; // NOTE(review): stray empty statement; the chain ended on the line above
+    IFQFullRate = IFQFcount * Stats::constant(100) / cpu->numCycles;
+
+    dispatchCountStat
+        .name(name() + ".DIS:count")
+        .desc("cumulative count of dispatched insts")
+        .flags(Stats::total)
+        ;
+
+    dispatchedSerializing
+        .name(name() + ".DIS:serializingInsts")
+        .desc("count of serializing insts dispatched")
+        .flags(Stats::total)
+        ;
+
+    dispatchedTempSerializing
+        .name(name() + ".DIS:tempSerializingInsts")
+        .desc("count of temporary serializing insts dispatched")
+        .flags(Stats::total)
+        ;
+
+    dispatchSerializeStallCycles
+        .name(name() + ".DIS:serializeStallCycles")
+        .desc("count of cycles dispatch stalled for serializing inst")
+        .flags(Stats::total)
+        ;
+
+    dispatchRate
+        .name(name() + ".DIS:rate")
+        .desc("dispatched insts per cycle")
+        .flags(Stats::total)
+        ;
+    dispatchRate = dispatchCountStat / cpu->numCycles;
+
+    regIntFull
+        .name(name() + ".REG:int:full")
+        .desc("number of cycles where there were no INT registers")
+        ; // NOTE(review): no formula is assigned to regIntFull in this function
+
+    regFpFull
+        .name(name() + ".REG:fp:full")
+        .desc("number of cycles where there were no FP registers")
+        ; // NOTE(review): no formula is assigned to regFpFull in this function
+    IFQLatency = IFQOccupancy / dispatchRate; // formula for IFQ:latency above
+
+    branchPred.regStats(); // the branch predictor registers its own stats
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::tick()
+{ // One front-end cycle: update status, then process up to width insts.
+    // @todo: Maybe I want to just have direct communication...
+    if (fromCommit->doneSeqNum) {
+        branchPred.update(fromCommit->doneSeqNum, 0);
+    }
+
+    IFQCount += instBufferSize;
+    IFQFcount += instBufferSize == maxInstBufferSize; // adds 1 when full
+
+    // A completed icache miss makes the block valid; re-derive the status.
+    if (status == IcacheMissComplete) {
+        cacheBlkValid = true;
+
+        status = Running;
+        if (barrierInst)
+            status = SerializeBlocked;
+        if (freeRegs <= 0)
+            status = RenameBlocked;
+        checkBE();
+    } else if (status == IcacheMissStall) {
+        DPRINTF(FE, "Still in Icache miss stall.\n");
+        icacheStallCycles++;
+        return;
+    }
+
+    if (status == RenameBlocked || status == SerializeBlocked ||
+        status == BEBlocked) {
+        // This might cause the front end to run even though it
+        // shouldn't, but this should only be a problem for one cycle.
+        // Also will cause a one cycle bubble between changing state
+        // and restarting.
+        DPRINTF(FE, "In blocked status.\n");
+
+        fetchBlockedCycles++;
+
+        if (status == SerializeBlocked) {
+            dispatchSerializeStallCycles++;
+        }
+        updateStatus();
+        return;
+    } else if (status != IcacheMissComplete) {
+        if (fetchCacheLineNextCycle) {
+            Fault fault = fetchCacheLine();
+            if (fault != NoFault) {
+                handleFault(fault);
+                return;
+            }
+            fetchCacheLineNextCycle = false;
+        }
+        // If miss, stall until it returns.
+        if (status == IcacheMissStall) {
+            // Tell CPU to not tick me for now.
+            return;
+        }
+    }
+
+    fetchCycles++;
+
+    int num_inst = 0; // instructions buffered during this call
+
+    // Otherwise loop and process instructions.
+    // One way to hack infinite width is to set width and maxInstBufferSize
+    // both really high.  Inelegant, but probably will work.
+    while (num_inst < width &&
+           instBufferSize < maxInstBufferSize) {
+        // Get instruction from cache line.
+        DynInstPtr inst = getInstFromCacheline();
+
+        if (!inst) {
+            // PC is no longer in the cache line, end fetch.
+            // Might want to check this at the end of the cycle so that
+            // there's no cycle lost to checking for a new cache line.
+            DPRINTF(FE, "Need to get new cache line\n");
+            fetchCacheLineNextCycle = true;
+            break;
+        }
+
+        // if (generalizeFetch) {
+        processInst(inst);
+
+        if (status == SerializeBlocked) {
+            break; // this inst is not added to instBuffer
+        }
+
+        // Possibly push into a time buffer that estimates the front end
+        // latency
+        instBuffer.push_back(inst);
+        ++instBufferSize;
+        ++num_inst;
+        // } else {
+        // fetch(num_inst);
+        // decode(num_inst);
+        // rename(num_inst);
+        // }
+
+        if (inst->predTaken()) {
+            // Start over with tick?
+            break;
+        } else if (freeRegs <= 0) {
+            DPRINTF(FE, "Ran out of free registers to rename to!\n");
+            status = RenameBlocked;
+            break;
+        } else if (serializeNext) {
+            break;
+        }
+    }
+
+    fetchNisnDist.sample(num_inst); // sampled even when nothing was buffered
+    checkBE();
+
+    DPRINTF(FE, "Num insts processed: %i, Inst Buffer size: %i, Free "
+            "Regs %i\n", num_inst, instBufferSize, freeRegs);
+}
+
+template <class Impl>
+Fault
+FrontEnd<Impl>::fetchCacheLine()
+{
+    // Fetch the cache line at the current PC; returns the translation fault.
+#if FULL_SYSTEM
+    // Flag to say whether or not address is physical addr.
+    unsigned flags = cpu->inPalMode() ? PHYSICAL : 0;
+#else
+    unsigned flags = 0;
+#endif // FULL_SYSTEM
+    Fault fault = NoFault;
+
+    // Align the fetch PC so it's at the start of a cache block.
+    Addr fetch_PC = icacheBlockAlignPC(PC);
+
+    DPRINTF(FE, "Fetching cache line starting at %#x.\n", fetch_PC);
+
+    // Set up the memReq to do a read of the first instruction's address.
+    // Set the appropriate read size and flags as well.
+    memReq->cmd = Read;
+    memReq->reset(fetch_PC, cacheBlkSize, flags);
+
+    // Translate the instruction request.
+    fault = cpu->translateInstReq(memReq);
+
+    // In the case of faults, the fetch stage may need to stall and wait
+    // on what caused the fetch (ITB or Icache miss).
+//    assert(fault == NoFault);
+
+    // Now do the timing access to see whether or not the instruction
+    // exists within the cache.  Skipped entirely on a translation fault.
+    if (icacheInterface && fault == NoFault) {
+        memReq->completionEvent = NULL;
+
+        memReq->time = curTick;
+
+        MemAccessResult res = icacheInterface->access(memReq);
+
+        // If the cache missed then attach an event to wake
+        // up this stage once the cache miss completes.
+        if (icacheInterface->doEvents() && res != MA_HIT) {
+            memReq->completionEvent = new ICacheCompletionEvent(this);
+
+            status = IcacheMissStall;
+
+            cacheBlkValid = false;
+
+            DPRINTF(FE, "Cache miss.\n");
+        }  else {  // also taken when doEvents() is false
+            DPRINTF(FE, "Cache hit.\n");
+
+            cacheBlkValid = true;
+
+            memcpy(cacheData, memReq->data, memReq->size);
+        }
+    }
+
+    // Note that this will set the cache block PC a bit earlier than it should
+    // be set.
+    cacheBlkPC = fetch_PC;
+
+    ++fetchedCacheLines;  // counted on hit, miss and fault alike
+
+    DPRINTF(FE, "Done fetching cache line.\n");
+
+    return fault;
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::processInst(DynInstPtr &inst)
+{
+    // A serialize-before barrier is held back by processBarriers(); it is
+    // neither predicted nor renamed on this pass.
+    if (processBarriers(inst))
+        return;
+
+    Addr cur_pc = inst->readPC();
+
+    if (inst->isControl()) {
+        // Control instructions are handed to the branch predictor.
+        fetchedBranches++;
+        if (branchPred.predict(inst, cur_pc, inst->threadNumber))
+            predictedBranches++;
+    } else {
+        // Anything else is predicted to fall through to its next PC.
+        inst->setPredTarg(inst->readNextPC());
+    }
+
+    const Addr pred_target = inst->readPredTarg();
+
+    DPRINTF(FE, "[sn:%lli] Predicted and processed inst PC %#x, next PC "
+            "%#x\n", inst->seqNum, cur_pc, pred_target);
+
+    // Redirect fetch to the predicted target before the instruction is
+    // renamed.
+    PC = pred_target;
+
+    // Hook the instruction up to its producers in the rename table.
+    renameInst(inst);
+}
+
+template <class Impl>
+bool
+FrontEnd<Impl>::processBarriers(DynInstPtr &inst)  // true => inst was parked
+{
+    if (serializeNext) {  // the previous instruction was serialize-after
+        inst->setSerializeBefore();
+        serializeNext = false;
+    } else if (!inst->isSerializing()) {
+        return false;
+    }
+
+    if (inst->isSerializeBefore() && !inst->isSerializeHandled()) {
+        DPRINTF(FE, "Serialize before instruction encountered.\n");
+
+        if (!inst->isTempSerializeBefore()) {
+            dispatchedSerializing++;
+            inst->setSerializeHandled();
+        } else {
+            dispatchedTempSerializing++;
+        }
+
+        // Change status over to SerializeBlocked so that other stages know
+        // what this is blocked on.
+        status = SerializeBlocked;
+        // Park the instruction; getInstFromCacheline() hands it back later.
+        barrierInst = inst;
+        return true;
+    } else if (inst->isSerializeAfter() && !inst->isSerializeHandled()) {
+        DPRINTF(FE, "Serialize after instruction encountered.\n");
+
+        inst->setSerializeHandled();
+
+        dispatchedSerializing++;
+        // The barrier takes effect on the instruction that follows this one.
+        serializeNext = true;
+        return false;
+    }
+    return false;
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::handleFault(Fault &fault)  // report a fetch fault, then stall
+{
+    DPRINTF(FE, "Fault at fetch, telling commit\n");
+    backEnd->fetchFault(fault);
+    // We're blocked on the back end until it handles this fault.
+    status = BEBlocked;
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::squash(const InstSeqNum &squash_num, const Addr &next_PC,
+                       const bool is_branch, const bool branch_taken)
+{
+    DPRINTF(FE, "Squashing from [sn:%lli], setting PC to %#x\n",
+            squash_num, next_PC);
+    // Drop buffered instructions younger than squash_num, newest first.
+    while (!instBuffer.empty() &&
+           instBuffer.back()->seqNum > squash_num) {
+        DynInstPtr inst = instBuffer.back();
+
+        DPRINTF(FE, "Squashing instruction [sn:%lli] PC %#x\n",
+                inst->seqNum, inst->readPC());
+
+        inst->clearDependents();
+
+        instBuffer.pop_back();
+        --instBufferSize;
+
+        // Fix up branch predictor if necessary.
+//        branchPred.undo(inst->getBPredInfo());
+
+        freeRegs+= inst->numDestRegs();  // give back what renameInst() took
+    }
+
+    // Copy over rename table from the back end.
+    renameTable.copyFrom(backEnd->renameTable);
+
+    PC = next_PC;
+
+    // Update BP with proper information.
+    if (is_branch) {
+        branchPred.squash(squash_num, next_PC, branch_taken, 0);
+    } else {
+        branchPred.squash(squash_num, 0);
+    }
+
+    // Clear the icache miss if it's outstanding.
+    if (status == IcacheMissStall && icacheInterface) {
+        DPRINTF(FE, "Squashing outstanding Icache miss.\n");
+        icacheInterface->squash(0);
+    }
+    // NOTE(review): barrierInst is only dropped when SerializeBlocked; confirm.
+    if (status == SerializeBlocked) {
+        assert(barrierInst->seqNum > squash_num);
+        barrierInst = NULL;
+    }
+
+    // Unless this squash originated from the front end, we're probably
+    // in running mode now.
+    // Actually might want to make this latency dependent.
+    status = Running;
+    fetchCacheLineNextCycle = true;  // the old cache line is refetched
+}
+
+template <class Impl>
+typename Impl::DynInstPtr
+FrontEnd<Impl>::getInst()
+{
+    // Hands the instruction at the front of the buffer to the caller, or
+    // NULL when the instruction buffer is empty.
+    if (!instBufferSize)
+        return NULL;
+
+    DynInstPtr head_inst = instBuffer.front();
+    instBuffer.pop_front();
+    --instBufferSize;
+
+    // Every instruction handed out is counted as dispatched.
+    dispatchCountStat++;
+
+    return head_inst;
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::processCacheCompletion()
+{
+    DPRINTF(FE, "Processing cache completion\n");
+
+    // Ignore the completion if a squash moved us out of the miss stall.
+    if (status != IcacheMissStall) {
+        DPRINTF(FE, "Previous fetch was squashed.\n");
+        return;
+    }
+
+    status = IcacheMissComplete;
+
+/*    if (checkStall(tid)) {
+        fetchStatus[tid] = Blocked;
+    } else {
+        fetchStatus[tid] = IcacheMissComplete;
+    }
+*/
+    memcpy(cacheData, memReq->data, memReq->size);  // latch the returned line
+
+    // Reset the completion event to NULL.
+    memReq->completionEvent = NULL;
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::addFreeRegs(int num_freed)
+{
+    // Return the registers first, then lift a rename stall once the
+    // free count is positive again.
+    freeRegs += num_freed;
+    if (status == RenameBlocked && freeRegs > 0)
+        status = Running;
+
+    assert(freeRegs <= numPhysRegs);
+}
+
+template <class Impl>
+bool
+FrontEnd<Impl>::updateStatus()
+{
+    // Re-evaluates the serialize and back-end stalls and returns true if
+    // the status changed.  (A RenameBlocked stall is lifted through
+    // addFreeRegs(), not here.)
+
+    // A serializing instruction may proceed once the back end's ROB and
+    // the instruction buffer have both drained.
+    const bool drained = backEnd->robEmpty() && !instBufferSize;
+
+    // With a decoupled front end, back-end blocking is ignored.
+    const bool be_block =
+        cpu->decoupledFrontEnd ? false : backEnd->isBlocked();
+
+    if (status == SerializeBlocked) {
+        if (!drained)
+            return false;
+        status = SerializeComplete;
+        return true;
+    }
+
+    if (status == BEBlocked && !be_block) {
+        // Fall back onto a pending barrier if there is one.
+        status = barrierInst ? SerializeBlocked : Running;
+        return true;
+    }
+
+    return false;
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::checkBE()
+{
+    // Only a coupled front end stalls on a blocked back end, and only
+    // when it is currently Running or Idle.
+    const bool coupled_and_blocked =
+        !cpu->decoupledFrontEnd && backEnd->isBlocked();
+    if (coupled_and_blocked && (status == Running || status == Idle))
+        status = BEBlocked;
+}
+
+template <class Impl>
+typename Impl::DynInstPtr
+FrontEnd<Impl>::getInstFromCacheline()  // next DynInst at PC, or NULL
+{
+    if (status == SerializeComplete) {  // release the parked barrier first
+        DynInstPtr inst = barrierInst;
+        status = Running;
+        barrierInst = NULL;
+        return inst;
+    }
+
+    InstSeqNum inst_seq;
+    MachInst inst;
+    // @todo: Fix this magic number used here to handle word offset (and
+    // getting rid of PAL bit)
+    unsigned offset = (PC & cacheBlkMask) & ~3;
+
+    // PC of inst is not in this cache block, or the block is not valid.
+    if (PC >= (cacheBlkPC + cacheBlkSize) || PC < cacheBlkPC || !cacheBlkValid) {
+//        DPRINTF(OoOCPU, "OoOCPU: PC is not in this cache block\n");
+//        DPRINTF(OoOCPU, "OoOCPU: PC: %#x, cacheBlkPC: %#x, cacheBlkValid: %i",
+//                PC, cacheBlkPC, cacheBlkValid);
+//        panic("Instruction not in cache line or cache line invalid!");
+        return NULL;
+    }
+
+    //////////////////////////
+    // Fetch one instruction
+    //////////////////////////
+
+    // Get a sequence number.
+    inst_seq = getAndIncrementInstSeq();
+
+    // Make sure this is a valid index.
+    assert(offset <= cacheBlkSize - sizeof(MachInst));
+
+    // Read the instruction word; it is guest data, so convert guest-to-host.
+    inst = gtoh(*reinterpret_cast<MachInst *>(&cacheData[offset]));
+
+    ExtMachInst decode_inst = TheISA::makeExtMI(inst, PC);
+
+    // Create a new DynInst from the instruction fetched.
+    DynInstPtr instruction = new DynInst(decode_inst, PC, PC+sizeof(MachInst),
+                                         inst_seq, cpu);
+
+    instruction->setState(thread);
+
+    DPRINTF(FE, "Instruction [sn:%lli] created, with PC %#x\n%s\n",
+            inst_seq, instruction->readPC(),
+            instruction->staticInst->disassemble(PC));
+
+    instruction->traceData =
+        Trace::getInstRecord(curTick, xc, cpu,
+                             instruction->staticInst,
+                             instruction->readPC(), 0);
+
+    // Increment stat of fetched instructions.
+    ++fetchedInsts;
+
+    return instruction;
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::renameInst(DynInstPtr &inst)  // link inst to its producers
+{
+    DynInstPtr src_inst = NULL;
+    int num_src_regs = inst->numSrcRegs();
+    if (num_src_regs == 0) {
+        inst->setCanIssue();
+    } else {
+        for (int i = 0; i < num_src_regs; ++i) {
+            src_inst = renameTable[inst->srcRegIdx(i)];
+
+            inst->setSrcInst(src_inst, i);
+            // NOTE(review): assumes the table entry is never null - confirm.
+            DPRINTF(FE, "[sn:%lli]: Src reg %i is inst [sn:%lli]\n",
+                    inst->seqNum, (int)inst->srcRegIdx(i), src_inst->seqNum);
+
+            if (src_inst->isCompleted()) {
+                DPRINTF(FE, "Reg ready.\n");
+                inst->markSrcRegReady(i);
+            } else {
+                DPRINTF(FE, "Adding to dependent list.\n");
+                src_inst->addDependent(inst);
+            }
+        }
+    }
+    // Each destination register now names this instruction as its producer.
+    for (int i = 0; i < inst->numDestRegs(); ++i) {
+        RegIndex idx = inst->destRegIdx(i);
+
+        DPRINTF(FE, "Dest reg %i is now inst [sn:%lli], was previously "
+                "[sn:%lli]\n",
+                (int)inst->destRegIdx(i), inst->seqNum,
+                renameTable[idx]->seqNum);
+        // Remember the previous producer before overwriting the entry.
+        inst->setPrevDestInst(renameTable[idx], i);
+
+        renameTable[idx] = inst;
+        --freeRegs;  // one free register consumed per destination
+    }
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::dumpInsts()
+{
+    // Debug aid: print the buffer occupancy, then one record per
+    // buffered instruction, front to back.
+    cprintf("instBuffer size: %i\n", instBuffer.size());
+
+    // Running position of the instruction within the buffer.
+    int index = 0;
+    for (InstBuffIt it = instBuffer.begin(); it != instBuffer.end(); ++it) {
+        cprintf("Instruction:%i\nPC:%#x\n[tid:%i]\n[sn:%lli]\nIssued:%i\n"
+                "Squashed:%i\n\n",
+                index, (*it)->readPC(), (*it)->threadNumber,
+                (*it)->seqNum, (*it)->isIssued(), (*it)->isSquashed());
+        ++index;
+    }
+}
+
+template <class Impl>
+FrontEnd<Impl>::ICacheCompletionEvent::ICacheCompletionEvent(FrontEnd *fe)
+    : Event(&mainEventQueue, Delayed_Writeback_Pri), frontEnd(fe)
+{
+    this->setFlags(Event::AutoDelete);  // events are new'd in fetchCacheLine()
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::ICacheCompletionEvent::process()
+{
+    frontEnd->processCacheCompletion();  // forward to the owning front end
+}
+
+template <class Impl>
+const char *
+FrontEnd<Impl>::ICacheCompletionEvent::description()
+{
+    return "ICache completion event";  // fixed label for this event type
+}
diff --git a/cpu/ozone/inorder_back_end.cc b/cpu/ozone/inorder_back_end.cc
new file mode 100644
index 000000000..14db610d2
--- /dev/null
+++ b/cpu/ozone/inorder_back_end.cc
@@ -0,0 +1,5 @@
+
+#include "cpu/ozone/inorder_back_end_impl.hh"
+#include "cpu/ozone/simple_impl.hh"
+
+template class InorderBackEnd<SimpleImpl>;  // explicit instantiation
diff --git a/cpu/ozone/inorder_back_end.hh b/cpu/ozone/inorder_back_end.hh
new file mode 100644
index 000000000..e621f6c01
--- /dev/null
+++ b/cpu/ozone/inorder_back_end.hh
@@ -0,0 +1,417 @@
+
+#ifndef __CPU_OZONE_INORDER_BACK_END_HH__
+#define __CPU_OZONE_INORDER_BACK_END_HH__
+
+#include <list>
+
+#include "arch/faults.hh"
+#include "base/timebuf.hh"
+#include "cpu/exec_context.hh"
+#include "cpu/inst_seq.hh"
+#include "cpu/ozone/rename_table.hh"
+#include "cpu/ozone/thread_state.hh"
+#include "mem/mem_interface.hh"
+#include "mem/mem_req.hh"
+#include "sim/eventq.hh"
+
+template <class Impl>
+class InorderBackEnd  // back end that executes instructions in program order
+{
+  public:
+    typedef typename Impl::Params Params;
+    typedef typename Impl::DynInstPtr DynInstPtr;
+    typedef typename Impl::FullCPU FullCPU;
+    typedef typename Impl::FrontEnd FrontEnd;
+
+    typedef typename FullCPU::OzoneXC OzoneXC;
+    typedef typename Impl::FullCPU::CommStruct CommStruct;
+
+    InorderBackEnd(Params *params);
+
+    std::string name() const;
+    // Wiring setters; the constructor does not set these pointers.
+    void setCPU(FullCPU *cpu_ptr)
+    { cpu = cpu_ptr; }
+
+    void setFrontEnd(FrontEnd *front_end_ptr)
+    { frontEnd = front_end_ptr; }
+
+    void setCommBuffer(TimeBuffer<CommStruct> *_comm)
+    { comm = _comm; }
+
+    void setXC(ExecContext *xc_ptr);
+
+    void setThreadState(OzoneThreadState<Impl> *thread_ptr);
+
+    void regStats() { }  // registers nothing
+
+#if FULL_SYSTEM
+    void checkInterrupts();
+#endif
+
+    void tick();
+    void executeInsts();
+    void squash(const InstSeqNum &squash_num, const Addr &next_PC);
+
+    void squashFromXC();
+
+    bool robEmpty() { return instList.empty(); }  // no instructions pending
+
+    bool isFull() { return false; }  // never reports full
+    bool isBlocked() { return status == DcacheMissStoreStall ||
+                           status == DcacheMissLoadStall ||
+                           interruptBlocked; }
+
+    void fetchFault(Fault &fault);
+
+    void dumpInsts();
+
+  private:
+    void handleFault();
+
+    void setSquashInfoFromXC();
+    // Pending squash request; tick() applies it before doing anything else.
+    bool squashPending;
+    InstSeqNum squashSeqNum;
+    Addr squashNextPC;
+    // tick() handles this once the ROB and the front end are empty.
+    Fault faultFromFetch;
+    // Set by tick() while an interrupt waits for the ROB to drain.
+    bool interruptBlocked;
+
+  public:
+    template <class T>
+    Fault read(Addr addr, T &data, unsigned flags);
+
+    template <class T>
+    Fault read(MemReqPtr &req, T &data, int load_idx);
+
+    template <class T>
+    Fault write(T data, Addr addr, unsigned flags, uint64_t *res);
+
+    template <class T>
+    Fault write(MemReqPtr &req, T &data, int store_idx);
+
+    Addr readCommitPC() { return commitPC; }
+    // PC of the instruction executeInsts() is currently working on.
+    Addr commitPC;
+
+  public:
+    FullCPU *cpu;
+
+    FrontEnd *frontEnd;
+
+    ExecContext *xc;
+
+    OzoneThreadState<Impl> *thread;
+
+    RenameTable<Impl> renameTable;
+
+  protected:
+    enum Status {
+        Running,
+        Idle,
+        DcacheMissLoadStall,   // set by read() on a dcache miss
+        DcacheMissStoreStall,  // set by write() on a dcache miss
+        DcacheMissComplete,
+        Blocked
+    };
+
+    Status status;
+
+    class DCacheCompletionEvent : public Event
+    {
+      private:
+        InorderBackEnd *be;
+
+      public:
+        DCacheCompletionEvent(InorderBackEnd *_be);
+
+        virtual void process();
+        virtual const char *description();
+
+        DynInstPtr inst;  // memory instruction that started the access
+    };
+
+    friend class DCacheCompletionEvent;
+    // Single event object, attached to a request when the dcache misses.
+    DCacheCompletionEvent cacheCompletionEvent;
+
+    MemInterface *dcacheInterface;
+    // Request object reused by the Addr-based read()/write() overloads.
+    MemReqPtr memReq;
+
+  private:
+    typedef typename std::list<DynInstPtr>::iterator InstListIt;
+    // Instructions taken from the front end, in the order received.
+    std::list<DynInstPtr> instList;
+
+    // General back end width. Used if the more specific isn't given.
+    int width;
+
+    int latency;
+
+    int squashLatency;
+    // Presumably delays added instructions by `latency` ticks - TODO confirm.
+    TimeBuffer<int> numInstsToWB;
+    TimeBuffer<int>::wire instsAdded;      // wire at index `latency`
+    TimeBuffer<int>::wire instsToExecute;  // wire at index 0
+
+    TimeBuffer<CommStruct> *comm;
+    // number of cycles stalled for D-cache misses
+    Stats::Scalar<> dcacheStallCycles;
+    Counter lastDcacheStall;  // curTick at the most recent dcache miss
+};
+
+template <class Impl>
+template <class T>
+Fault
+InorderBackEnd<Impl>::read(Addr addr, T &data, unsigned flags)
+{
+    memReq->reset(addr, sizeof(T), flags);  // reuse the shared request
+
+    // translate to physical address
+    Fault fault = cpu->translateDataReadReq(memReq);
+
+    // if we have a cache, do cache access too
+    if (fault == NoFault && dcacheInterface) {
+        memReq->cmd = Read;
+        memReq->completionEvent = NULL;
+        memReq->time = curTick;
+        memReq->flags &= ~INST_READ;
+        MemAccessResult result = dcacheInterface->access(memReq);
+
+        // Ugly hack to get an event scheduled *only* if the access is
+        // a miss.  We really should add first-class support for this
+        // at some point.
+        if (result != MA_HIT) {
+            // Fix this hack for keeping funcExeInst correct with loads that
+            // are executed twice.
+            memReq->completionEvent = &cacheCompletionEvent;
+            lastDcacheStall = curTick;
+//	    unscheduleTickEvent();
+            status = DcacheMissLoadStall;
+            DPRINTF(IBE, "Dcache miss stall!\n");
+        } else {
+            // do functional access
+            // NOTE(review): `data` is never written here; confirm the caller.
+            DPRINTF(IBE, "Dcache hit!\n");
+        }
+    }
+/*
+    if (!dcacheInterface && (memReq->flags & UNCACHEABLE))
+        recordEvent("Uncached Read");
+*/
+    return fault;  // the translation result
+}
+#if 0  // Disabled: two-argument read(); the class declares a three-argument one.
+template <class Impl>
+template <class T>
+Fault
+InorderBackEnd<Impl>::read(MemReqPtr &req, T &data)
+{
+#if FULL_SYSTEM && defined(TARGET_ALPHA)
+    if (req->flags & LOCKED) {  // record the lock address and set the flag
+        req->xc->setMiscReg(TheISA::Lock_Addr_DepTag, req->paddr);
+        req->xc->setMiscReg(TheISA::Lock_Flag_DepTag, true);
+    }
+#endif
+
+    Fault error;
+    error = thread->mem->read(req, data);
+    data = LittleEndianGuest::gtoh(data);  // guest-to-host byte order
+    return error;
+}
+#endif
+
+template <class Impl>
+template <class T>
+Fault
+InorderBackEnd<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res)
+{
+    memReq->reset(addr, sizeof(T), flags);  // reuse the shared request
+
+    // translate to physical address
+    Fault fault = cpu->translateDataWriteReq(memReq);
+
+    if (fault == NoFault && dcacheInterface) {
+        memReq->cmd = Write;
+//	memcpy(memReq->data,(uint8_t *)&data,memReq->size);  NOTE(review): `data` is not copied
+        memReq->completionEvent = NULL;
+        memReq->time = curTick;
+        memReq->flags &= ~INST_READ;
+        MemAccessResult result = dcacheInterface->access(memReq);
+
+        // Ugly hack to get an event scheduled *only* if the access is
+        // a miss.  We really should add first-class support for this
+        // at some point.
+        if (result != MA_HIT) {
+            memReq->completionEvent = &cacheCompletionEvent;
+            lastDcacheStall = curTick;
+//	    unscheduleTickEvent();
+            status = DcacheMissStoreStall;
+            DPRINTF(IBE, "Dcache miss stall!\n");
+        } else {
+            DPRINTF(IBE, "Dcache hit!\n");
+        }
+    }
+    // Hand back the request's result field when the caller asked for it.
+    if (res && (fault == NoFault))
+        *res = memReq->result;
+/*
+    if (!dcacheInterface && (memReq->flags & UNCACHEABLE))
+        recordEvent("Uncached Write");
+*/
+    return fault;  // the translation result
+}
+#if 0  // Disabled: two-argument write(); the class declares a three-argument one.
+template <class Impl>
+template <class T>
+Fault
+InorderBackEnd<Impl>::write(MemReqPtr &req, T &data)
+{
+#if FULL_SYSTEM && defined(TARGET_ALPHA)
+    ExecContext *xc;
+
+    // If this is a store conditional, act appropriately
+    if (req->flags & LOCKED) {
+        xc = req->xc;
+
+        if (req->flags & UNCACHEABLE) {
+            // Don't update result register (see stq_c in isa_desc)
+            req->result = 2;
+            xc->setStCondFailures(0);//Needed? [RGD]
+        } else {
+            bool lock_flag = xc->readMiscReg(TheISA::Lock_Flag_DepTag);
+            Addr lock_addr = xc->readMiscReg(TheISA::Lock_Addr_DepTag);
+            req->result = lock_flag;
+            if (!lock_flag ||
+                ((lock_addr & ~0xf) != (req->paddr & ~0xf))) {
+                xc->setMiscReg(TheISA::Lock_Flag_DepTag, false);
+                xc->setStCondFailures(xc->readStCondFailures() + 1);
+                if (((xc->readStCondFailures()) % 100000) == 0) {
+                    std::cerr << "Warning: "
+                              << xc->readStCondFailures()
+                              << " consecutive store conditional failures "
+                              << "on cpu " << req->xc->readCpuId()
+                              << std::endl;
+                }
+                return NoFault;
+            }
+            else xc->setStCondFailures(0);
+        }
+    }
+
+    // Need to clear any locked flags on other processors for
+    // this address.  Only do this for successful Store Conditionals
+    // and all other stores (WH64?).  Unsuccessful Store
+    // Conditionals would have returned above, and wouldn't fall
+    // through.
+    for (int i = 0; i < cpu->system->execContexts.size(); i++){
+        xc = cpu->system->execContexts[i];
+        if ((xc->readMiscReg(TheISA::Lock_Addr_DepTag) & ~0xf) ==
+            (req->paddr & ~0xf)) {
+            xc->setMiscReg(TheISA::Lock_Flag_DepTag, false);
+        }
+    }
+
+#endif
+    return thread->mem->write(req, (T)LittleEndianGuest::htog(data));
+}
+#endif
+
+template <class Impl>
+template <class T>
+Fault
+InorderBackEnd<Impl>::read(MemReqPtr &req, T &data, int load_idx)
+{
+//    panic("Unimplemented!");
+//    memReq->reset(addr, sizeof(T), flags);
+
+    // Translation is disabled here: the request is used as passed in.
+//    Fault fault = cpu->translateDataReadReq(req);
+
+    // if we have a cache, do cache access too
+    if (dcacheInterface) {
+        req->cmd = Read;
+        req->completionEvent = NULL;
+        req->data = new uint8_t[64];  // NOTE(review): new buffer every call
+        req->time = curTick;
+        req->flags &= ~INST_READ;
+        MemAccessResult result = dcacheInterface->access(req);
+
+        // Ugly hack to get an event scheduled *only* if the access is
+        // a miss.  We really should add first-class support for this
+        // at some point.
+        if (result != MA_HIT) {
+            req->completionEvent = &cacheCompletionEvent;
+            lastDcacheStall = curTick;
+//	    unscheduleTickEvent();
+            status = DcacheMissLoadStall;
+            DPRINTF(IBE, "Dcache miss load stall!\n");
+        } else {
+            DPRINTF(IBE, "Dcache hit!\n");
+
+        }
+    }
+
+/*
+    if (!dcacheInterface && (req->flags & UNCACHEABLE))
+        recordEvent("Uncached Read");
+*/
+    return NoFault;  // always; `data` and `load_idx` are not used
+}
+
+template <class Impl>
+template <class T>
+Fault
+InorderBackEnd<Impl>::write(MemReqPtr &req, T &data, int store_idx)
+{
+//    req->reset(addr, sizeof(T), flags);
+
+    // Translation is disabled here: the request is used as passed in.
+//    Fault fault = cpu->translateDataWriteReq(req);
+
+    if (dcacheInterface) {
+        req->cmd = Write;
+        req->data = new uint8_t[64];  // NOTE(review): new buffer every call
+        memcpy(req->data,(uint8_t *)&data,req->size);
+        req->completionEvent = NULL;
+        req->time = curTick;
+        req->flags &= ~INST_READ;
+        MemAccessResult result = dcacheInterface->access(req);
+
+        // Ugly hack to get an event scheduled *only* if the access is
+        // a miss.  We really should add first-class support for this
+        // at some point.
+        if (result != MA_HIT) {
+            req->completionEvent = &cacheCompletionEvent;
+            lastDcacheStall = curTick;
+//	    unscheduleTickEvent();
+            status = DcacheMissStoreStall;
+            DPRINTF(IBE, "Dcache miss store stall!\n");
+        } else {
+            DPRINTF(IBE, "Dcache hit!\n");
+
+        }
+    }
+    // Locked stores get a fixed result; no lock state is consulted here.
+    if (req->flags & LOCKED) {
+        if (req->flags & UNCACHEABLE) {
+            // Don't update result register (see stq_c in isa_desc)
+            req->result = 2;
+        } else {
+            req->result = 1;
+        }
+    }
+/*
+    if (res && (fault == NoFault))
+        *res = req->result;
+        */
+/*
+    if (!dcacheInterface && (req->flags & UNCACHEABLE))
+        recordEvent("Uncached Write");
+*/
+    return NoFault;  // always; `store_idx` is not used
+}
+
+#endif // __CPU_OZONE_INORDER_BACK_END_HH__
diff --git a/cpu/ozone/inorder_back_end_impl.hh b/cpu/ozone/inorder_back_end_impl.hh
new file mode 100644
index 000000000..5a378ec76
--- /dev/null
+++ b/cpu/ozone/inorder_back_end_impl.hh
@@ -0,0 +1,519 @@
+
+#include "arch/faults.hh"
+#include "arch/isa_traits.hh"
+#include "cpu/ozone/inorder_back_end.hh"
+#include "cpu/ozone/thread_state.hh"
+
+using namespace TheISA;
+
+template <class Impl>
+InorderBackEnd<Impl>::InorderBackEnd(Params *params)
+    : squashPending(false),
+      squashSeqNum(0),
+      squashNextPC(0),
+      faultFromFetch(NoFault),
+      interruptBlocked(false),
+      cacheCompletionEvent(this),
+      dcacheInterface(params->dcacheInterface),
+      width(params->backEndWidth),
+      latency(params->backEndLatency),
+      squashLatency(params->backEndSquashLatency),
+      numInstsToWB(0, latency + 1)
+{
+    instsAdded = numInstsToWB.getWire(latency);  // written by tick()
+    instsToExecute = numInstsToWB.getWire(0);    // read by executeInsts()
+    // Shared request for the Addr-based read()/write(), with a 64-byte buffer.
+    memReq = new MemReq;
+    memReq->data = new uint8_t[64];
+    status = Running;
+}
+
+template <class Impl>
+std::string
+InorderBackEnd<Impl>::name() const
+{
+    return cpu->name() + ".inorderbackend";  // requires setCPU() first
+}
+
+template <class Impl>
+void
+InorderBackEnd<Impl>::setXC(ExecContext *xc_ptr)
+{
+    xc = xc_ptr;
+    memReq->xc = xc;  // the shared request carries the same context
+}
+
+template <class Impl>
+void
+InorderBackEnd<Impl>::setThreadState(OzoneThreadState<Impl> *thread_ptr)
+{
+    thread = thread_ptr;
+    thread->setFuncExeInst(0);  // reset the thread's funcExeInst value
+}
+
+#if FULL_SYSTEM
+template <class Impl>
+void
+InorderBackEnd<Impl>::checkInterrupts()
+{
+    //Check if there are any outstanding interrupts
+    //Handle the interrupts
+    int ipl = 0;
+    int summary = 0;
+
+    cpu->checkInterrupts = false;
+
+    if (thread->readMiscReg(IPR_ASTRR))
+        panic("asynchronous traps not implemented\n");
+    // Software interrupts: the last set level scanned determines ipl.
+    if (thread->readMiscReg(IPR_SIRR)) {
+        for (int i = INTLEVEL_SOFTWARE_MIN;
+             i < INTLEVEL_SOFTWARE_MAX; i++) {
+            if (thread->readMiscReg(IPR_SIRR) & (ULL(1) << i)) {
+                // See table 4-19 of the 21164 hardware reference
+                ipl = (i - INTLEVEL_SOFTWARE_MIN) + 1;
+                summary |= (ULL(1) << i);
+            }
+        }
+    }
+
+    uint64_t interrupts = cpu->intr_status();
+    // External interrupts are scanned second and can overwrite ipl.
+    if (interrupts) {
+        for (int i = INTLEVEL_EXTERNAL_MIN;
+             i < INTLEVEL_EXTERNAL_MAX; i++) {
+            if (interrupts & (ULL(1) << i)) {
+                // See table 4-19 of the 21164 hardware reference
+                ipl = i;
+                summary |= (ULL(1) << i);
+            }
+        }
+    }
+    // Deliver only if the level found exceeds the current IPLR value.
+    if (ipl && ipl > thread->readMiscReg(IPR_IPLR)) {
+        thread->inSyscall = true;
+
+        thread->setMiscReg(IPR_ISR, summary);
+        thread->setMiscReg(IPR_INTID, ipl);
+        Fault(new InterruptFault)->invoke(xc);
+        DPRINTF(Flow, "Interrupt! IPLR=%d ipl=%d summary=%x\n",
+                thread->readMiscReg(IPR_IPLR), ipl, summary);
+
+        // May need to go 1 inst prior
+        squashPending = true;
+
+        thread->inSyscall = false;
+
+        setSquashInfoFromXC();
+    }
+}
+#endif
+
+template <class Impl>
+void
+InorderBackEnd<Impl>::tick()
+{
+    // Squash due to an external source
+    // Not sure if this or an interrupt has higher priority
+    if (squashPending) {
+        squash(squashSeqNum, squashNextPC);
+        return;
+    }
+
+    // if (interrupt) then set thread PC, stall front end, record that
+    // I'm waiting for it to drain.  (for now just squash)
+#if FULL_SYSTEM
+    if (interruptBlocked ||
+        (cpu->checkInterrupts &&
+        cpu->check_interrupts() &&
+        !cpu->inPalMode())) {
+        if (!robEmpty()) {
+            interruptBlocked = true;  // wait for the ROB to drain first
+        } else if (robEmpty() && cpu->inPalMode()) {
+            // Will need to let the front end continue a bit until
+            // we're out of pal mode.  Hopefully we never get into an
+            // infinite loop...
+            interruptBlocked = false;
+        } else {
+            interruptBlocked = false;
+            checkInterrupts();
+            return;
+        }
+    }
+#endif
+    // Nothing is pulled in or executed while a dcache miss is outstanding.
+    if (status != DcacheMissLoadStall &&
+        status != DcacheMissStoreStall) {
+        for (int i = 0; i < width && (*instsAdded) < width; ++i) {
+            DynInstPtr inst = frontEnd->getInst();
+
+            if (!inst)  // the front end has nothing buffered
+                break;
+
+            instList.push_back(inst);
+
+            (*instsAdded)++;
+        }
+
+#if FULL_SYSTEM
+        if (faultFromFetch && robEmpty() && frontEnd->isEmpty()) {
+            handleFault();
+        } else {
+            executeInsts();
+        }
+#else
+        executeInsts();
+#endif
+    }
+}
+
+template <class Impl>
+void
+InorderBackEnd<Impl>::executeInsts()
+{
+    bool completed_last_inst = true;
+    int insts_to_execute = *instsToExecute;
+    int freed_regs = 0;
+
+    while (insts_to_execute > 0) {
+        assert(!instList.empty());
+        DynInstPtr inst = instList.front();
+
+        commitPC = inst->readPC();
+
+        thread->setPC(commitPC);
+        thread->setNextPC(inst->readNextPC());
+
+#if FULL_SYSTEM
+        int count = 0;
+        Addr oldpc;
+        do {
+            if (count == 0)
+                assert(!thread->inSyscall && !thread->trapPending);
+            oldpc = thread->readPC();
+            cpu->system->pcEventQueue.service(
+                thread->getXCProxy());
+            count++;
+        } while (oldpc != thread->readPC());
+        if (count > 1) {
+            DPRINTF(IBE, "PC skip function event, stopping commit\n");
+            completed_last_inst = false;
+            squashPending = true;
+            break;
+        }
+#endif
+
+        Fault inst_fault = NoFault;
+
+        if (status == DcacheMissComplete) {
+            DPRINTF(IBE, "Completing inst [sn:%lli]\n", inst->seqNum);
+            status = Running;
+        } else if (inst->isMemRef() && status != DcacheMissComplete &&
+            (!inst->isDataPrefetch() && !inst->isInstPrefetch())) {
+            DPRINTF(IBE, "Initiating mem op inst [sn:%lli] PC: %#x\n",
+                    inst->seqNum, inst->readPC());
+
+            cacheCompletionEvent.inst = inst;
+            inst_fault = inst->initiateAcc();
+            if (inst_fault == NoFault &&
+                status != DcacheMissLoadStall &&
+                status != DcacheMissStoreStall) {
+                inst_fault = inst->completeAcc();
+            }
+            ++thread->funcExeInst;
+        } else {
+            DPRINTF(IBE, "Executing inst [sn:%lli] PC: %#x\n",
+                    inst->seqNum, inst->readPC());
+            inst_fault = inst->execute();
+            ++thread->funcExeInst;
+        }
+
+        // Will need to be able to break this loop in case the load
+        // misses.  Split access/complete ops would be useful here
+        // with writeback events.
+        if (status == DcacheMissLoadStall) {
+            *instsToExecute = insts_to_execute;
+
+            completed_last_inst = false;
+            break;
+        } else if (status == DcacheMissStoreStall) {
+            // Figure out how to fix this hack.  Probably have DcacheMissLoad
+            // vs DcacheMissStore.
+            *instsToExecute = insts_to_execute;
+            completed_last_inst = false;
+/*
+            instList.pop_front();
+            --insts_to_execute;
+            if (inst->traceData) {
+                inst->traceData->finalize();
+            }
+*/
+
+            // Don't really need to stop for a store stall as long as
+            // the memory system is able to handle store forwarding
+            // and such.  Breaking out might help avoid the cache
+            // interface becoming blocked.
+            break;
+        }
+
+        inst->setExecuted();
+        inst->setCompleted();
+        inst->setCanCommit();
+
+        instList.pop_front();
+
+        --insts_to_execute;
+        --(*instsToExecute);
+
+        if (inst->traceData) {
+            inst->traceData->finalize();
+            inst->traceData = NULL;
+        }
+
+        if (inst_fault != NoFault) {
+#if FULL_SYSTEM
+            DPRINTF(IBE, "Inst [sn:%lli] PC %#x has a fault\n",
+                    inst->seqNum, inst->readPC());
+
+            assert(!thread->inSyscall);
+
+            thread->inSyscall = true;
+
+            // Hack for now; DTB will sometimes need the machine instruction
+            // for when faults happen.  So we will set it here, prior to the
+            // DTB possibly needing it for this translation.
+            thread->setInst(
+                static_cast<TheISA::MachInst>(inst->staticInst->machInst));
+
+            // Consider holding onto the trap and waiting until the trap event
+            // happens for this to be executed.
+            inst_fault->invoke(xc);
+
+            // Exit state update mode to avoid accidental updating.
+            thread->inSyscall = false;
+
+            squashPending = true;
+
+            // Generate trap squash event.
+//            generateTrapEvent(tid);
+            completed_last_inst = false;
+            break;
+#else // !FULL_SYSTEM
+            panic("fault (%d) detected @ PC %08p", inst_fault,
+                  inst->PC);
+#endif // FULL_SYSTEM
+        }
+
+        for (int i = 0; i < inst->numDestRegs(); ++i) {
+            renameTable[inst->destRegIdx(i)] = inst;
+            thread->renameTable[inst->destRegIdx(i)] = inst;
+            ++freed_regs;
+        }
+
+        inst->clearDependents();
+
+        comm->access(0)->doneSeqNum = inst->seqNum;
+
+        if (inst->mispredicted()) {
+            squash(inst->seqNum, inst->readNextPC());
+
+            thread->setNextPC(inst->readNextPC());
+
+            break;
+        } else if (squashPending) {
+            // Something external happened that caused the CPU to squash.
+            // Break out of commit and handle the squash next cycle.
+            break;
+        }
+        // If it didn't mispredict, then it executed fine.  Send back its
+        // registers and BP info?  What about insts that may still have
+        // latency, like loads?  Probably can send back the information after
+        // it is completed.
+
+        // keep an instruction count
+        cpu->numInst++;
+        thread->numInsts++;
+    }
+
+    frontEnd->addFreeRegs(freed_regs);
+
+    assert(insts_to_execute >= 0);
+
+    // Should only advance this if I have executed all instructions.
+    if (insts_to_execute == 0) {
+        numInstsToWB.advance();
+    }
+
+    // Should I set the PC to the next PC here?  What do I set next PC to?
+    if (completed_last_inst) {
+        thread->setPC(thread->readNextPC());
+        thread->setNextPC(thread->readPC() + sizeof(MachInst));
+    }
+
+    if (squashPending) {
+        setSquashInfoFromXC();
+    }
+}
+
+template <class Impl>
+void
+InorderBackEnd<Impl>::handleFault()
+{
+    DPRINTF(Commit, "Handling fault from fetch\n");
+    // Invoke the fault stored by fetchFault(), then flag a squash.
+    assert(!thread->inSyscall);
+    // inSyscall is held true only across the invoke() call below.
+    thread->inSyscall = true;
+
+    // Consider holding onto the trap and waiting until the trap event
+    // happens for this to be executed.
+    faultFromFetch->invoke(xc);
+
+    // Exit state update mode to avoid accidental updating.
+    thread->inSyscall = false;
+    // The flag stays set until squash() resets it.
+    squashPending = true;
+    // Record squashSeqNum and squashNextPC for the pending squash.
+    setSquashInfoFromXC();
+}
+
+template <class Impl>
+void
+InorderBackEnd<Impl>::squash(const InstSeqNum &squash_num, const Addr &next_PC)
+{
+    // Drops every inst younger than squash_num from the tail of instList,
+    // then tells the front end to squash and clears the squash state.
+    DPRINTF(IBE, "Squashing from [sn:%lli], setting PC to %#x\n",
+            squash_num, next_PC);
+
+    int freed_regs = 0;
+
+    // Work from back() instead of a saved iterator: no iterator is ever
+    // decremented from end() of an empty list or stepped before begin().
+    while (!instList.empty() && instList.back()->seqNum > squash_num) {
+        DynInstPtr inst = instList.back();
+        DPRINTF(IBE, "Squashing instruction PC %#x, [sn:%lli].\n",
+                inst->readPC(),
+                inst->seqNum);
+
+        // May cause problems with misc regs
+        freed_regs+= inst->numDestRegs();
+        inst->clearDependents();
+        instList.pop_back();
+    }
+
+    frontEnd->addFreeRegs(freed_regs);
+
+    for (int i = 0; i < latency+1; ++i) {
+        numInstsToWB.advance();
+    }
+
+    squashPending = false;
+
+    // Probably want to make sure that this squash is the one that set the
+    // thread into inSyscall mode.
+    thread->inSyscall = false;
+
+    // Tell front end to squash, reset PC to new one.
+    frontEnd->squash(squash_num, next_PC);
+
+    faultFromFetch = NULL;
+}
+
+template <class Impl>
+void
+InorderBackEnd<Impl>::squashFromXC()
+{
+    // Put the thread into inSyscall mode; squash() takes it back out.
+    thread->inSyscall = true;
+
+    // Leave a note that a squash still has to be carried out.
+    squashPending = true;
+}
+
+template <class Impl>
+void
+InorderBackEnd<Impl>::setSquashInfoFromXC()
+{
+    // An empty instList still has to be handled; there, probably any number
+    // works (except maybe with stores in the store buffer), so use 0.
+    const bool list_empty = instList.empty();
+    squashNextPC = thread->PC;
+    squashSeqNum = list_empty ? 0 : instList.front()->seqNum - 1;
+}
+
+template <class Impl>
+void
+InorderBackEnd<Impl>::fetchFault(Fault &fault)
+{   // Latches a fetch fault; handleFault() invokes it, squash() clears it.
+    faultFromFetch = fault;
+}
+
+template <class Impl>
+void
+InorderBackEnd<Impl>::dumpInsts()
+{
+    // Debug aid: prints the size of instList followed by one record per
+    // instruction, walking the list from front to back.
+    cprintf("Inst list size: %i\n", instList.size());
+
+    // num is the position printed as "Instruction:"; valid_num is the
+    // running tally printed as "Count:".
+    int num = 0;
+    int valid_num = 0;
+
+    for (InstListIt it = instList.begin(); it != instList.end(); ++it) {
+        DynInstPtr &inst = *it;
+
+        cprintf("Instruction:%i\n",
+                num);
+
+        // An unsquashed inst counts towards the total if it has not
+        // been issued yet, or if it is a memory reference whose memory
+        // operation is not done.
+        if (!inst->isSquashed() &&
+            (!inst->isIssued() ||
+             (inst->isMemRef() && !inst->memOpDone))) {
+            ++valid_num;
+            cprintf("Count:%i\n", valid_num);
+        }
+
+        cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
+                "Issued:%i\nSquashed:%i\n",
+                inst->readPC(),
+                inst->seqNum,
+                inst->threadNumber,
+                inst->isIssued(),
+                inst->isSquashed());
+
+        if (inst->isMemRef()) {
+            cprintf("MemOpDone:%i\n", inst->memOpDone);
+        }
+
+        cprintf("\n");
+
+        ++num;
+    }
+}
+
+template <class Impl>
+InorderBackEnd<Impl>::DCacheCompletionEvent::DCacheCompletionEvent(
+    InorderBackEnd *_be)  // back end whose status process() updates
+    : Event(&mainEventQueue, CPU_Tick_Pri), be(_be)
+{
+    // AutoDelete is left disabled: this->setFlags(Event::AutoDelete);
+}
+
+template <class Impl>
+void
+InorderBackEnd<Impl>::DCacheCompletionEvent::process()
+{   // Finishes the access of the inst stored in this event.
+    inst->completeAcc();  // NOTE(review): the returned value is discarded
+    be->status = DcacheMissComplete;  // status the execute loop checks for
+}
+
+template <class Impl>
+const char *
+InorderBackEnd<Impl>::DCacheCompletionEvent::description()
+{   // Fixed description string for this event type.
+    return "DCache completion event";
+}
diff --git a/cpu/ozone/inst_queue.cc b/cpu/ozone/inst_queue.cc
new file mode 100644
index 000000000..9c61602d9
--- /dev/null
+++ b/cpu/ozone/inst_queue.cc
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cpu/ozone/dyn_inst.hh"
+#include "cpu/ozone/ozone_impl.hh"
+#include "cpu/ozone/simple_impl.hh"
+#include "cpu/ozone/inst_queue_impl.hh"
+
+// Force instantiation of InstQueue for both CPU implementations.
+template class InstQueue<SimpleImpl>;
+template class InstQueue<OzoneImpl>;
diff --git a/cpu/ozone/inst_queue.hh b/cpu/ozone/inst_queue.hh
new file mode 100644
index 000000000..2cbbb7987
--- /dev/null
+++ b/cpu/ozone/inst_queue.hh
@@ -0,0 +1,506 @@
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_OZONE_INST_QUEUE_HH__
+#define __CPU_OZONE_INST_QUEUE_HH__
+
+#include <list>
+#include <map>
+#include <queue>
+#include <vector>
+
+#include "base/statistics.hh"
+#include "base/timebuf.hh"
+#include "cpu/inst_seq.hh"
+#include "sim/host.hh"
+
+class FUPool;
+class MemInterface;
+
+/**
+ * A standard instruction queue class.  It holds ready instructions, in
+ * order, in separate priority queues to facilitate the scheduling of
+ * instructions.  The IQ uses a separate linked list to track dependencies.
+ * Similar to the rename map and the free list, it expects that
+ * floating point registers have their indices start after the integer
+ * registers (ie with 96 int and 96 fp registers, regs 0-95 are integer
+ * and 96-191 are fp).  This remains true even for both logical and
+ * physical register indices. The IQ depends on the memory dependence unit to
+ * track when memory operations are ready in terms of ordering; register
+ * dependencies are tracked normally. Right now the IQ also handles the
+ * execution timing; this is mainly to allow back-to-back scheduling without
+ * requiring IEW to be able to peek into the IQ. At the end of the execution
+ * latency, the instruction is put into the queue to execute, where it will
+ * have the execute() function called on it.
+ * @todo: Make IQ able to handle multiple FU pools.
+ */
+template <class Impl>
+class InstQueue
+{
+  public:
+    // Typedefs from the Impl.
+    typedef typename Impl::FullCPU FullCPU;
+    typedef typename Impl::DynInstPtr DynInstPtr;
+    typedef typename Impl::Params Params;
+    typedef typename Impl::IssueStruct IssueStruct;
+/*
+    typedef typename Impl::CPUPol::IEW IEW;
+    typedef typename Impl::CPUPol::MemDepUnit MemDepUnit;
+    typedef typename Impl::CPUPol::IssueStruct IssueStruct;
+    typedef typename Impl::CPUPol::TimeStruct TimeStruct;
+*/
+    // Typedef of iterator through the list of instructions.
+    typedef typename std::list<DynInstPtr>::iterator ListIt;
+
+    friend class Impl::FullCPU;
+#if 0
+    /** FU completion event class. */
+    class FUCompletion : public Event {
+      private:
+        /** Executing instruction. */
+        DynInstPtr inst;
+
+        /** Index of the FU used for executing. */
+        int fuIdx;
+
+        /** Pointer back to the instruction queue. */
+        InstQueue<Impl> *iqPtr;
+
+      public:
+        /** Construct a FU completion event. */
+        FUCompletion(DynInstPtr &_inst, int fu_idx,
+                     InstQueue<Impl> *iq_ptr);
+
+        virtual void process();
+        virtual const char *description();
+    };
+#endif
+    /** Constructs an IQ. */
+    InstQueue(Params *params);
+
+    /** Destructs the IQ. */
+    ~InstQueue();
+
+    /** Returns the name of the IQ. */
+    std::string name() const;
+
+    /** Registers statistics. */
+    void regStats();
+
+    /** Sets CPU pointer. */
+    void setCPU(FullCPU *_cpu) { cpu = _cpu; }
+#if 0
+    /** Sets active threads list. */
+    void setActiveThreads(list<unsigned> *at_ptr);
+
+    /** Sets the IEW pointer. */
+    void setIEW(IEW *iew_ptr) { iewStage = iew_ptr; }
+#endif
+    /** Sets the timer buffer between issue and execute. */
+    void setIssueToExecuteQueue(TimeBuffer<IssueStruct> *i2eQueue);
+#if 0
+    /** Sets the global time buffer. */
+    void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
+
+    /** Number of entries needed for given amount of threads. */
+    int entryAmount(int num_threads);
+
+    /** Resets max entries for all threads. */
+    void resetEntries();
+#endif
+    /** Returns total number of free entries. */
+    unsigned numFreeEntries();
+
+    /** Returns number of free entries for a thread. */
+    unsigned numFreeEntries(unsigned tid);
+
+    /** Returns whether or not the IQ is full. */
+    bool isFull();
+
+    /** Returns whether or not the IQ is full for a specific thread. */
+    bool isFull(unsigned tid);
+
+    /** Returns if there are any ready instructions in the IQ. */
+    bool hasReadyInsts();
+
+    /** Inserts a new instruction into the IQ. */
+    void insert(DynInstPtr &new_inst);
+
+    /** Inserts a new, non-speculative instruction into the IQ. */
+    void insertNonSpec(DynInstPtr &new_inst);
+#if 0
+    /**
+     * Advances the tail of the IQ, used if an instruction is not added to the
+     * IQ for scheduling.
+     * @todo: Rename this function.
+     */
+    void advanceTail(DynInstPtr &inst);
+
+    /** Process FU completion event. */
+    void processFUCompletion(DynInstPtr &inst, int fu_idx);
+#endif
+    /**
+     * Schedules ready instructions, adding the ready ones (oldest first) to
+     * the queue to execute.
+     */
+    void scheduleReadyInsts();
+
+    /** Schedules a single specific non-speculative instruction. */
+    void scheduleNonSpec(const InstSeqNum &inst);
+
+    /**
+     * Commits all instructions up to and including the given sequence number,
+     * for a specific thread.
+     */
+    void commit(const InstSeqNum &inst, unsigned tid = 0);
+
+    /** Wakes all dependents of a completed instruction. */
+    void wakeDependents(DynInstPtr &completed_inst);
+
+    /** Adds a ready memory instruction to the ready list. */
+    void addReadyMemInst(DynInstPtr &ready_inst);
+#if 0
+    /**
+     * Reschedules a memory instruction. It will be ready to issue once
+     * replayMemInst() is called.
+     */
+    void rescheduleMemInst(DynInstPtr &resched_inst);
+
+    /** Replays a memory instruction. It must be rescheduled first. */
+    void replayMemInst(DynInstPtr &replay_inst);
+#endif
+    /** Completes a memory operation. */
+    void completeMemInst(DynInstPtr &completed_inst);
+#if 0
+    /** Indicates an ordering violation between a store and a load. */
+    void violation(DynInstPtr &store, DynInstPtr &faulting_load);
+#endif
+    /**
+     * Squashes instructions for a thread. Squashing information is obtained
+     * from the time buffer.
+     */
+    void squash(unsigned tid); // Probably want the ISN
+
+    /** Returns the number of used entries for a thread. */
+    unsigned getCount(unsigned tid) { return count[tid]; };
+
+    /** Updates the number of free entries. */
+    void updateFreeEntries(int num) { freeEntries += num; }
+
+    /** Debug function to print all instructions. */
+    void printInsts();
+
+  private:
+    /** Does the actual squashing. */
+    void doSquash(unsigned tid);
+
+    /////////////////////////
+    // Various pointers
+    /////////////////////////
+
+    /** Pointer to the CPU. */
+    FullCPU *cpu;
+
+    /** Cache interface. */
+    MemInterface *dcacheInterface;
+#if 0
+    /** Pointer to IEW stage. */
+    IEW *iewStage;
+
+    /** The memory dependence unit, which tracks/predicts memory dependences
+     *  between instructions.
+     */
+    MemDepUnit memDepUnit[Impl::MaxThreads];
+#endif
+    /** The queue to the execute stage.  Issued instructions will be written
+     *  into it.
+     */
+    TimeBuffer<IssueStruct> *issueToExecuteQueue;
+#if 0
+    /** The backwards time buffer. */
+    TimeBuffer<TimeStruct> *timeBuffer;
+
+    /** Wire to read information from timebuffer. */
+    typename TimeBuffer<TimeStruct>::wire fromCommit;
+
+    /** Function unit pool. */
+    FUPool *fuPool;
+#endif
+    //////////////////////////////////////
+    // Instruction lists, ready queues, and ordering
+    //////////////////////////////////////
+
+    /** List of all the instructions in the IQ (some of which may be issued). */
+    std::list<DynInstPtr> instList[Impl::MaxThreads];
+
+    /**
+     * Struct for comparing entries to be added to the priority queue.  This
+     * gives reverse ordering to the instructions in terms of sequence
+     * numbers: the instructions with smaller sequence numbers (and hence
+     * are older) will be at the top of the priority queue.
+     */
+    struct pqCompare {
+        bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
+        {
+            return lhs->seqNum > rhs->seqNum;
+        }
+    };
+
+    /**
+     * Struct for an IQ entry. It includes the instruction and an iterator
+     * to the instruction's spot in the IQ.
+     */
+    struct IQEntry {
+        DynInstPtr inst;
+        ListIt iqIt;
+    };
+
+    typedef std::priority_queue<DynInstPtr, std::vector<DynInstPtr>, pqCompare>
+    ReadyInstQueue;
+    // NOTE(review): the mapped type here is pqCompare itself; confirm intent.
+    typedef std::map<DynInstPtr, pqCompare> ReadyInstMap;
+    typedef typename std::map<DynInstPtr, pqCompare>::iterator ReadyMapIt;
+
+    /** List of ready instructions.
+     */
+    ReadyInstQueue readyInsts;
+
+    /** List of non-speculative instructions that will be scheduled
+     *  once the IQ gets a signal from commit.  While it's redundant to
+     *  have the key be a part of the value (the sequence number is stored
+     *  inside of DynInst), when these instructions are woken up only
+     *  the sequence number will be available.  Thus it is most efficient to be
+     *  able to search by the sequence number alone.
+     */
+    std::map<InstSeqNum, DynInstPtr> nonSpecInsts;
+
+    typedef typename std::map<InstSeqNum, DynInstPtr>::iterator NonSpecMapIt;
+#if 0
+    /** Entry for the list age ordering by op class. */
+    struct ListOrderEntry {
+        OpClass queueType;
+        InstSeqNum oldestInst;
+    };
+
+    /** List that contains the age order of the oldest instruction of each
+     *  ready queue.  Used to select the oldest instruction available
+     *  among op classes.
+     */
+    std::list<ListOrderEntry> listOrder;
+
+    typedef typename std::list<ListOrderEntry>::iterator ListOrderIt;
+
+    /** Tracks if each ready queue is on the age order list. */
+    bool queueOnList[Num_OpClasses];
+
+    /** Iterators of each ready queue.  Points to their spot in the age order
+     *  list.
+     */
+    ListOrderIt readyIt[Num_OpClasses];
+
+    /** Add an op class to the age order list. */
+    void addToOrderList(OpClass op_class);
+
+    /**
+     * Called when the oldest instruction has been removed from a ready queue;
+     * this places that ready queue into the proper spot in the age order list.
+     */
+    void moveToYoungerInst(ListOrderIt age_order_it);
+#endif
+    //////////////////////////////////////
+    // Various parameters
+    //////////////////////////////////////
+#if 0
+    /** IQ Resource Sharing Policy */
+    enum IQPolicy {
+        Dynamic,
+        Partitioned,
+        Threshold
+    };
+
+    /** IQ sharing policy for SMT. */
+    IQPolicy iqPolicy;
+#endif
+    /** Number of total threads. */
+    unsigned numThreads;
+#if 0
+    /** Pointer to list of active threads. */
+    list<unsigned> *activeThreads;
+#endif
+    /** Per-thread count of used IQ entries. */
+    unsigned count[Impl::MaxThreads];
+
+    /** Max IQ entries per thread. */
+    unsigned maxEntries[Impl::MaxThreads];
+
+    /** Number of free IQ entries left. */
+    unsigned freeEntries;
+
+    /** The number of entries in the instruction queue. */
+    unsigned numEntries;
+
+    /** The total number of instructions that can be issued in one cycle. */
+    unsigned totalWidth;
+#if 0
+    /** The number of physical registers in the CPU. */
+    unsigned numPhysRegs;
+
+    /** The number of physical integer registers in the CPU. */
+    unsigned numPhysIntRegs;
+
+    /** The number of floating point registers in the CPU. */
+    unsigned numPhysFloatRegs;
+#endif
+    /** Delay between commit stage and the IQ.
+     *  @todo: Make there be a distinction between the delays within IEW.
+     */
+    unsigned commitToIEWDelay;
+
+    //////////////////////////////////
+    // Variables needed for squashing
+    //////////////////////////////////
+
+    /** The sequence number of the squashed instruction. */
+    InstSeqNum squashedSeqNum[Impl::MaxThreads];
+
+    /** Iterator that points to the last instruction that has been squashed.
+     *  This will not be valid unless the IQ is in the process of squashing.
+     */
+    ListIt squashIt[Impl::MaxThreads];
+#if 0
+    ///////////////////////////////////
+    // Dependency graph stuff
+    ///////////////////////////////////
+
+    class DependencyEntry
+    {
+      public:
+        DependencyEntry()
+            : inst(NULL), next(NULL)
+        { }
+
+        DynInstPtr inst;
+        //Might want to include data about what arch. register the
+        //dependence is waiting on.
+        DependencyEntry *next;
+
+        //This function, and perhaps this whole class, stand out a little
+        //bit as they don't fit a classification well.  I want access
+        //to the underlying structure of the linked list, yet at
+        //the same time it feels like this should be something abstracted
+        //away.  So for now it will sit here, within the IQ, until
+        //a better implementation is decided upon.
+        // This function probably shouldn't be within the entry...
+        void insert(DynInstPtr &new_inst);
+
+        void remove(DynInstPtr &inst_to_remove);
+
+        // Debug variable, remove when done testing.
+        static unsigned mem_alloc_counter;
+    };
+
+    /** Array of linked lists.  Each linked list is a list of all the
+     *  instructions that depend upon a given register.  The actual
+     *  register's index is used to index into the graph; ie all
+     *  instructions in flight that are dependent upon r34 will be
+     *  in the linked list of dependGraph[34].
+     */
+    DependencyEntry *dependGraph;
+
+    /** A cache of the recently woken registers.  It is 1 if the register
+     *  has been woken up recently, and 0 if the register has been added
+     *  to the dependency graph and has not yet received its value.  It
+     *  is basically a secondary scoreboard, and should pretty much mirror
+     *  the scoreboard that exists in the rename map.
+     */
+    vector<bool> regScoreboard;
+
+    /** Adds an instruction to the dependency graph, as a producer. */
+    bool addToDependents(DynInstPtr &new_inst);
+
+    /** Adds an instruction to the dependency graph, as a consumer. */
+    void createDependency(DynInstPtr &new_inst);
+#endif
+    /** Moves an instruction to the ready queue if it is ready. */
+    void addIfReady(DynInstPtr &inst);
+
+    /** Debugging function to count how many entries are in the IQ.  It does
+     *  a linear walk through the instructions, so do not call this function
+     *  during normal execution.
+     */
+    int countInsts();
+#if 0
+    /** Debugging function to dump out the dependency graph.
+     */
+    void dumpDependGraph();
+#endif
+    /** Debugging function to dump all the list sizes, as well as print
+     *  out the list of nonspeculative instructions.  Should not be used
+     *  in any other capacity, but it has no harmful side effects.
+     */
+    void dumpLists();
+
+    /** Debugging function to dump out all instructions that are in the
+     *  IQ.
+     */
+    void dumpInsts();
+
+    /** Stat for number of instructions added. */
+    Stats::Scalar<> iqInstsAdded;
+    /** Stat for number of non-speculative instructions added. */
+    Stats::Scalar<> iqNonSpecInstsAdded;
+//    Stats::Scalar<> iqIntInstsAdded;
+    /** Stat for number of integer instructions issued. */
+    Stats::Scalar<> iqIntInstsIssued;
+//    Stats::Scalar<> iqFloatInstsAdded;
+    /** Stat for number of floating point instructions issued. */
+    Stats::Scalar<> iqFloatInstsIssued;
+//    Stats::Scalar<> iqBranchInstsAdded;
+    /** Stat for number of branch instructions issued. */
+    Stats::Scalar<> iqBranchInstsIssued;
+//    Stats::Scalar<> iqMemInstsAdded;
+    /** Stat for number of memory instructions issued. */
+    Stats::Scalar<> iqMemInstsIssued;
+//    Stats::Scalar<> iqMiscInstsAdded;
+    /** Stat for number of miscellaneous instructions issued. */
+    Stats::Scalar<> iqMiscInstsIssued;
+    /** Stat for number of squashed instructions that were ready to issue. */
+    Stats::Scalar<> iqSquashedInstsIssued;
+    /** Stat for number of squashed instructions examined when squashing. */
+    Stats::Scalar<> iqSquashedInstsExamined;
+    /** Stat for number of squashed instruction operands examined when
+     * squashing.
+     */
+    Stats::Scalar<> iqSquashedOperandsExamined;
+    /** Stat for number of non-speculative instructions removed due to a squash.
+     */
+    Stats::Scalar<> iqSquashedNonSpecRemoved;
+
+};
+
+#endif //__CPU_OZONE_INST_QUEUE_HH__
diff --git a/cpu/ozone/inst_queue_impl.hh b/cpu/ozone/inst_queue_impl.hh
new file mode 100644
index 000000000..0523c68d6
--- /dev/null
+++ b/cpu/ozone/inst_queue_impl.hh
@@ -0,0 +1,1341 @@
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+// Todo:
+// Current ordering allows for 0 cycle added-to-scheduled.  Could maybe fake
+// it; either do in reverse order, or have added instructions put into a
+// different ready queue that, in scheduleReadyInsts(), gets put onto the
+// normal ready queue.  This would however give only a one cycle delay,
+// but probably is more flexible to actually add in a delay parameter than
+// just running it backwards.
+
+#include <vector>
+
+#include "sim/root.hh"
+
+#include "cpu/ozone/inst_queue.hh"
+// Everything between this "#if 0" and the matching "#endif" is excluded from
+// compilation.
+#if 0
+/** Builds a completion event for _inst on functional unit fu_idx.  The event
+ *  is created on mainEventQueue at Stat_Event_Pri and flagged AutoDelete.
+ */
+template <class Impl>
+InstQueue<Impl>::FUCompletion::FUCompletion(DynInstPtr &_inst,
+                                                   int fu_idx,
+                                                   InstQueue<Impl> *iq_ptr)
+    : Event(&mainEventQueue, Stat_Event_Pri),
+      inst(_inst), fuIdx(fu_idx), iqPtr(iq_ptr)
+{
+    this->setFlags(Event::AutoDelete);
+}
+
+/** Event handler: forwards to the owning queue's processFUCompletion(). */
+template <class Impl>
+void
+InstQueue<Impl>::FUCompletion::process()
+{
+    iqPtr->processFUCompletion(inst, fuIdx);
+}
+
+
+/** Returns a fixed, human-readable description of this event type. */
+template <class Impl>
+const char *
+InstQueue<Impl>::FUCompletion::description()
+{
+    return "Functional unit completion event";
+}
+#endif
+/** Constructs the instruction queue from the given parameters.
+ *  Copies the dcache interface, IQ size, issue width and commit-to-IEW
+ *  delay out of params, forces the thread count to 1, and zeroes the
+ *  per-thread instruction counts and squash sequence numbers.  All
+ *  entries start out free.
+ *  @param params Parameter object supplying dcacheInterface, numIQEntries,
+ *                issueWidth and commitToIEWDelay.
+ */
+template <class Impl>
+InstQueue<Impl>::InstQueue(Params *params)
+    : dcacheInterface(params->dcacheInterface),
+//      fuPool(params->fuPool),
+      numEntries(params->numIQEntries),
+      totalWidth(params->issueWidth),
+//      numPhysIntRegs(params->numPhysIntRegs),
+//      numPhysFloatRegs(params->numPhysFloatRegs),
+      commitToIEWDelay(params->commitToIEWDelay)
+{
+//    assert(fuPool);
+
+//    numThreads = params->numberOfThreads;
+    numThreads = 1;
+
+    //Initialize thread IQ counts
+    for (int i = 0; i <numThreads; i++) {
+        count[i] = 0;
+
+        // The sharing-policy code further down that fills maxEntries is
+        // commented out, so give every thread the whole queue here.
+        // Without this, numFreeEntries(tid) and isFull(tid) would read an
+        // uninitialized value.
+        maxEntries[i] = numEntries;
+    }
+
+    // Initialize the number of free IQ entries.
+    freeEntries = numEntries;
+
+    // Set the number of physical registers as the number of int + float
+//    numPhysRegs = numPhysIntRegs + numPhysFloatRegs;
+
+//    DPRINTF(IQ, "There are %i physical registers.\n", numPhysRegs);
+
+    //Create an entry for each physical register within the
+    //dependency graph.
+//    dependGraph = new DependencyEntry[numPhysRegs];
+
+    // Resize the register scoreboard.
+//    regScoreboard.resize(numPhysRegs);
+/*
+    //Initialize Mem Dependence Units
+    for (int i = 0; i < numThreads; i++) {
+        memDepUnit[i].init(params,i);
+        memDepUnit[i].setIQ(this);
+    }
+
+    // Initialize all the head pointers to point to NULL, and all the
+    // entries as unready.
+    // Note that in actuality, the registers corresponding to the logical
+    // registers start off as ready.  However this doesn't matter for the
+    // IQ as the instruction should have been correctly told if those
+    // registers are ready in rename.  Thus it can all be initialized as
+    // unready.
+    for (int i = 0; i < numPhysRegs; ++i) {
+        dependGraph[i].next = NULL;
+        dependGraph[i].inst = NULL;
+        regScoreboard[i] = false;
+    }
+*/
+    for (int i = 0; i < numThreads; ++i) {
+        squashedSeqNum[i] = 0;
+    }
+/*
+    for (int i = 0; i < Num_OpClasses; ++i) {
+        queueOnList[i] = false;
+        readyIt[i] = listOrder.end();
+    }
+
+    string policy = params->smtIQPolicy;
+
+    //Convert string to lowercase
+    std::transform(policy.begin(), policy.end(), policy.begin(),
+                   (int(*)(int)) tolower);
+
+    //Figure out resource sharing policy
+    if (policy == "dynamic") {
+        iqPolicy = Dynamic;
+
+        //Set Max Entries to Total ROB Capacity
+        for (int i = 0; i < numThreads; i++) {
+            maxEntries[i] = numEntries;
+        }
+
+    } else if (policy == "partitioned") {
+        iqPolicy = Partitioned;
+
+        //@todo:make work if part_amt doesnt divide evenly.
+        int part_amt = numEntries / numThreads;
+
+        //Divide ROB up evenly
+        for (int i = 0; i < numThreads; i++) {
+            maxEntries[i] = part_amt;
+        }
+
+        DPRINTF(Fetch, "IQ sharing policy set to Partitioned:"
+                "%i entries per thread.\n",part_amt);
+
+    } else if (policy == "threshold") {
+        iqPolicy = Threshold;
+
+        double threshold =  (double)params->smtIQThreshold / 100;
+
+        int thresholdIQ = (int)((double)threshold * numEntries);
+
+        //Divide up by threshold amount
+        for (int i = 0; i < numThreads; i++) {
+            maxEntries[i] = thresholdIQ;
+        }
+
+        DPRINTF(Fetch, "IQ sharing policy set to Threshold:"
+                "%i entries per thread.\n",thresholdIQ);
+   } else {
+       assert(0 && "Invalid IQ Sharing Policy.Options Are:{Dynamic,"
+              "Partitioned, Threshold}");
+   }
+*/
+}
+
+/** Destructor.  Currently does nothing: the dependency-graph teardown
+ *  below is commented out.
+ */
+template <class Impl>
+InstQueue<Impl>::~InstQueue()
+{
+    // Clear the dependency graph
+/*
+    DependencyEntry *curr;
+    DependencyEntry *prev;
+
+    for (int i = 0; i < numPhysRegs; ++i) {
+        curr = dependGraph[i].next;
+
+        while (curr) {
+            DependencyEntry::mem_alloc_counter--;
+
+            prev = curr;
+            curr = prev->next;
+            prev->inst = NULL;
+
+            delete prev;
+        }
+
+        if (dependGraph[i].inst) {
+            dependGraph[i].inst = NULL;
+        }
+
+        dependGraph[i].next = NULL;
+    }
+
+    assert(DependencyEntry::mem_alloc_counter == 0);
+
+    delete [] dependGraph;
+*/
+}
+
+/** Returns this queue's name: the owning CPU's name with ".iq" appended. */
+template <class Impl>
+std::string
+InstQueue<Impl>::name() const
+{
+    std::string iq_name = cpu->name();
+    iq_name += ".iq";
+    return iq_name;
+}
+
+/** Registers the IQ's statistics.
+ *  Each stat is given a name prefixed with this queue's name(), a
+ *  description, and itself as its prerequisite.
+ */
+template <class Impl>
+void
+InstQueue<Impl>::regStats()
+{
+    iqInstsAdded
+        .name(name() + ".iqInstsAdded")
+        .desc("Number of instructions added to the IQ (excludes non-spec)")
+        .prereq(iqInstsAdded);
+
+    iqNonSpecInstsAdded
+        .name(name() + ".iqNonSpecInstsAdded")
+        .desc("Number of non-speculative instructions added to the IQ")
+        .prereq(iqNonSpecInstsAdded);
+
+//    iqIntInstsAdded;
+
+    iqIntInstsIssued
+        .name(name() + ".iqIntInstsIssued")
+        .desc("Number of integer instructions issued")
+        .prereq(iqIntInstsIssued);
+
+//    iqFloatInstsAdded;
+
+    iqFloatInstsIssued
+        .name(name() + ".iqFloatInstsIssued")
+        .desc("Number of float instructions issued")
+        .prereq(iqFloatInstsIssued);
+
+//    iqBranchInstsAdded;
+
+    iqBranchInstsIssued
+        .name(name() + ".iqBranchInstsIssued")
+        .desc("Number of branch instructions issued")
+        .prereq(iqBranchInstsIssued);
+
+//    iqMemInstsAdded;
+
+    iqMemInstsIssued
+        .name(name() + ".iqMemInstsIssued")
+        .desc("Number of memory instructions issued")
+        .prereq(iqMemInstsIssued);
+
+//    iqMiscInstsAdded;
+
+    iqMiscInstsIssued
+        .name(name() + ".iqMiscInstsIssued")
+        .desc("Number of miscellaneous instructions issued")
+        .prereq(iqMiscInstsIssued);
+
+    iqSquashedInstsIssued
+        .name(name() + ".iqSquashedInstsIssued")
+        .desc("Number of squashed instructions issued")
+        .prereq(iqSquashedInstsIssued);
+
+    iqSquashedInstsExamined
+        .name(name() + ".iqSquashedInstsExamined")
+        .desc("Number of squashed instructions iterated over during squash;"
+              " mainly for profiling")
+        .prereq(iqSquashedInstsExamined);
+
+    iqSquashedOperandsExamined
+        .name(name() + ".iqSquashedOperandsExamined")
+        .desc("Number of squashed operands that are examined and possibly "
+              "removed from graph")
+        .prereq(iqSquashedOperandsExamined);
+
+    iqSquashedNonSpecRemoved
+        .name(name() + ".iqSquashedNonSpecRemoved")
+        .desc("Number of squashed non-spec instructions that were removed")
+        .prereq(iqSquashedNonSpecRemoved);
+/*
+    for ( int i=0; i < numThreads; i++) {
+        // Tell mem dependence unit to reg stats as well.
+        memDepUnit[i].regStats();
+    }
+*/
+}
+/*
+template <class Impl>
+void
+InstQueue<Impl>::setActiveThreads(list<unsigned> *at_ptr)
+{
+    DPRINTF(IQ, "Setting active threads list pointer.\n");
+    activeThreads = at_ptr;
+}
+*/
+/** Stores the pointer to the issue-to-execute time buffer and traces the
+ *  fact that it was set.
+ *  @param i2e_ptr Time buffer to remember; stored as-is.
+ */
+template <class Impl>
+void
+InstQueue<Impl>::setIssueToExecuteQueue(TimeBuffer<IssueStruct> *i2e_ptr)
+{
+    issueToExecuteQueue = i2e_ptr;
+
+    DPRINTF(IQ, "Set the issue to execute queue.\n");
+}
+/*
+template <class Impl>
+void
+InstQueue<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
+{
+    DPRINTF(IQ, "Set the time buffer.\n");
+    timeBuffer = tb_ptr;
+
+    fromCommit = timeBuffer->getWire(-commitToIEWDelay);
+}
+
+template <class Impl>
+int
+InstQueue<Impl>::entryAmount(int num_threads)
+{
+    if (iqPolicy == Partitioned) {
+        return numEntries / num_threads;
+    } else {
+        return 0;
+    }
+}
+
+
+template <class Impl>
+void
+InstQueue<Impl>::resetEntries()
+{
+    if (iqPolicy != Dynamic || numThreads > 1) {
+        int active_threads = (*activeThreads).size();
+
+        list<unsigned>::iterator threads  = (*activeThreads).begin();
+        list<unsigned>::iterator list_end = (*activeThreads).end();
+
+        while (threads != list_end) {
+            if (iqPolicy == Partitioned) {
+                maxEntries[*threads++] = numEntries / active_threads;
+            } else if(iqPolicy == Threshold && active_threads == 1) {
+                maxEntries[*threads++] = numEntries;
+            }
+        }
+    }
+}
+*/
+/** Returns the number of free entries in the whole IQ (freeEntries). */
+template <class Impl>
+unsigned
+InstQueue<Impl>::numFreeEntries()
+{
+    return freeEntries;
+}
+
+/** Returns the number of entries thread tid may still use, computed as
+ *  maxEntries[tid] - count[tid].
+ *  NOTE(review): only meaningful if maxEntries[tid] has been initialized --
+ *  verify against the constructor.
+ */
+template <class Impl>
+unsigned
+InstQueue<Impl>::numFreeEntries(unsigned tid)
+{
+    return maxEntries[tid] - count[tid];
+}
+
+/** Returns true when the IQ has no free entries left.
+ *  Might want to do something more complex if it knows how many
+ *  instructions will be issued this cycle.
+ */
+template <class Impl>
+bool
+InstQueue<Impl>::isFull()
+{
+    return freeEntries == 0;
+}
+
+/** Returns true when numFreeEntries(tid) reports no room for thread tid. */
+template <class Impl>
+bool
+InstQueue<Impl>::isFull(unsigned tid)
+{
+    return numFreeEntries(tid) == 0;
+}
+
+/** Returns true if at least one instruction is waiting on the ready queue.
+ *  The per-op-class implementation is kept below, commented out.
+ */
+template <class Impl>
+bool
+InstQueue<Impl>::hasReadyInsts()
+{
+/*
+    if (!listOrder.empty()) {
+        return true;
+    }
+
+    for (int i = 0; i < Num_OpClasses; ++i) {
+        if (!readyInsts[i].empty()) {
+            return true;
+        }
+    }
+
+    return false;
+*/
+    // "Has ready instructions" means the ready queue is NOT empty; returning
+    // empty() directly would report the opposite of what the name promises.
+    return !readyInsts.empty();
+}
+
+/** Adds an instruction to the IQ.
+ *  Appends it to its thread's instruction list, consumes one free entry,
+ *  passes it to addIfReady(), and bumps the added-instructions stat and the
+ *  per-thread count.  Asserts that new_inst is valid and that a free entry
+ *  exists.
+ *  @param new_inst Instruction to add.
+ */
+template <class Impl>
+void
+InstQueue<Impl>::insert(DynInstPtr &new_inst)
+{
+    // Make sure the instruction is valid
+    assert(new_inst);
+
+    DPRINTF(IQ, "Adding instruction PC %#x to the IQ.\n",
+            new_inst->readPC());
+
+    // Check if there are any free entries.  Panic if there are none.
+    // Might want to have this return a fault in the future instead of
+    // panicing.
+    assert(freeEntries != 0);
+
+    instList[new_inst->threadNumber].push_back(new_inst);
+
+    // Decrease the number of free entries.
+    --freeEntries;
+
+    //Mark Instruction as in IQ
+//    new_inst->setInIQ();
+/*
+    // Look through its source registers (physical regs), and mark any
+    // dependencies.
+    addToDependents(new_inst);
+
+    // Have this instruction set itself as the producer of its destination
+    // register(s).
+    createDependency(new_inst);
+*/
+    // If it's a memory instruction, add it to the memory dependency
+    // unit.
+//    if (new_inst->isMemRef()) {
+//        memDepUnit[new_inst->threadNumber].insert(new_inst);
+//    } else {
+        // If the instruction is ready then add it to the ready list.
+        addIfReady(new_inst);
+//    }
+
+    ++iqInstsAdded;
+
+
+    //Update Thread IQ Count
+    count[new_inst->threadNumber]++;
+
+    // Sanity check: the free-entry counter must agree with countInsts().
+    assert(freeEntries == (numEntries - countInsts()));
+}
+
+/** Adds a non-speculative instruction to the IQ.
+ *  Records it in nonSpecInsts keyed by sequence number (scheduleNonSpec()
+ *  looks it up there), appends it to its thread's instruction list,
+ *  consumes one free entry, and bumps the non-spec stat and the per-thread
+ *  count.  Unlike insert(), it does not call addIfReady().  Asserts that
+ *  new_inst is valid and that a free entry exists.
+ *  @param new_inst Instruction to add.
+ */
+template <class Impl>
+void
+InstQueue<Impl>::insertNonSpec(DynInstPtr &new_inst)
+{
+    // Make sure the instruction is valid before it is dereferenced below.
+    assert(new_inst);
+
+    nonSpecInsts[new_inst->seqNum] = new_inst;
+
+    // @todo: Clean up this code; can do it by setting inst as unable
+    // to issue, then calling normal insert on the inst.
+
+    DPRINTF(IQ, "Adding instruction PC %#x to the IQ.\n",
+            new_inst->readPC());
+
+    // Check if there are any free entries.  Panic if there are none.
+    // Might want to have this return a fault in the future instead of
+    // panicing.
+    assert(freeEntries != 0);
+
+    instList[new_inst->threadNumber].push_back(new_inst);
+
+    // Decrease the number of free entries.
+    --freeEntries;
+
+    //Mark Instruction as in IQ
+//    new_inst->setInIQ();
+/*
+    // Have this instruction set itself as the producer of its destination
+    // register(s).
+    createDependency(new_inst);
+
+    // If it's a memory instruction, add it to the memory dependency
+    // unit.
+    if (new_inst->isMemRef()) {
+        memDepUnit[new_inst->threadNumber].insertNonSpec(new_inst);
+    }
+*/
+    ++iqNonSpecInstsAdded;
+
+    //Update Thread IQ Count
+    count[new_inst->threadNumber]++;
+
+    // Sanity check: the free-entry counter must agree with countInsts().
+    assert(freeEntries == (numEntries - countInsts()));
+}
+/*
+template <class Impl>
+void
+InstQueue<Impl>::advanceTail(DynInstPtr &inst)
+{
+    // Have this instruction set itself as the producer of its destination
+    // register(s).
+    createDependency(inst);
+}
+
+template <class Impl>
+void
+InstQueue<Impl>::addToOrderList(OpClass op_class)
+{
+    assert(!readyInsts[op_class].empty());
+
+    ListOrderEntry queue_entry;
+
+    queue_entry.queueType = op_class;
+
+    queue_entry.oldestInst = readyInsts[op_class].top()->seqNum;
+
+    ListOrderIt list_it = listOrder.begin();
+    ListOrderIt list_end_it = listOrder.end();
+
+    while (list_it != list_end_it) {
+        if ((*list_it).oldestInst > queue_entry.oldestInst) {
+            break;
+        }
+
+        list_it++;
+    }
+
+    readyIt[op_class] = listOrder.insert(list_it, queue_entry);
+    queueOnList[op_class] = true;
+}
+
+template <class Impl>
+void
+InstQueue<Impl>::moveToYoungerInst(ListOrderIt list_order_it)
+{
+    // Get iterator of next item on the list
+    // Delete the original iterator
+    // Determine if the next item is either the end of the list or younger
+    // than the new instruction.  If so, then add in a new iterator right here.
+    // If not, then move along.
+    ListOrderEntry queue_entry;
+    OpClass op_class = (*list_order_it).queueType;
+    ListOrderIt next_it = list_order_it;
+
+    ++next_it;
+
+    queue_entry.queueType = op_class;
+    queue_entry.oldestInst = readyInsts[op_class].top()->seqNum;
+
+    while (next_it != listOrder.end() &&
+           (*next_it).oldestInst < queue_entry.oldestInst) {
+        ++next_it;
+    }
+
+    readyIt[op_class] = listOrder.insert(next_it, queue_entry);
+}
+
+template <class Impl>
+void
+InstQueue<Impl>::processFUCompletion(DynInstPtr &inst, int fu_idx)
+{
+    // The CPU could have been sleeping until this op completed (*extremely*
+    // long latency op).  Wake it if it was.  This may be overkill.
+    iewStage->wakeCPU();
+
+    fuPool->freeUnit(fu_idx);
+
+    int &size = issueToExecuteQueue->access(0)->size;
+
+    issueToExecuteQueue->access(0)->insts[size++] = inst;
+}
+*/
+// @todo: Figure out a better way to remove the squashed items from the
+// lists.  Checking the top item of each list to see if it's squashed
+// wastes time and forces jumps.
+/** Schedules ready instructions for issue.
+ *  As written, the only live statement is the trace message; the entire
+ *  scheduling algorithm below is commented out.
+ */
+template <class Impl>
+void
+InstQueue<Impl>::scheduleReadyInsts()
+{
+    DPRINTF(IQ, "Attempting to schedule ready instructions from "
+            "the IQ.\n");
+
+//    IssueStruct *i2e_info = issueToExecuteQueue->access(0);
+/*
+    // Will need to reorder the list if either a queue is not on the list,
+    // or it has an older instruction than last time.
+    for (int i = 0; i < Num_OpClasses; ++i) {
+        if (!readyInsts[i].empty()) {
+            if (!queueOnList[i]) {
+                addToOrderList(OpClass(i));
+            } else if (readyInsts[i].top()->seqNum  <
+                       (*readyIt[i]).oldestInst) {
+                listOrder.erase(readyIt[i]);
+                addToOrderList(OpClass(i));
+            }
+        }
+    }
+
+    // Have iterator to head of the list
+    // While I haven't exceeded bandwidth or reached the end of the list,
+    // Try to get a FU that can do what this op needs.
+    // If successful, change the oldestInst to the new top of the list, put
+    // the queue in the proper place in the list.
+    // Increment the iterator.
+    // This will avoid trying to schedule a certain op class if there are no
+    // FUs that handle it.
+    ListOrderIt order_it = listOrder.begin();
+    ListOrderIt order_end_it = listOrder.end();
+    int total_issued = 0;
+    int exec_queue_slot = i2e_info->size;
+
+    while (exec_queue_slot < totalWidth && order_it != order_end_it) {
+        OpClass op_class = (*order_it).queueType;
+
+        assert(!readyInsts[op_class].empty());
+
+        DynInstPtr issuing_inst = readyInsts[op_class].top();
+
+        assert(issuing_inst->seqNum == (*order_it).oldestInst);
+
+        if (issuing_inst->isSquashed()) {
+            readyInsts[op_class].pop();
+
+            if (!readyInsts[op_class].empty()) {
+                moveToYoungerInst(order_it);
+            } else {
+                readyIt[op_class] = listOrder.end();
+                queueOnList[op_class] = false;
+            }
+
+            listOrder.erase(order_it++);
+
+            ++iqSquashedInstsIssued;
+
+            continue;
+        }
+
+        int idx = fuPool->getUnit(op_class);
+
+        if (idx != -1) {
+            int op_latency = fuPool->getOpLatency(op_class);
+
+            if (op_latency == 1) {
+                i2e_info->insts[exec_queue_slot++] = issuing_inst;
+                i2e_info->size++;
+
+                // Add the FU onto the list of FU's to be freed next cycle.
+                fuPool->freeUnit(idx);
+            } else {
+                int issue_latency = fuPool->getIssueLatency(op_class);
+
+                if (issue_latency > 1) {
+                    // Generate completion event for the FU
+                    FUCompletion *execution = new FUCompletion(issuing_inst,
+                                                               idx, this);
+
+                    execution->schedule(curTick + issue_latency - 1);
+                } else {
+                    i2e_info->insts[exec_queue_slot++] = issuing_inst;
+                    i2e_info->size++;
+
+                    // Add the FU onto the list of FU's to be freed next cycle.
+                    fuPool->freeUnit(idx);
+                }
+            }
+
+            DPRINTF(IQ, "Thread %i: Issuing instruction PC %#x "
+                    "[sn:%lli]\n",
+                    issuing_inst->threadNumber, issuing_inst->readPC(),
+                    issuing_inst->seqNum);
+
+            readyInsts[op_class].pop();
+
+            if (!readyInsts[op_class].empty()) {
+                moveToYoungerInst(order_it);
+            } else {
+                readyIt[op_class] = listOrder.end();
+                queueOnList[op_class] = false;
+            }
+
+            issuing_inst->setIssued();
+            ++total_issued;
+
+            if (!issuing_inst->isMemRef()) {
+                // Memory instructions can not be freed from the IQ until they
+                // complete.
+                ++freeEntries;
+                count[issuing_inst->threadNumber]--;
+                issuing_inst->removeInIQ();
+            } else {
+                memDepUnit[issuing_inst->threadNumber].issue(issuing_inst);
+            }
+
+            listOrder.erase(order_it++);
+        } else {
+            ++order_it;
+        }
+    }
+
+    if (total_issued) {
+        cpu->activityThisCycle();
+    } else {
+        DPRINTF(IQ, "Not able to schedule any instructions.\n");
+    }
+*/
+}
+
+/** Releases a previously inserted non-speculative instruction for issue.
+ *  Looks the sequence number up in nonSpecInsts (asserting it is present),
+ *  marks the instruction as able to issue, passes it to addIfReady(), and
+ *  removes it from the non-speculative map.
+ *  @param inst Sequence number of the non-speculative instruction.
+ */
+template <class Impl>
+void
+InstQueue<Impl>::scheduleNonSpec(const InstSeqNum &inst)
+{
+    DPRINTF(IQ, "Marking nonspeculative instruction with sequence "
+            "number %i as ready to execute.\n", inst);
+
+    NonSpecMapIt entry = nonSpecInsts.find(inst);
+    assert(entry != nonSpecInsts.end());
+
+    DynInstPtr &ready_inst = entry->second;
+
+    // Mark this instruction as ready to issue.
+    ready_inst->setCanIssue();
+
+    // Now schedule the instruction.  The memory-dependence-unit path is
+    // disabled:
+//    unsigned tid = ready_inst->threadNumber;
+//    if (ready_inst->isMemRef())
+//        memDepUnit[tid].nonSpecInstReady(ready_inst);
+    addIfReady(ready_inst);
+
+    nonSpecInsts.erase(entry);
+}
+
+/** Drops committed instructions from the front of a thread's list.
+ *  Pops entries off the head of instList[tid] while their sequence number
+ *  is less than or equal to inst, then checks the free-entry bookkeeping.
+ *  @param inst Sequence number up to and including which to remove.
+ *  @param tid  Thread whose instruction list is trimmed.
+ */
+template <class Impl>
+void
+InstQueue<Impl>::commit(const InstSeqNum &inst, unsigned tid)
+{
+    /*Need to go through each thread??*/
+    DPRINTF(IQ, "[tid:%i]: Committing instructions older than [sn:%i]\n",
+            tid,inst);
+
+    // Always look at the current head; after each pop re-read it.
+    ListIt oldest = instList[tid].begin();
+
+    while (oldest != instList[tid].end() && (*oldest)->seqNum <= inst) {
+        instList[tid].pop_front();
+        oldest = instList[tid].begin();
+    }
+
+    assert(freeEntries == (numEntries - countInsts()));
+}
+
+/** Handles a completed instruction.
+ *  If it is a memory reference, completeMemInst() is called for it; then
+ *  the instruction's own wakeDependents() is invoked.  The register
+ *  dependency-graph walk further down is commented out.
+ *  @param completed_inst Instruction that has finished.
+ */
+template <class Impl>
+void
+InstQueue<Impl>::wakeDependents(DynInstPtr &completed_inst)
+{
+    DPRINTF(IQ, "Waking dependents of completed instruction.\n");
+    // Look at the physical destination register of the DynInst
+    // and look it up on the dependency graph.  Then mark as ready
+    // any instructions within the instruction queue.
+/*
+    DependencyEntry *curr;
+    DependencyEntry *prev;
+*/
+    // Tell the memory dependence unit to wake any dependents on this
+    // instruction if it is a memory instruction.  Also complete the memory
+    // instruction at this point since we know it executed fine.
+    // @todo: Might want to rename "completeMemInst" to
+    // something that indicates that it won't need to be replayed, and call
+    // this earlier.  Might not be a big deal.
+    if (completed_inst->isMemRef()) {
+//        memDepUnit[completed_inst->threadNumber].wakeDependents(completed_inst);
+        completeMemInst(completed_inst);
+    }
+    completed_inst->wakeDependents();
+/*
+    for (int dest_reg_idx = 0;
+         dest_reg_idx < completed_inst->numDestRegs();
+         dest_reg_idx++)
+    {
+        PhysRegIndex dest_reg =
+            completed_inst->renamedDestRegIdx(dest_reg_idx);
+
+        // Special case of uniq or control registers.  They are not
+        // handled by the IQ and thus have no dependency graph entry.
+        // @todo Figure out a cleaner way to handle this.
+        if (dest_reg >= numPhysRegs) {
+            continue;
+        }
+
+        DPRINTF(IQ, "Waking any dependents on register %i.\n",
+                (int) dest_reg);
+
+        //Maybe abstract this part into a function.
+        //Go through the dependency chain, marking the registers as ready
+        //within the waiting instructions.
+
+        curr = dependGraph[dest_reg].next;
+
+        while (curr) {
+            DPRINTF(IQ, "Waking up a dependent instruction, PC%#x.\n",
+                    curr->inst->readPC());
+
+            // Might want to give more information to the instruction
+            // so that it knows which of its source registers is ready.
+            // However that would mean that the dependency graph entries
+            // would need to hold the src_reg_idx.
+            curr->inst->markSrcRegReady();
+
+            addIfReady(curr->inst);
+
+            DependencyEntry::mem_alloc_counter--;
+
+            prev = curr;
+            curr = prev->next;
+            prev->inst = NULL;
+
+            delete prev;
+        }
+
+        // Reset the head node now that all of its dependents have been woken
+        // up.
+        dependGraph[dest_reg].next = NULL;
+        dependGraph[dest_reg].inst = NULL;
+
+        // Mark the scoreboard as having that register ready.
+        regScoreboard[dest_reg] = true;
+    }
+*/
+}
+
+/** Pushes a ready memory instruction onto the ready queue and traces its
+ *  PC, op class and sequence number.
+ *  @param ready_inst Instruction to place on the ready queue.
+ */
+template <class Impl>
+void
+InstQueue<Impl>::addReadyMemInst(DynInstPtr &ready_inst)
+{
+    // Read the op class up front; it is only reported in the trace below.
+    OpClass inst_class = ready_inst->opClass();
+
+    readyInsts.push(ready_inst);
+
+    DPRINTF(IQ, "Instruction is ready to issue, putting it onto "
+            "the ready list, PC %#x opclass:%i [sn:%lli].\n",
+            ready_inst->readPC(), inst_class, ready_inst->seqNum);
+}
+/*
+template <class Impl>
+void
+InstQueue<Impl>::rescheduleMemInst(DynInstPtr &resched_inst)
+{
+    memDepUnit[resched_inst->threadNumber].reschedule(resched_inst);
+}
+
+template <class Impl>
+void
+InstQueue<Impl>::replayMemInst(DynInstPtr &replay_inst)
+{
+    memDepUnit[replay_inst->threadNumber].replay(replay_inst);
+}
+*/
+/** Accounts for a memory instruction that has completed: returns its entry
+ *  to the free pool and decrements its thread's instruction count.
+ *  @param completed_inst Memory instruction that has finished.
+ */
+template <class Impl>
+void
+InstQueue<Impl>::completeMemInst(DynInstPtr &completed_inst)
+{
+    DPRINTF(IQ, "Completing mem instruction PC:%#x [sn:%lli]\n",
+            completed_inst->readPC(), completed_inst->seqNum);
+
+    // Disabled bookkeeping kept for reference:
+//    completed_inst->memOpDone = true;
+//    memDepUnit[tid].completed(completed_inst);
+
+    ++freeEntries;
+    --count[completed_inst->threadNumber];
+}
+/*
+template <class Impl>
+void
+InstQueue<Impl>::violation(DynInstPtr &store,
+                                  DynInstPtr &faulting_load)
+{
+    memDepUnit[store->threadNumber].violation(store, faulting_load);
+}
+*/
+/** Starts a squash for thread tid.
+ *  Points squashIt[tid] at the youngest entry of the thread's instruction
+ *  list (or at end() when the list is empty) and, if the thread has any
+ *  instructions counted in the IQ, runs doSquash().
+ *  NOTE(review): the line that would load squashedSeqNum[tid] is commented
+ *  out here, while doSquash() asserts that it is non-zero -- confirm that
+ *  it is set elsewhere before this is called.
+ *  @param tid Thread to squash.
+ */
+template <class Impl>
+void
+InstQueue<Impl>::squash(unsigned tid)
+{
+    DPRINTF(IQ, "[tid:%i]: Starting to squash instructions in "
+            "the IQ.\n", tid);
+
+    // Read instruction sequence number of last instruction out of the
+    // time buffer.
+//    squashedSeqNum[tid] = fromCommit->commitInfo[tid].doneSeqNum;
+
+    // Setup the squash iterator to point to the tail.  Only step back from
+    // end() when there is an element to land on; decrementing end() of an
+    // empty list is undefined.
+    squashIt[tid] = instList[tid].end();
+    if (instList[tid].begin() != instList[tid].end()) {
+        --squashIt[tid];
+    }
+
+    // Call doSquash if there are insts in the IQ
+    if (count[tid] > 0) {
+        doSquash(tid);
+    }
+
+    // Also tell the memory dependence unit to squash.
+//    memDepUnit[tid].squash(squashedSeqNum[tid], tid);
+}
+
+/** Walks thread tid's instruction list from squashIt[tid] toward the head,
+ *  squashing every instruction whose sequence number is greater than
+ *  squashedSeqNum[tid].
+ *  An instruction is squashed in the IQ only if it has not been issued or
+ *  is a memory reference.  Non-speculative instructions are also removed
+ *  from nonSpecInsts.  Each squashed instruction is marked squashed-in-IQ,
+ *  issued and able to commit, and its entry is returned to the free pool.
+ *  Asserts that squashedSeqNum[tid] is non-zero.
+ *  @param tid Thread being squashed.
+ */
+template <class Impl>
+void
+InstQueue<Impl>::doSquash(unsigned tid)
+{
+    // Make sure the squashed sequence number is valid.
+    assert(squashedSeqNum[tid] != 0);
+
+    DPRINTF(IQ, "[tid:%i]: Squashing until sequence number %i!\n",
+            tid, squashedSeqNum[tid]);
+
+    // Squash any instructions younger than the squashed sequence number
+    // given.
+    while (squashIt[tid] != instList[tid].end() &&
+           (*squashIt[tid])->seqNum > squashedSeqNum[tid]) {
+
+        DynInstPtr squashed_inst = (*squashIt[tid]);
+
+        // Step toward the head of the list now; the rest of the loop body
+        // only uses squashed_inst.  Decrementing begin() is undefined, so
+        // once the oldest entry has been visited park the iterator on
+        // end(), which is what the loop condition tests for.
+        if (squashIt[tid] == instList[tid].begin()) {
+            squashIt[tid] = instList[tid].end();
+        } else {
+            --squashIt[tid];
+        }
+
+        // Only handle the instruction if it actually is in the IQ and
+        // hasn't already been squashed in the IQ.
+        if (squashed_inst->threadNumber != tid ||
+            squashed_inst->isSquashedInIQ()) {
+            continue;
+        }
+
+        if (!squashed_inst->isIssued() ||
+            (squashed_inst->isMemRef()/* &&
+                                         !squashed_inst->memOpDone*/)) {
+
+            // Remove the instruction from the dependency list.
+            if (!squashed_inst->isNonSpeculative()) {
+/*
+                for (int src_reg_idx = 0;
+                     src_reg_idx < squashed_inst->numSrcRegs();
+                     src_reg_idx++)
+                {
+                    PhysRegIndex src_reg =
+                        squashed_inst->renamedSrcRegIdx(src_reg_idx);
+
+                    // Only remove it from the dependency graph if it was
+                    // placed there in the first place.
+                    // HACK: This assumes that instructions woken up from the
+                    // dependency chain aren't informed that a specific src
+                    // register has become ready.  This may not always be true
+                    // in the future.
+                    // Instead of doing a linked list traversal, we can just
+                    // remove these squashed instructions either at issue time,
+                    // or when the register is overwritten.  The only downside
+                    // to this is it leaves more room for error.
+
+                    if (!squashed_inst->isReadySrcRegIdx(src_reg_idx) &&
+                        src_reg < numPhysRegs) {
+                        dependGraph[src_reg].remove(squashed_inst);
+                    }
+
+
+                    ++iqSquashedOperandsExamined;
+                }
+*/
+                // Might want to remove producers as well.
+            } else {
+                nonSpecInsts[squashed_inst->seqNum] = NULL;
+
+                nonSpecInsts.erase(squashed_inst->seqNum);
+
+                ++iqSquashedNonSpecRemoved;
+            }
+
+            // Might want to also clear out the head of the dependency graph.
+
+            // Mark it as squashed within the IQ.
+            squashed_inst->setSquashedInIQ();
+
+            // @todo: Remove this hack where several statuses are set so the
+            // inst will flow through the rest of the pipeline.
+            squashed_inst->setIssued();
+            squashed_inst->setCanCommit();
+//            squashed_inst->removeInIQ();
+
+            //Update Thread IQ Count
+            count[squashed_inst->threadNumber]--;
+
+            ++freeEntries;
+
+            if (numThreads > 1) {
+                DPRINTF(IQ, "[tid:%i]: Instruction PC %#x squashed.\n",
+                        tid, squashed_inst->readPC());
+            } else {
+                DPRINTF(IQ, "Instruction PC %#x squashed.\n",
+                        squashed_inst->readPC());
+            }
+        }
+
+        ++iqSquashedInstsExamined;
+    }
+}
+/*
+template <class Impl>
+void
+InstQueue<Impl>::DependencyEntry::insert(DynInstPtr &new_inst)
+{
+    //Add this new, dependent instruction at the head of the dependency
+    //chain.
+
+    // First create the entry that will be added to the head of the
+    // dependency chain.
+    DependencyEntry *new_entry = new DependencyEntry;
+    new_entry->next = this->next;
+    new_entry->inst = new_inst;
+
+    // Then actually add it to the chain.
+    this->next = new_entry;
+
+    ++mem_alloc_counter;
+}
+
+template <class Impl>
+void
+InstQueue<Impl>::DependencyEntry::remove(DynInstPtr &inst_to_remove)
+{
+    DependencyEntry *prev = this;
+    DependencyEntry *curr = this->next;
+
+    // Make sure curr isn't NULL.  Because this instruction is being
+    // removed from a dependency list, it must have been placed there at
+    // an earlier time.  The dependency chain should not be empty,
+    // unless the instruction dependent upon it is already ready.
+    if (curr == NULL) {
+        return;
+    }
+
+    // Find the instruction to remove within the dependency linked list.
+    while (curr->inst != inst_to_remove) {
+        prev = curr;
+        curr = curr->next;
+
+        assert(curr != NULL);
+    }
+
+    // Now remove this instruction from the list.
+    prev->next = curr->next;
+
+    --mem_alloc_counter;
+
+    // Could push this off to the destructor of DependencyEntry
+    curr->inst = NULL;
+
+    delete curr;
+}
+
+template <class Impl>
+bool
+InstQueue<Impl>::addToDependents(DynInstPtr &new_inst)
+{
+    // Loop through the instruction's source registers, adding
+    // them to the dependency list if they are not ready.
+    int8_t total_src_regs = new_inst->numSrcRegs();
+    bool return_val = false;
+
+    for (int src_reg_idx = 0;
+         src_reg_idx < total_src_regs;
+         src_reg_idx++)
+    {
+        // Only add it to the dependency graph if it's not ready.
+        if (!new_inst->isReadySrcRegIdx(src_reg_idx)) {
+            PhysRegIndex src_reg = new_inst->renamedSrcRegIdx(src_reg_idx);
+
+            // Check the IQ's scoreboard to make sure the register
+            // hasn't become ready while the instruction was in flight
+            // between stages.  Only if it really isn't ready should
+            // it be added to the dependency graph.
+            if (src_reg >= numPhysRegs) {
+                continue;
+            } else if (regScoreboard[src_reg] == false) {
+                DPRINTF(IQ, "Instruction PC %#x has src reg %i that "
+                        "is being added to the dependency chain.\n",
+                        new_inst->readPC(), src_reg);
+
+                dependGraph[src_reg].insert(new_inst);
+
+                // Change the return value to indicate that something
+                // was added to the dependency graph.
+                return_val = true;
+            } else {
+                DPRINTF(IQ, "Instruction PC %#x has src reg %i that "
+                        "became ready before it reached the IQ.\n",
+                        new_inst->readPC(), src_reg);
+                // Mark a register ready within the instruction.
+                new_inst->markSrcRegReady();
+            }
+        }
+    }
+
+    return return_val;
+}
+
+template <class Impl>
+void
+InstQueue<Impl>::createDependency(DynInstPtr &new_inst)
+{
+    //Actually nothing really needs to be marked when an
+    //instruction becomes the producer of a register's value,
+    //but for convenience a ptr to the producing instruction will
+    //be placed in the head node of the dependency links.
+    int8_t total_dest_regs = new_inst->numDestRegs();
+
+    for (int dest_reg_idx = 0;
+         dest_reg_idx < total_dest_regs;
+         dest_reg_idx++)
+    {
+        PhysRegIndex dest_reg = new_inst->renamedDestRegIdx(dest_reg_idx);
+
+        // Instructions that use the misc regs will have a reg number
+        // higher than the normal physical registers.  In this case these
+        // registers are not renamed, and there is no need to track
+        // dependencies as these instructions must be executed at commit.
+        if (dest_reg >= numPhysRegs) {
+            continue;
+        }
+
+        if (dependGraph[dest_reg].next) {
+            dumpDependGraph();
+            panic("Dependency graph %i not empty!", dest_reg);
+        }
+
+        dependGraph[dest_reg].inst = new_inst;
+
+        // Mark the scoreboard to say it's not yet ready.
+        regScoreboard[dest_reg] = false;
+    }
+}
+*/
+template <class Impl>
+void
+InstQueue<Impl>::addIfReady(DynInstPtr &inst)
+{
+    // Instructions that are not ready to issue yet are left untouched;
+    // only an instruction reporting readyToIssue() is considered below.
+    if (!inst->readyToIssue()) {
+        return;
+    }
+
+    // Memory references never go onto the ready list from here.  The
+    // notification to the memory dependence unit is currently disabled,
+    // so beyond the trace message nothing happens for them:
+    //
+    //     memDepUnit[inst->threadNumber].regsReady(inst);
+    if (inst->isMemRef()) {
+        DPRINTF(IQ, "Checking if memory instruction can issue.\n");
+
+        return;
+    }
+
+    // Any other ready instruction is queued for issue.
+    OpClass op_class = inst->opClass();
+
+    DPRINTF(IQ, "Instruction is ready to issue, putting it onto "
+            "the ready list, PC %#x opclass:%i [sn:%lli].\n",
+            inst->readPC(), op_class, inst->seqNum);
+
+    readyInsts.push(inst);
+}
+
+template <class Impl>
+int
+InstQueue<Impl>::countInsts()
+{
+    //ksewell: The brute-force walk over the per-thread instruction lists
+    //below is compiled out (#if 0).  The live code reports occupancy as
+    //numEntries - freeEntries.
+
+#if 0
+    int total_insts = 0;
+
+    for (int i = 0; i < numThreads; ++i) {
+        ListIt count_it = instList[i].begin();
+
+        while (count_it != instList[i].end()) {
+            if (!(*count_it)->isSquashed() && !(*count_it)->isSquashedInIQ()) {
+                if (!(*count_it)->isIssued()) {
+                    ++total_insts;
+                } else if ((*count_it)->isMemRef() &&
+                           !(*count_it)->memOpDone) {
+                    // Loads that have not been marked as executed still count
+                    // towards the total instructions.
+                    ++total_insts;
+                }
+            }
+
+            ++count_it;
+        }
+    }
+
+    return total_insts;
+#else
+    return numEntries - freeEntries;
+#endif
+}
+/*
+template <class Impl>
+void
+InstQueue<Impl>::dumpDependGraph()
+{
+    DependencyEntry *curr;
+
+    for (int i = 0; i < numPhysRegs; ++i)
+    {
+        curr = &dependGraph[i];
+
+        if (curr->inst) {
+            cprintf("dependGraph[%i]: producer: %#x [sn:%lli] consumer: ",
+                    i, curr->inst->readPC(), curr->inst->seqNum);
+        } else {
+            cprintf("dependGraph[%i]: No producer. consumer: ", i);
+        }
+
+        while (curr->next != NULL) {
+            curr = curr->next;
+
+            cprintf("%#x [sn:%lli] ",
+                    curr->inst->readPC(), curr->inst->seqNum);
+        }
+
+        cprintf("\n");
+    }
+}
+*/
+template <class Impl>
+void
+InstQueue<Impl>::dumpLists()
+{
+    // readyInsts is used as one queue here (it is not indexed by op
+    // class), so its size is reported once instead of once per op class.
+    cprintf("Ready list size: %i\n", readyInsts.size());
+    cprintf("\n");
+
+    cprintf("Non speculative list size: %i\n", nonSpecInsts.size());
+
+    NonSpecMapIt non_spec_it = nonSpecInsts.begin();
+    NonSpecMapIt non_spec_end_it = nonSpecInsts.end();
+
+    cprintf("Non speculative list: ");
+
+    // Entries are separated by a space so adjacent ones stay readable.
+    while (non_spec_it != non_spec_end_it) {
+        cprintf("%#x [sn:%lli] ", (*non_spec_it).second->readPC(),
+                (*non_spec_it).second->seqNum);
+        ++non_spec_it;
+    }
+
+    cprintf("\n");
+/*
+    ListOrderIt list_order_it = listOrder.begin();
+    ListOrderIt list_order_end_it = listOrder.end();
+    int i = 1;
+
+    cprintf("List order: ");
+
+    while (list_order_it != list_order_end_it) {
+        cprintf("%i OpClass:%i [sn:%lli] ", i, (*list_order_it).queueType,
+                (*list_order_it).oldestInst);
+
+        ++list_order_it;
+        ++i;
+    }
+*/
+    cprintf("\n");
+}
+
+// Debug dump of per-thread instructions; the loop body is disabled (no-op).
+template <class Impl>
+void
+InstQueue<Impl>::dumpInsts()
+{
+    for (int i = 0; i < numThreads; ++i) {
+//        int num = 0;
+//        int valid_num = 0;
+/*
+      ListIt inst_list_it = instList[i].begin();
+
+        while (inst_list_it != instList[i].end())
+        {
+            cprintf("Instruction:%i\n",
+                    num);
+            if (!(*inst_list_it)->isSquashed()) {
+                if (!(*inst_list_it)->isIssued()) {
+                    ++valid_num;
+                    cprintf("Count:%i\n", valid_num);
+                } else if ((*inst_list_it)->isMemRef() &&
+                           !(*inst_list_it)->memOpDone) {
+                    // Loads that have not been marked as executed still count
+                    // towards the total instructions.
+                    ++valid_num;
+                    cprintf("Count:%i\n", valid_num);
+                }
+            }
+
+            cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
+                    "Issued:%i\nSquashed:%i\n",
+                    (*inst_list_it)->readPC(),
+                    (*inst_list_it)->seqNum,
+                    (*inst_list_it)->threadNumber,
+                    (*inst_list_it)->isIssued(),
+                    (*inst_list_it)->isSquashed());
+
+            if ((*inst_list_it)->isMemRef()) {
+                cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
+            }
+
+            cprintf("\n");
+
+            inst_list_it++;
+            ++num;
+        }
+*/
+    }
+}
diff --git a/cpu/ozone/lsq_unit.cc b/cpu/ozone/lsq_unit.cc
new file mode 100644
index 000000000..3ac51b87d
--- /dev/null
+++ b/cpu/ozone/lsq_unit.cc
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cpu/ozone/ozone_impl.hh"
+#include "cpu/ozone/lsq_unit_impl.hh"
+
+// Force the instantiation of OzoneLSQ for every implementation we care about.
+template class OzoneLSQ<OzoneImpl>;
+
diff --git a/cpu/ozone/lsq_unit.hh b/cpu/ozone/lsq_unit.hh
new file mode 100644
index 000000000..3c3e3988c
--- /dev/null
+++ b/cpu/ozone/lsq_unit.hh
@@ -0,0 +1,632 @@
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_OZONE_LSQ_UNIT_HH__
+#define __CPU_OZONE_LSQ_UNIT_HH__
+
+#include <map>
+#include <queue>
+#include <algorithm>
+
+#include "arch/faults.hh"
+#include "arch/isa_traits.hh"
+#include "config/full_system.hh"
+#include "base/hashmap.hh"
+#include "cpu/inst_seq.hh"
+#include "mem/mem_interface.hh"
+//#include "mem/page_table.hh"
+#include "sim/sim_object.hh"
+
+class PageTable;
+
+/**
+ * Class that implements the actual LQ and SQ for each specific thread.
+ * Both are circular queues; load entries are freed upon committing, while
+ * store entries are freed once they writeback. The LSQUnit tracks if there
+ * are memory ordering violations, and also detects partial load to store
+ * forwarding cases (a store only has part of a load's data) that requires
+ * the load to wait until the store writes back. In the former case it
+ * holds onto the instruction until the dependence unit looks at it, and
+ * in the latter it stalls the LSQ until the store writes back. At that
+ * point the load is replayed.
+ */
+template <class Impl>
+class OzoneLSQ {
+  public:
+    typedef typename Impl::Params Params;
+    typedef typename Impl::FullCPU FullCPU;
+    typedef typename Impl::BackEnd BackEnd;
+    typedef typename Impl::DynInstPtr DynInstPtr;
+    typedef typename Impl::IssueStruct IssueStruct;
+
+    typedef TheISA::IntReg IntReg;
+
+    typedef typename std::map<InstSeqNum, DynInstPtr>::iterator LdMapIt;
+
+  private:
+    class StoreCompletionEvent : public Event {
+      public:
+        /** Constructs a store completion event. */
+        StoreCompletionEvent(int store_idx, Event *wb_event, OzoneLSQ *lsq_ptr);
+
+        /** Processes the store completion event. */
+        void process();
+
+        /** Returns the description of this event. */
+        const char *description();
+
+      private:
+        /** The store index of the store being written back. */
+        int storeIdx;
+        /** The writeback event for the store.  Needed for store
+         * conditionals.
+         */
+        Event *wbEvent;
+        /** The pointer to the LSQ unit that issued the store. */
+        OzoneLSQ<Impl> *lsqPtr;
+    };
+
+    friend class StoreCompletionEvent;
+
+  public:
+    /** Constructs an LSQ unit. init() must be called prior to use. */
+    OzoneLSQ();
+
+    /** Initializes the LSQ unit with the specified number of entries. */
+    void init(Params *params, unsigned maxLQEntries,
+              unsigned maxSQEntries, unsigned id);
+
+    /** Returns the name of the LSQ unit. */
+    std::string name() const;
+
+    /** Sets the CPU pointer. */
+    void setCPU(FullCPU *cpu_ptr)
+    { cpu = cpu_ptr; }
+
+    /** Sets the back-end stage pointer. */
+    void setBE(BackEnd *be_ptr)
+    { be = be_ptr; }
+
+    /** Sets the page table pointer. */
+    void setPageTable(PageTable *pt_ptr);
+
+    /** Ticks the LSQ unit, which in this case only resets the number of
+     * used cache ports.
+     * @todo: Move the number of used ports up to the LSQ level so it can
+     * be shared by all LSQ units.
+     */
+    void tick() { usedPorts = 0; }
+
+    /** Inserts an instruction. */
+    void insert(DynInstPtr &inst);
+    /** Inserts a load instruction. */
+    void insertLoad(DynInstPtr &load_inst);
+    /** Inserts a store instruction. */
+    void insertStore(DynInstPtr &store_inst);
+
+    /** Executes a load instruction. */
+    Fault executeLoad(DynInstPtr &inst);
+    /** Overload taking a load queue index instead of an instruction. */
+    Fault executeLoad(int lq_idx);
+    /** Executes a store instruction. */
+    Fault executeStore(DynInstPtr &inst);
+
+    /** Commits the head load. */
+    void commitLoad();
+    /** Commits a specific load, given by the sequence number. */
+    void commitLoad(InstSeqNum &inst);
+    /** Commits loads older than a specific sequence number. */
+    void commitLoads(InstSeqNum &youngest_inst);
+
+    /** Commits stores older than a specific sequence number. */
+    void commitStores(InstSeqNum &youngest_inst);
+
+    /** Writes back stores. */
+    void writebackStores();
+
+    // @todo: Include stats in the LSQ unit.
+    //void regStats();
+
+    /** Clears all the entries in the LQ. */
+    void clearLQ();
+
+    /** Clears all the entries in the SQ. */
+    void clearSQ();
+
+    /** Resizes the LQ to a given size. */
+    void resizeLQ(unsigned size);
+
+    /** Resizes the SQ to a given size. */
+    void resizeSQ(unsigned size);
+
+    /** Squashes all instructions younger than a specific sequence number. */
+    void squash(const InstSeqNum &squashed_num);
+
+    /** Returns if there is a memory ordering violation. Value is reset upon
+     * call to getMemDepViolator().
+     */
+    bool violation() { return memDepViolator; }
+
+    /** Returns the memory ordering violator. */
+    DynInstPtr getMemDepViolator();
+
+    /** Returns if a load became blocked due to the memory system.  It clears
+     *  the bool's value upon this being called.
+     */
+    inline bool loadBlocked();
+
+    /** Returns the number of free entries (min of free LQ and SQ entries). */
+    unsigned numFreeEntries();
+
+    /** Returns the number of loads ready to execute. */
+    int numLoadsReady();
+
+    /** Returns the number of loads in the LQ. */
+    int numLoads() { return loads; }
+
+    /** Returns the number of stores in the SQ. */
+    int numStores() { return stores; }
+
+    /** Returns if either the LQ or SQ is full. */
+    bool isFull() { return lqFull() || sqFull(); }
+
+    /** Returns if the LQ is full, i.e. loads >= LQEntries - 1. */
+    bool lqFull() { return loads >= (LQEntries - 1); }
+
+    /** Returns if the SQ is full, i.e. stores >= SQEntries - 1. */
+    bool sqFull() { return stores >= (SQEntries - 1); }
+
+    /** Debugging function to dump instructions in the LSQ. */
+    void dumpInsts();
+
+    /** Returns the number of instructions in the LSQ. */
+    unsigned getCount() { return loads + stores; }
+
+    /** Returns if there are any stores to writeback. */
+    bool hasStoresToWB() { return storesToWB; }
+
+    /** Returns the number of stores to writeback. */
+    int numStoresToWB() { return storesToWB; }
+
+    /** Returns if the LSQ unit will writeback on this cycle. */
+    bool willWB() { return storeQueue[storeWBIdx].canWB &&
+                        !storeQueue[storeWBIdx].completed &&
+                        !dcacheInterface->isBlocked(); }
+
+  private:
+    /** Completes the store at the specified index. */
+    void completeStore(int store_idx);
+
+    /** Increments the given store index (circular queue). */
+    inline void incrStIdx(int &store_idx);
+    /** Decrements the given store index (circular queue). */
+    inline void decrStIdx(int &store_idx);
+    /** Increments the given load index (circular queue). */
+    inline void incrLdIdx(int &load_idx);
+    /** Decrements the given load index (circular queue). */
+    inline void decrLdIdx(int &load_idx);
+
+  private:
+    /** Pointer to the CPU. */
+    FullCPU *cpu;
+
+    /** Pointer to the back-end stage. */
+    BackEnd *be;
+
+    /** Pointer to the D-cache. */
+    MemInterface *dcacheInterface;
+
+    /** Pointer to the page table. */
+    PageTable *pTable;
+
+  public:
+    struct SQEntry {
+        /** Constructs an empty store queue entry. */
+        SQEntry()
+            : inst(NULL), req(NULL), size(0), data(0),
+              canWB(0), committed(0), completed(0)
+        { }
+
+        /** Constructs a store queue entry for a given instruction. */
+        SQEntry(DynInstPtr &_inst)
+            : inst(_inst), req(NULL), size(0), data(0),
+              canWB(0), committed(0), completed(0)
+        { }
+
+        /** The store instruction. */
+        DynInstPtr inst;
+        /** The memory request for the store. */
+        MemReqPtr req;
+        /** The size of the store. */
+        int size;
+        /** The store data. */
+        IntReg data;
+        /** Whether or not the store can writeback. */
+        bool canWB;
+        /** Whether or not the store is committed. */
+        bool committed;
+        /** Whether or not the store is completed. */
+        bool completed;
+    };
+
+    enum Status {
+        Running,
+        Idle,
+        DcacheMissStall,
+        DcacheMissSwitch
+    };
+
+  private:
+    /** The OzoneLSQ thread id. */
+    unsigned lsqID;
+
+    /** The status of the LSQ unit. */
+    Status _status;
+
+    /** The store queue. */
+    std::vector<SQEntry> storeQueue;
+
+    /** The load queue. */
+    std::vector<DynInstPtr> loadQueue;
+
+    // Consider making these 16 bits
+    /** The number of LQ entries. */
+    unsigned LQEntries;
+    /** The number of SQ entries. */
+    unsigned SQEntries;
+
+    /** The number of load instructions in the LQ. */
+    int loads;
+    /** The number of store instructions in the SQ (excludes those waiting to
+     * writeback).
+     */
+    int stores;
+    /** The number of store instructions in the SQ waiting to writeback. */
+    int storesToWB;
+
+    /** The index of the head instruction in the LQ. */
+    int loadHead;
+    /** The index of the tail instruction in the LQ. */
+    int loadTail;
+
+    /** The index of the head instruction in the SQ. */
+    int storeHead;
+    /** The index of the first instruction that is ready to be written back,
+     * and has not yet been written back.
+     */
+    int storeWBIdx;
+    /** The index of the tail instruction in the SQ. */
+    int storeTail;
+
+    /// @todo Consider moving to a more advanced model with write vs read ports
+    /** The number of cache ports available each cycle. */
+    int cachePorts;
+
+    /** The number of used cache ports in this cycle. */
+    int usedPorts;
+
+    //list<InstSeqNum> mshrSeqNums;
+
+     //Stats::Scalar<> dcacheStallCycles;
+    Counter lastDcacheStall;  // curTick at the most recent D-cache miss
+
+    /** Wire to read information from the issue stage time queue. */
+    typename TimeBuffer<IssueStruct>::wire fromIssue;
+
+    // Make these per thread?
+    /** Whether or not the LSQ is stalled. */
+    bool stalled;
+    /** The store that causes the stall due to partial store to load
+     * forwarding.
+     */
+    InstSeqNum stallingStoreIsn;
+    /** The LQ index of the load stalled on the above store. */
+    int stallingLoadIdx;
+
+    /** Whether or not a load is blocked due to the memory system.  It is
+     *  cleared when this value is checked via loadBlocked().
+     */
+    bool isLoadBlocked;
+
+    /** The oldest faulting load instruction. */
+    DynInstPtr loadFaultInst;
+    /** The oldest faulting store instruction. */
+    DynInstPtr storeFaultInst;
+
+    /** The oldest load that caused a memory ordering violation. */
+    DynInstPtr memDepViolator;
+
+    // Will also need how many read/write ports the Dcache has.  Or keep track
+    // of that in stage that is one level up, and only call executeLoad/Store
+    // the appropriate number of times.
+
+  public:
+    /** Executes the load at the given index. */
+    template <class T>
+    Fault read(MemReqPtr &req, T &data, int load_idx);
+
+    /** Records req, size and data in the SQ entry at store_idx. */
+    template <class T>
+    Fault write(MemReqPtr &req, T &data, int store_idx);
+
+    /** Returns the index of the head load instruction. */
+    int getLoadHead() { return loadHead; }
+    /** Returns the head load's sequence number, or 0 if the slot is empty. */
+    InstSeqNum getLoadHeadSeqNum()
+    {
+        if (loadQueue[loadHead]) {
+            return loadQueue[loadHead]->seqNum;
+        } else {
+            return 0;
+        }
+
+    }
+
+    /** Returns the index of the head store instruction. */
+    int getStoreHead() { return storeHead; }
+    /** Returns the head store's sequence number, or 0 if the slot is empty. */
+    InstSeqNum getStoreHeadSeqNum()
+    {
+        if (storeQueue[storeHead].inst) {
+            return storeQueue[storeHead].inst->seqNum;
+        } else {
+            return 0;
+        }
+
+    }
+
+    /** Returns whether or not the LSQ unit is stalled. */
+    bool isStalled()  { return stalled; }
+};
+/** Executes the load at load_idx, forwarding from the SQ when possible. */
+template <class Impl>
+template <class T>
+Fault
+OzoneLSQ<Impl>::read(MemReqPtr &req, T &data, int load_idx)
+{
+    //Depending on issue2execute delay a squashed load could
+    //execute if it is found to be squashed in the same
+    //cycle it is scheduled to execute
+    assert(loadQueue[load_idx]);
+
+    if (loadQueue[load_idx]->isExecuted()) {
+        panic("Should not reach this point with split ops!");
+        // NOTE(review): unreachable, assuming panic() does not return.
+        memcpy(&data,req->data,req->size);
+
+        return NoFault;
+    }
+
+    // Make sure this isn't an uncacheable access
+    // A bit of a hackish way to get uncached accesses to work only if they're
+    // at the head of the LSQ and are ready to commit (at the head of the ROB
+    // too).
+    // @todo: Fix uncached accesses.
+    if (req->flags & UNCACHEABLE &&
+        (load_idx != loadHead || !loadQueue[load_idx]->readyToCommit())) {
+
+        return TheISA::genMachineCheckFault();
+    }
+
+    // Check the SQ for any previous stores that might lead to forwarding
+    int store_idx = loadQueue[load_idx]->sqIdx;
+
+    int store_size = 0;
+
+    DPRINTF(OzoneLSQ, "Read called, load idx: %i, store idx: %i, "
+            "storeHead: %i addr: %#x\n",
+            load_idx, store_idx, storeHead, req->paddr);
+
+    while (store_idx != -1) {
+        // End once we've reached the top of the LSQ
+        if (store_idx == storeWBIdx) {
+            break;
+        }
+
+        // Step back one entry (circular); presumably to the next older store
+        if (--store_idx < 0)
+            store_idx += SQEntries;
+
+        assert(storeQueue[store_idx].inst);
+
+        store_size = storeQueue[store_idx].size;
+
+        if (store_size == 0)
+            continue;
+
+        // Check if the store data is within the lower and upper bounds of
+        // addresses that the request needs.
+        bool store_has_lower_limit =
+            req->vaddr >= storeQueue[store_idx].inst->effAddr;
+        bool store_has_upper_limit =
+            (req->vaddr + req->size) <= (storeQueue[store_idx].inst->effAddr +
+                                         store_size);
+        bool lower_load_has_store_part =
+            req->vaddr < (storeQueue[store_idx].inst->effAddr +
+                           store_size);
+        bool upper_load_has_store_part =
+            (req->vaddr + req->size) > storeQueue[store_idx].inst->effAddr;
+
+        // If the store's data has all of the data needed, we can forward.
+        if (store_has_lower_limit && store_has_upper_limit) {
+
+            int shift_amt = req->vaddr & (store_size - 1);
+            // Byte offset -> bit offset (assumes byte addressing)
+            shift_amt = shift_amt << 3;
+
+            // Cast this to type T?
+            data = storeQueue[store_idx].data >> shift_amt;
+
+            req->cmd = Read;
+            assert(!req->completionEvent);
+            req->completionEvent = NULL;
+            req->time = curTick;
+            assert(!req->data);
+            req->data = new uint8_t[64];
+
+            memcpy(req->data, &data, req->size);
+
+            DPRINTF(OzoneLSQ, "Forwarding from store idx %i to load to "
+                    "addr %#x, data %#x\n",
+                    store_idx, req->vaddr, *(req->data));
+
+            typename BackEnd::LdWritebackEvent *wb =
+                new typename BackEnd::LdWritebackEvent(loadQueue[load_idx],
+                                                       be);
+
+            // Intended as a 1 cycle load-store forwarding latency, but the
+            // event is scheduled at curTick (no added delay) for now.
+            // FIXME - Need to make this a parameter.
+            wb->schedule(curTick);
+
+            // Should keep track of stat for forwarded data
+            return NoFault;
+        } else if ((store_has_lower_limit && lower_load_has_store_part) ||
+                   (store_has_upper_limit && upper_load_has_store_part) ||
+                   (lower_load_has_store_part && upper_load_has_store_part)) {
+            // This is the partial store-load forwarding case where a store
+            // has only part of the load's data.
+
+            // If it's already been written back, then don't worry about
+            // stalling on it.
+            if (storeQueue[store_idx].completed) {
+                continue;
+            }
+
+            // Must stall load and force it to retry, so long as it's the oldest
+            // load that needs to do so.
+            if (!stalled ||
+                (stalled &&
+                 loadQueue[load_idx]->seqNum <
+                 loadQueue[stallingLoadIdx]->seqNum)) {
+                stalled = true;
+                stallingStoreIsn = storeQueue[store_idx].inst->seqNum;
+                stallingLoadIdx = load_idx;
+            }
+
+            // Tell IQ/mem dep unit that this instruction will need to be
+            // rescheduled eventually
+            be->rescheduleMemInst(loadQueue[load_idx]);
+
+            DPRINTF(OzoneLSQ, "Load-store forwarding mis-match. "
+                    "Store idx %i to load addr %#x\n",
+                    store_idx, req->vaddr);
+
+            return NoFault;
+        }
+    }
+
+
+    // If there's no forwarding case, then go access memory
+    DynInstPtr inst = loadQueue[load_idx];
+
+    ++usedPorts;
+
+    // if we have a cache, do cache access too
+    if (dcacheInterface) {
+        if (dcacheInterface->isBlocked()) {
+            isLoadBlocked = true;
+            // No fault occurred, even though the interface is blocked.
+            return NoFault;
+        }
+
+        DPRINTF(OzoneLSQ, "D-cache: PC:%#x reading from paddr:%#x "
+                "vaddr:%#x flags:%i\n",
+                inst->readPC(), req->paddr, req->vaddr, req->flags);
+
+        // Setup MemReq pointer
+        req->cmd = Read;
+        req->completionEvent = NULL;
+        req->time = curTick;
+        assert(!req->data);
+        req->data = new uint8_t[64];
+        // NOTE(review): vacuous; completionEvent was set to NULL above.
+        assert(!req->completionEvent);
+        req->completionEvent =
+            new typename BackEnd::LdWritebackEvent(loadQueue[load_idx], be);
+
+        // Do Cache Access
+        MemAccessResult result = dcacheInterface->access(req);
+
+        // Ugly hack to get an event scheduled *only* if the access is
+        // a miss.  We really should add first-class support for this
+        // at some point.
+        // @todo: Probably should support having no events
+        if (result != MA_HIT) {
+            DPRINTF(OzoneLSQ, "D-cache miss!\n");
+            DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n",
+                    inst->seqNum);
+
+            lastDcacheStall = curTick;
+
+            _status = DcacheMissStall;
+
+        } else {
+//            DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n",
+//                    inst->seqNum);
+
+            DPRINTF(OzoneLSQ, "D-cache hit!\n");
+        }
+    } else {
+        fatal("Must use D-cache with new memory system");
+    }
+
+    return NoFault;
+}
+
+template <class Impl>
+template <class T>
+Fault
+OzoneLSQ<Impl>::write(MemReqPtr &req, T &data, int store_idx)
+{
+    SQEntry &sq_entry = storeQueue[store_idx];
+    assert(sq_entry.inst);
+
+    DPRINTF(OzoneLSQ, "Doing write to store idx %i, addr %#x data %#x"
+            " | storeHead:%i [sn:%i]\n",
+            store_idx, req->paddr, data, storeHead, sq_entry.inst->seqNum);
+
+    // Latch the request, the access size and the value into the SQ entry.
+    sq_entry.req = req;
+    sq_entry.size = sizeof(T);
+    sq_entry.data = data;
+
+    // Only the store queue entry is updated here, so no fault can occur.
+    return NoFault;
+}
+
+template <class Impl>
+inline bool
+OzoneLSQ<Impl>::loadBlocked()
+{
+    const bool was_blocked = isLoadBlocked;  // value reported to the caller
+    isLoadBlocked = false;                   // querying the flag clears it
+    return was_blocked;
+}
+
+#endif // __CPU_OZONE_LSQ_UNIT_HH__
diff --git a/cpu/ozone/lsq_unit_impl.hh b/cpu/ozone/lsq_unit_impl.hh
new file mode 100644
index 000000000..6c7977250
--- /dev/null
+++ b/cpu/ozone/lsq_unit_impl.hh
@@ -0,0 +1,846 @@
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "arch/isa_traits.hh"
+#include "base/str.hh"
+#include "cpu/ozone/lsq_unit.hh"
+
+template <class Impl>
+OzoneLSQ<Impl>::StoreCompletionEvent::StoreCompletionEvent(
+    int store_idx, Event *wb_event, OzoneLSQ<Impl> *lsq_ptr)
+    : Event(&mainEventQueue),
+      storeIdx(store_idx), wbEvent(wb_event), lsqPtr(lsq_ptr)
+{
+    // Keeps the SQ slot of the store, an optional event to run when the
+    // store completes (may be NULL), and the LSQ that owns the store.
+    // Flagged AutoDelete; presumably the event queue then frees the event.
+    this->setFlags(Event::AutoDelete);
+}
+
+template <class Impl>
+void
+OzoneLSQ<Impl>::StoreCompletionEvent::process()
+{
+    DPRINTF(OzoneLSQ, "Cache miss complete for store idx:%i\n", storeIdx);
+
+    //lsqPtr->removeMSHR(lsqPtr->storeQueue[storeIdx].inst->seqNum);
+    // Run the optional writeback event inline, then mark the store done.
+//    lsqPtr->cpu->wakeCPU();
+    if (wbEvent)
+        wbEvent->process();
+    lsqPtr->completeStore(storeIdx);
+}
+
+template <class Impl>
+const char *
+OzoneLSQ<Impl>::StoreCompletionEvent::description()
+{
+    return "LSQ store completion event";  // fixed label for this event
+}
+
+template <class Impl>
+OzoneLSQ<Impl>::OzoneLSQ()
+    : loads(0), stores(0), storesToWB(0), stalled(false), isLoadBlocked(false)
+{   // Only counters and flags are reset here; init() sizes the queues.
+}
+
+template<class Impl>
+void
+OzoneLSQ<Impl>::init(Params *params, unsigned maxLQEntries,
+                     unsigned maxSQEntries, unsigned id)
+{
+    // One-time setup: size both queues and read the cache configuration.
+    DPRINTF(OzoneLSQ, "Creating OzoneLSQ%i object.\n",id);
+
+    lsqID = id;
+
+    // Load queue: entries are default-constructed, indices start at 0.
+    LQEntries = maxLQEntries;
+    loadQueue.resize(LQEntries);
+    loadHead = loadTail = 0;
+
+    // Store queue: same, including the writeback cursor.
+    SQEntries = maxSQEntries;
+    storeQueue.resize(SQEntries);
+    storeHead = storeWBIdx = storeTail = 0;
+
+    // Cache port accounting and the D-cache handle come from the params.
+    usedPorts = 0;
+    cachePorts = params->cachePorts;
+    dcacheInterface = params->dcacheInterface;
+
+    // No faulting or order-violating instruction is recorded yet.
+    loadFaultInst = NULL;
+    storeFaultInst = NULL;
+    memDepViolator = NULL;
+}
+
+template<class Impl>
+std::string
+OzoneLSQ<Impl>::name() const
+{
+    return "lsqunit";  // fixed label; lsqID is not part of the name
+}
+
+template<class Impl>
+void
+OzoneLSQ<Impl>::clearLQ()
+{
+    loadQueue.clear();  // LQEntries, loadHead and loadTail are not reset
+}
+
+template<class Impl>
+void
+OzoneLSQ<Impl>::clearSQ()
+{
+    storeQueue.clear();  // SQEntries and the store indices are not reset
+}
+
+template<class Impl>
+void
+OzoneLSQ<Impl>::setPageTable(PageTable *pt_ptr)
+{
+    DPRINTF(OzoneLSQ, "Setting the page table pointer.\n");
+    pTable = pt_ptr;  // only the pointer is kept; nothing is copied
+}
+
+template<class Impl>
+void
+OzoneLSQ<Impl>::resizeLQ(unsigned size)
+{
+    // The load queue may only grow or keep its current size.
+    assert( size >= LQEntries);
+
+    // Not growing: just record the requested entry count.
+    if (size <= LQEntries) {
+        LQEntries = size;
+        return;
+    }
+
+    // Growing: append empty entries until the storage is large enough.
+    for (; size > loadQueue.size(); ++LQEntries)
+        loadQueue.push_back(DynInstPtr());
+}
+
+template<class Impl>
+void
+OzoneLSQ<Impl>::resizeSQ(unsigned size)
+{
+    // Not growing: only the logical entry count changes; storage is kept.
+    if (size <= SQEntries) {
+        SQEntries = size;
+        return;
+    }
+
+    // Growing: append default entries until the storage is large enough.
+    for (; size > storeQueue.size(); ++SQEntries)
+        storeQueue.push_back(SQEntry());
+}
+
+template <class Impl>
+void
+OzoneLSQ<Impl>::insert(DynInstPtr &inst)
+{
+    // Only memory references belong in the LSQ...
+    assert(inst->isMemRef());
+    // ...and each one must be either a load or a store.
+    assert(inst->isLoad() || inst->isStore());
+
+    // Hand the instruction to the queue that matches its kind.
+    if (!inst->isLoad()) {
+        insertStore(inst);
+        return;
+    }
+
+    insertLoad(inst);
+//    inst->setInLSQ();
+}
+
+template <class Impl>
+void
+OzoneLSQ<Impl>::insertLoad(DynInstPtr &load_inst)
+{
+    // The queue must not be full; one slot always stays unused.
+    assert((loadTail + 1) % LQEntries != loadHead && loads < LQEntries);
+
+    DPRINTF(OzoneLSQ, "Inserting load PC %#x, idx:%i [sn:%lli]\n",
+            load_inst->readPC(), loadTail, load_inst->seqNum);
+
+    // Record the load's LQ slot and the SQ tail at insertion time
+    // (-1 when the store queue currently holds no stores).
+    load_inst->lqIdx = loadTail;
+    load_inst->sqIdx = (stores == 0) ? -1 : storeTail;
+
+    // Place the load in the tail slot.
+    loadQueue[loadTail] = load_inst;
+
+    // Advance the tail, wrapping at the end of the queue.
+    incrLdIdx(loadTail);
+
+    ++loads;
+}
+
+template <class Impl>
+void
+OzoneLSQ<Impl>::insertStore(DynInstPtr &store_inst)
+{
+    // Refuse to insert into a full queue; one slot always stays unused.
+    assert((storeTail + 1) % SQEntries != storeHead);
+    assert(stores < SQEntries);
+
+    DPRINTF(OzoneLSQ, "Inserting store PC %#x, idx:%i [sn:%lli]\n",
+            store_inst->readPC(), storeTail, store_inst->seqNum);
+
+    // Remember the LQ tail and the store's own SQ slot at insertion time;
+    // executeStore() scans loads starting from that LQ position.
+    store_inst->lqIdx = loadTail;
+    store_inst->sqIdx = storeTail;
+
+    // Wrap the instruction in a fresh entry at the tail, then advance.
+    storeQueue[storeTail] = SQEntry(store_inst);
+    ++stores;
+    incrStIdx(storeTail);
+}
+
+template <class Impl>
+typename Impl::DynInstPtr
+OzoneLSQ<Impl>::getMemDepViolator()
+{
+    // Hand the recorded violator (possibly NULL) to the caller and forget
+    // it, so the same violation is not reported twice.
+    DynInstPtr violator = memDepViolator;
+    memDepViolator = NULL;
+    return violator;
+}
+
+template <class Impl>
+unsigned
+OzoneLSQ<Impl>::numFreeEntries()
+{
+    // Each queue reserves one dummy slot to tell empty from full, so the
+    // usable free count is one less than capacity minus occupancy.
+    // The caller gets the count for whichever queue is tighter.
+    const unsigned lq_free = LQEntries - loads;
+    const unsigned sq_free = SQEntries - stores;
+
+    // Pick the smaller of the two raw counts.
+    const unsigned tightest = (lq_free < sq_free) ? lq_free : sq_free;
+
+    return tightest - 1;
+}
+
+template <class Impl>
+int
+OzoneLSQ<Impl>::numLoadsReady()
+{
+    int load_idx = loadHead;
+    int retval = 0;
+
+    // Count the ready loads between head and tail.  The index has to be
+    // advanced on every pass, otherwise a non-empty queue loops forever.
+    while (load_idx != loadTail) {
+        assert(loadQueue[load_idx]);
+        if (loadQueue[load_idx]->readyToIssue())
+            ++retval;
+        incrLdIdx(load_idx);
+    }
+    return retval;
+}
+
+#if 0
+template <class Impl>
+Fault
+OzoneLSQ<Impl>::executeLoad()
+{
+    Fault load_fault = NoFault;
+    DynInstPtr load_inst;
+
+    assert(readyLoads.size() != 0);
+
+    // Execute a ready load.
+    LdMapIt ready_it = readyLoads.begin();
+
+    load_inst = (*ready_it).second;
+
+    // Execute the instruction, which is held in the data portion of the
+    // iterator.
+    load_fault = load_inst->execute();
+
+    // If it executed successfully, then switch it over to the executed
+    // loads list.
+    if (load_fault == NoFault) {
+        executedLoads[load_inst->seqNum] = load_inst;
+
+        readyLoads.erase(ready_it);
+    } else {
+        loadFaultInst = load_inst;
+    }
+
+    return load_fault;
+}
+#endif
+
+template <class Impl>
+Fault
+OzoneLSQ<Impl>::executeLoad(DynInstPtr &inst)
+{
+    // Initiate the access for one specific load and return its fault.
+    Fault load_fault = NoFault;
+
+    DPRINTF(OzoneLSQ, "Executing load PC %#x, [sn:%lli]\n",
+            inst->readPC(),inst->seqNum);
+
+    // Sanity check that the load is really in the queue (disabled).
+    // Normally it should always be in the list.  However,
+    /* due to a syscall it may not be in the list.
+#ifdef DEBUG
+    int i = loadHead;
+    while (1) {
+        if (i == loadTail && !find(inst)) {
+            assert(0 && "Load not in the queue!");
+        } else if (loadQueue[i] == inst) {
+            break;
+        }
+
+        i = i + 1;
+        if (i >= LQEntries) {
+            i = 0;
+        }
+    }
+#endif // DEBUG*/
+
+    load_fault = inst->initiateAcc();
+
+    // The fault is only returned to the caller; nothing is recorded here.
+    // Might want to make sure a previously faulting instruction that has
+    // not been checked yet is kept; probably want the oldest faulting load.
+    if (load_fault != NoFault) {
+        // Maybe just set it as can commit here, although that might cause
+        // some other problems with sending traps to the ROB too quickly.
+//        iewStage->instToCommit(inst);
+//        iewStage->activityThisCycle();
+    }
+
+    return load_fault;
+}
+
+template <class Impl>
+Fault
+OzoneLSQ<Impl>::executeLoad(int lq_idx)
+{
+    // Executes the load in LQ slot lq_idx with its canCommit flag set for
+    // the duration of the call.  Very hackish: canCommit is overloaded to
+    // stand for "this instruction is at the head of the ROB"; dedicated
+    // state for that would be cleaner.
+    loadQueue[lq_idx]->setCanCommit();
+    Fault ret_fault = executeLoad(loadQueue[lq_idx]);
+    loadQueue[lq_idx]->clearCanCommit();
+    return ret_fault;
+}
+
+template <class Impl>
+Fault
+OzoneLSQ<Impl>::executeStore(DynInstPtr &store_inst)
+{
+    // Runs a store and checks younger loads for an ordering violation.
+    assert(stores != 0);
+
+    int store_idx = store_inst->sqIdx;
+
+    DPRINTF(OzoneLSQ, "Executing store PC %#x [sn:%lli]\n",
+            store_inst->readPC(), store_inst->seqNum);
+
+    // Loads inserted after this store start at the LQ index recorded when
+    // the store was inserted; they are scanned for a violation below.
+    int load_idx = store_inst->lqIdx;
+
+    Fault store_fault = store_inst->initiateAcc();
+
+    // The SQ entry's size is expected to be filled in by now (see write());
+    // a size of 0 is treated as the access not having gone through.
+    int size = storeQueue[store_idx].size;
+
+    if (size == 0) {
+        DPRINTF(OzoneLSQ,"Fault on Store PC %#x, [sn:%lli],Size = 0\n",
+                store_inst->readPC(),store_inst->seqNum);
+
+        return store_fault;
+    }
+
+    assert(store_fault == NoFault);
+
+    if (!storeFaultInst) {
+        if (store_fault != NoFault) {
+            panic("Fault in a store instruction!");
+            storeFaultInst = store_inst;
+        } else if (store_inst->isNonSpeculative()) {
+            // Nonspeculative accesses (namely store conditionals)
+            // need to set themselves as able to writeback if we
+            // haven't had a fault by here.
+            storeQueue[store_idx].canWB = true;
+
+            ++storesToWB;
+        }
+    }
+
+    if (!memDepViolator) {
+        while (load_idx != loadTail) {
+            // Actually should only check loads that have actually executed
+            // Might be safe because effAddr is set to InvalAddr when the
+            // dyn inst is created.
+
+            // Ideally every address in the accessed size range would be
+            // checked.  For now the comparison is coarse: shifting by 8
+            // treats any load in the same 256-byte aligned region as a
+            // conflict with this store.
+            // @todo: Fix this, magic number being used here
+            if ((loadQueue[load_idx]->effAddr >> 8) ==
+                (store_inst->effAddr >> 8)) {
+                // A load incorrectly passed this store.  Squash and refetch.
+                // For now return a fault to show that it was unsuccessful.
+                memDepViolator = loadQueue[load_idx];
+
+                return TheISA::genMachineCheckFault();
+            }
+
+            incrLdIdx(load_idx);
+        }
+
+        // If we've reached this point, there was no violation.
+        memDepViolator = NULL;
+    }
+
+    return store_fault;
+}
+
+template <class Impl>
+void
+OzoneLSQ<Impl>::commitLoad()
+{
+    // There must be a load in the head slot to retire.
+    assert(loadQueue[loadHead]);
+
+    DPRINTF(OzoneLSQ, "[sn:%lli] Committing head load instruction, PC %#x\n",
+            loadQueue[loadHead]->seqNum, loadQueue[loadHead]->readPC());
+
+    // Drop the queue's reference to the instruction and free the slot.
+    loadQueue[loadHead] = NULL;
+    --loads;
+
+    incrLdIdx(loadHead);
+}
+
+template <class Impl>
+void
+OzoneLSQ<Impl>::commitLoad(InstSeqNum &inst)
+{
+    // Deliberately disabled: the function panics as soon as it is entered.
+    panic("Don't use this function!");
+    // The code below searches from loadHead for the given sequence number.
+    int i = loadHead;
+    while (1) {
+        if (i == loadTail) {
+            assert(0 && "Load not in the queue!");
+        } else if (loadQueue[i]->seqNum == inst) {
+            break;
+        }
+
+        ++i;
+        if (i >= LQEntries) {
+            i = 0;
+        }
+    }
+    // NOTE(review): the slot is cleared but loadHead is never advanced.
+//    loadQueue[i]->removeInLSQ();
+    loadQueue[i] = NULL;
+    --loads;
+}
+
+template <class Impl>
+void
+OzoneLSQ<Impl>::commitLoads(InstSeqNum &youngest_inst)
+{
+    // Retire loads from the head while they are not younger than the limit.
+    assert(loads == 0 || loadQueue[loadHead]);
+
+    while (loads != 0 && loadQueue[loadHead]->seqNum <= youngest_inst)
+        commitLoad();
+}
+
+template <class Impl>
+void
+OzoneLSQ<Impl>::commitStores(InstSeqNum &youngest_inst)
+{
+    // Marks stores up to and including youngest_inst as able to write
+    // back.  Nothing is sent to memory here; see writebackStores().
+    assert(stores == 0 || storeQueue[storeHead].inst);
+
+    for (int store_idx = storeHead; store_idx != storeTail;
+         incrStIdx(store_idx)) {
+        SQEntry &entry = storeQueue[store_idx];
+        assert(entry.inst);
+        // Already marked; nothing to do for this entry.
+        if (entry.canWB)
+            continue;
+
+        // An unmarked store younger than the limit ends the scan.
+        if (entry.inst->seqNum > youngest_inst)
+            break;
+
+        DPRINTF(OzoneLSQ, "Marking store as able to write back, PC "
+                "%#x [sn:%lli]\n",
+                entry.inst->readPC(), entry.inst->seqNum);
+
+        entry.canWB = true;
+        ++storesToWB;
+    }
+}
+
+template <class Impl>
+void
+OzoneLSQ<Impl>::writebackStores()
+{
+    while (storesToWB > 0 &&
+           storeWBIdx != storeTail &&
+           storeQueue[storeWBIdx].inst &&
+           storeQueue[storeWBIdx].canWB &&
+           usedPorts < cachePorts) {
+        // Each pass handles the next releasable store, in queue order.
+        if (storeQueue[storeWBIdx].size == 0) {
+            completeStore(storeWBIdx);
+
+            incrStIdx(storeWBIdx);
+
+            continue;
+        }
+
+        if (dcacheInterface && dcacheInterface->isBlocked()) {
+            DPRINTF(OzoneLSQ, "Unable to write back any more stores, cache"
+                    " is blocked!\n");
+            break;
+        }
+
+        ++usedPorts;
+        // NOTE(review): prefetches skip completeStore() -- confirm intended.
+        if (storeQueue[storeWBIdx].inst->isDataPrefetch()) {
+            incrStIdx(storeWBIdx);
+
+            continue;
+        }
+
+        assert(storeQueue[storeWBIdx].req);
+        assert(!storeQueue[storeWBIdx].committed);
+
+        MemReqPtr req = storeQueue[storeWBIdx].req;
+        storeQueue[storeWBIdx].committed = true;
+        // Fill in the request; req->size bytes go into a new 64-byte buffer.
+//	Fault fault = cpu->translateDataReadReq(req);
+        req->cmd = Write;
+        req->completionEvent = NULL;
+        req->time = curTick;
+        assert(!req->data);
+        req->data = new uint8_t[64];
+        memcpy(req->data, (uint8_t *)&storeQueue[storeWBIdx].data, req->size);
+
+        DPRINTF(OzoneLSQ, "D-Cache: Writing back store idx:%i PC:%#x "
+                "to Addr:%#x, data:%#x [sn:%lli]\n",
+                storeWBIdx,storeQueue[storeWBIdx].inst->readPC(),
+                req->paddr, *(req->data),
+                storeQueue[storeWBIdx].inst->seqNum);
+
+//        if (fault != NoFault) {
+            //What should we do if there is a fault???
+            //for now panic
+//            panic("Page Table Fault!!!!!\n");
+//        }
+
+        if (dcacheInterface) {
+            MemAccessResult result = dcacheInterface->access(req);
+
+            //@todo temp fix for LL/SC (works fine for 1 CPU)
+            if (req->flags & LOCKED) {
+                req->result=1;
+                panic("LL/SC! oh no no support!!!");
+            }
+
+            if (isStalled() &&
+                storeQueue[storeWBIdx].inst->seqNum == stallingStoreIsn) {
+                DPRINTF(OzoneLSQ, "Unstalling, stalling store [sn:%lli] "
+                        "load idx:%i\n",
+                        stallingStoreIsn, stallingLoadIdx);
+                stalled = false;
+                stallingStoreIsn = 0;
+                be->replayMemInst(loadQueue[stallingLoadIdx]);
+            }
+
+            if (result != MA_HIT && dcacheInterface->doEvents()) {
+                Event *wb = NULL;
+/*
+                typename IEW::LdWritebackEvent *wb = NULL;
+                if (req->flags & LOCKED) {
+                    // Stx_C does not generate a system port transaction.
+                    req->result=0;
+                    wb = new typename IEW::LdWritebackEvent(storeQueue[storeWBIdx].inst,
+                                                            iewStage);
+                }
+*/
+                DPRINTF(OzoneLSQ,"D-Cache Write Miss!\n");
+
+//                DPRINTF(Activity, "Active st accessing mem miss [sn:%lli]\n",
+//                        storeQueue[storeWBIdx].inst->seqNum);
+
+                // On a miss the store is completed later, when the
+                // StoreCompletionEvent attached here is processed.
+                assert(!req->completionEvent);
+                req->completionEvent = new
+                    StoreCompletionEvent(storeWBIdx, wb, this);
+
+                lastDcacheStall = curTick;
+
+                _status = DcacheMissStall;
+
+                //mshrSeqNums.push_back(storeQueue[storeWBIdx].inst->seqNum);
+
+                //DPRINTF(OzoneLSQ, "Added MSHR. count = %i\n",mshrSeqNums.size());
+
+                // Increment stat here or something
+            } else {
+                DPRINTF(OzoneLSQ,"D-Cache: Write Hit on idx:%i !\n",
+                        storeWBIdx);
+
+//                DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n",
+//                        storeQueue[storeWBIdx].inst->seqNum);
+
+                if (req->flags & LOCKED) {
+                    // Stx_C does not generate a system port transaction.
+                    req->result=1;
+                    typename BackEnd::LdWritebackEvent *wb =
+                        new typename BackEnd::LdWritebackEvent(storeQueue[storeWBIdx].inst,
+                                                               be);
+                    wb->schedule(curTick);
+                }
+
+                completeStore(storeWBIdx);
+            }
+
+            incrStIdx(storeWBIdx);
+        } else {
+            panic("Must HAVE DCACHE!!!!!\n");
+        }
+    }
+
+    // Not sure this should set it to 0.
+    usedPorts = 0;
+
+    assert(stores >= 0 && storesToWB >= 0);
+}
+
+/*template <class Impl>
+void
+OzoneLSQ<Impl>::removeMSHR(InstSeqNum seqNum)
+{
+    list<InstSeqNum>::iterator mshr_it = find(mshrSeqNums.begin(),
+                                              mshrSeqNums.end(),
+                                              seqNum);
+
+    if (mshr_it != mshrSeqNums.end()) {
+        mshrSeqNums.erase(mshr_it);
+        DPRINTF(OzoneLSQ, "Removing MSHR. count = %i\n",mshrSeqNums.size());
+    }
+}*/
+
+template <class Impl>
+void
+OzoneLSQ<Impl>::squash(const InstSeqNum &squashed_num)
+{
+    DPRINTF(OzoneLSQ, "Squashing until [sn:%lli]!"
+            "(Loads:%i Stores:%i)\n",squashed_num,loads,stores);
+    // Remove loads younger than squashed_num, youngest (tail) first.
+    int load_idx = loadTail;
+    decrLdIdx(load_idx);
+
+    while (loads != 0 && loadQueue[load_idx]->seqNum > squashed_num) {
+
+        // Log the victim before its slot is cleared below.
+        DPRINTF(OzoneLSQ,"Load Instruction PC %#x squashed, "
+                "[sn:%lli]\n",
+                loadQueue[load_idx]->readPC(),
+                loadQueue[load_idx]->seqNum);
+
+        if (isStalled() && load_idx == stallingLoadIdx) {
+            stalled = false;
+            stallingStoreIsn = 0;
+            stallingLoadIdx = 0;
+        }
+        // Mark it squashed and drop the queue's reference to it.
+        loadQueue[load_idx]->squashed = true;
+        loadQueue[load_idx] = NULL;
+        --loads;
+
+        // Pull the tail back over the freed slot (inefficient: per entry).
+        loadTail = load_idx;
+
+        decrLdIdx(load_idx);
+    }
+    // Same backwards walk for stores younger than squashed_num.
+    int store_idx = storeTail;
+    decrStIdx(store_idx);
+
+    while (stores != 0 && storeQueue[store_idx].inst->seqNum > squashed_num) {
+
+        // Log the victim before its entry is cleared below.
+        DPRINTF(OzoneLSQ,"Store Instruction PC %#x squashed, "
+                "idx:%i [sn:%lli]\n",
+                storeQueue[store_idx].inst->readPC(),
+                store_idx, storeQueue[store_idx].inst->seqNum);
+
+        // I don't think this can happen.  It should have been cleared by the
+        // stalling load.
+        if (isStalled() &&
+            storeQueue[store_idx].inst->seqNum == stallingStoreIsn) {
+            panic("Is stalled should have been cleared by stalling load!\n");
+            stalled = false;
+            stallingStoreIsn = 0;
+        }
+        // Mark it squashed, drop the reference and withdraw writeback.
+        storeQueue[store_idx].inst->squashed = true;
+        storeQueue[store_idx].inst = NULL;
+        storeQueue[store_idx].canWB = 0;
+
+        if (storeQueue[store_idx].req) {
+            assert(!storeQueue[store_idx].req->completionEvent);
+        }
+        storeQueue[store_idx].req = NULL;
+        --stores;
+
+        // Pull the tail back over the freed slot (inefficient: per entry).
+        storeTail = store_idx;
+
+        decrStIdx(store_idx);
+    }
+}
+
+template <class Impl>
+void
+OzoneLSQ<Impl>::dumpInsts()
+{
+    // Debug aid: prints the sequence number and PC of each queued load
+    // and store, walking each queue from head towards tail.
+    cprintf("Load store queue: Dumping instructions.\n");
+    cprintf("Load queue size: %i\n", loads);
+    cprintf("Load queue: ");
+
+    // Stops at the tail or at the first empty load slot.
+    for (int load_idx = loadHead;
+         load_idx != loadTail && loadQueue[load_idx];
+         incrLdIdx(load_idx)) {
+        cprintf("[sn:%lli] %#x ", loadQueue[load_idx]->seqNum,
+                loadQueue[load_idx]->readPC());
+    }
+
+    cprintf("\nStore queue size: %i\n", stores);
+    cprintf("Store queue: ");
+
+    // Stops at the tail or at the first store entry without an instruction.
+    for (int store_idx = storeHead;
+         store_idx != storeTail && storeQueue[store_idx].inst;
+         incrStIdx(store_idx)) {
+        cprintf("[sn:%lli] %#x ", storeQueue[store_idx].inst->seqNum,
+                storeQueue[store_idx].inst->readPC());
+    }
+
+    cprintf("\n");
+}
+
+template <class Impl>
+void
+OzoneLSQ<Impl>::completeStore(int store_idx)
+{
+    assert(storeQueue[store_idx].inst);
+    storeQueue[store_idx].completed = true;
+    --storesToWB;
+    // A bit conservative because a store completion may not free up entries,
+    // but hopefully avoids two store completions in one cycle from making
+    // the CPU tick twice.
+//    cpu->activityThisCycle();
+    // If the head completed, pop it and any completed stores behind it.
+    if (store_idx == storeHead) {
+        do {
+            incrStIdx(storeHead);
+
+            --stores;
+        } while (storeQueue[storeHead].completed &&
+                 storeHead != storeTail);
+
+//        be->updateLSQNextCycle = true;
+    }
+
+    DPRINTF(OzoneLSQ, "Store head idx:%i\n", storeHead);
+    // If a load was stalled on this store, clear the stall and replay it.
+    if (isStalled() &&
+        storeQueue[store_idx].inst->seqNum == stallingStoreIsn) {
+        DPRINTF(OzoneLSQ, "Unstalling, stalling store [sn:%lli] "
+                "load idx:%i\n",
+                stallingStoreIsn, stallingLoadIdx);
+        stalled = false;
+        stallingStoreIsn = 0;
+        be->replayMemInst(loadQueue[stallingLoadIdx]);
+    }
+}
+
+template <class Impl>
+inline void
+OzoneLSQ<Impl>::incrStIdx(int &store_idx)
+{
+    if (++store_idx >= SQEntries)  // stepped past the last SQ slot
+        store_idx = 0;             // wrap around to the first slot
+}
+
+template <class Impl>
+inline void
+OzoneLSQ<Impl>::decrStIdx(int &store_idx)
+{
+    if (--store_idx < 0)           // stepped before the first SQ slot
+        store_idx += SQEntries;    // wrap around to the last slot
+}
+
+template <class Impl>
+inline void
+OzoneLSQ<Impl>::incrLdIdx(int &load_idx)
+{
+    if (++load_idx >= LQEntries)   // stepped past the last LQ slot
+        load_idx = 0;              // wrap around to the first slot
+}
+
+template <class Impl>
+inline void
+OzoneLSQ<Impl>::decrLdIdx(int &load_idx)
+{
+    if (--load_idx < 0)            // stepped before the first LQ slot
+        load_idx += LQEntries;     // wrap around to the last slot
+}
diff --git a/cpu/ozone/null_predictor.hh b/cpu/ozone/null_predictor.hh
new file mode 100644
index 000000000..d19e2cd1c
--- /dev/null
+++ b/cpu/ozone/null_predictor.hh
@@ -0,0 +1,76 @@
+
+#ifndef __CPU_OZONE_NULL_PREDICTOR_HH__
+#define __CPU_OZONE_NULL_PREDICTOR_HH__
+
+#include "arch/isa_traits.hh"
+#include "cpu/inst_seq.hh"
+
+template <class Impl>
+class NullPredictor
+{
+  public:
+    typedef typename Impl::Params Params;
+    typedef typename Impl::DynInstPtr DynInstPtr;
+    /** The parameters are accepted but not used. */
+    NullPredictor(Params *p) { }
+    /** A PC paired with the PC expected to follow it. */
+    struct BPredInfo {
+        BPredInfo()
+            : PC(0), nextPC(0)
+        { }
+
+        BPredInfo(const Addr &pc, const Addr &next_pc)
+            : PC(pc), nextPC(next_pc)
+        { }
+
+        Addr PC;
+        Addr nextPC;
+    };
+    /** Pairs PC with PC + 4; no branch target is ever predicted. */
+    BPredInfo lookup(Addr &PC) { return BPredInfo(PC, PC+4); }
+    /** Nothing to roll back: this class keeps no prediction state. */
+    void undo(BPredInfo &bp_info) { return; }
+
+    /**
+     * Stub for predicting whether the instruction is a taken branch.
+     * Neither the instruction nor PC is examined or modified.
+     * @param inst The branch instruction.
+     * @param PC Left unchanged by this stub.
+     * @param tid The thread id.
+     * @return Always false: a taken branch is never predicted.
+     */
+    bool predict(DynInstPtr &inst, Addr &PC, unsigned tid)
+    { return false; }
+
+    /**
+     * Would commit predictor updates up to the given sequence number;
+     * does nothing here.
+     * @param done_sn The sequence number to commit any older updates up until.
+     * @param tid The thread id.
+     */
+    void update(const InstSeqNum &done_sn, unsigned tid) { }
+
+    /**
+     * Would squash outstanding updates until a given sequence number;
+     * does nothing here.
+     * @param squashed_sn The sequence number to squash any younger updates up
+     * until.
+     * @param tid The thread id.
+     */
+    void squash(const InstSeqNum &squashed_sn, unsigned tid) { }
+
+    /**
+     * Would squash outstanding updates and correct the given sequence
+     * number's update with the real outcome; does nothing here.
+     * @param squashed_sn The sequence number to squash any younger updates up
+     * until.
+     * @param corr_target The correct branch target.
+     * @param actually_taken The correct branch direction.
+     * @param tid The thread id.
+     */
+    void squash(const InstSeqNum &squashed_sn, const Addr &corr_target,
+                bool actually_taken, unsigned tid)
+    { }
+
+};
+
+#endif // __CPU_OZONE_NULL_PREDICTOR_HH__
diff --git a/cpu/ozone/ozone_impl.hh b/cpu/ozone/ozone_impl.hh
new file mode 100644
index 000000000..a2c706c60
--- /dev/null
+++ b/cpu/ozone/ozone_impl.hh
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_OZONE_OZONE_IMPL_HH__
+#define __CPU_OZONE_OZONE_IMPL_HH__
+
+#include "arch/alpha/isa_traits.hh"
+#include "cpu/o3/bpred_unit.hh"
+#include "cpu/ozone/back_end.hh"
+#include "cpu/ozone/front_end.hh"
+#include "cpu/ozone/inst_queue.hh"
+#include "cpu/ozone/lsq_unit.hh"
+#include "cpu/ozone/null_predictor.hh"
+#include "cpu/ozone/dyn_inst.hh"
+#include "cpu/ozone/simple_params.hh"
+
+template <class Impl>
+class OzoneCPU;
+
+template <class Impl>
+class OzoneDynInst;
+
+struct OzoneImpl {
+    // A leading :: names the template where a typedef reuses the same name.
+    typedef SimpleParams Params;
+    typedef ::OzoneCPU<OzoneImpl> OzoneCPU;
+    typedef OzoneCPU FullCPU;
+
+    // Would like to put these into their own area.
+//    typedef NullPredictor BranchPred;
+    typedef TwobitBPredUnit<OzoneImpl> BranchPred;
+    typedef ::FrontEnd<OzoneImpl> FrontEnd;
+    typedef ::BackEnd<OzoneImpl> BackEnd;
+
+    typedef ::InstQueue<OzoneImpl> InstQueue;
+    typedef OzoneLSQ<OzoneImpl> LdstQueue;
+
+    typedef OzoneDynInst<OzoneImpl> DynInst;
+    typedef RefCountingPtr<DynInst> DynInstPtr;
+
+    typedef uint64_t IssueStruct;
+
+    enum {
+        MaxThreads = 1
+    };
+};
+
+#endif // __CPU_OZONE_OZONE_IMPL_HH__
diff --git a/cpu/ozone/rename_table.cc b/cpu/ozone/rename_table.cc
new file mode 100644
index 000000000..fff41903e
--- /dev/null
+++ b/cpu/ozone/rename_table.cc
@@ -0,0 +1,7 @@
+
+#include "cpu/ozone/rename_table_impl.hh"
+#include "cpu/ozone/ozone_impl.hh"
+#include "cpu/ozone/simple_impl.hh"
+
+template class RenameTable<OzoneImpl>;   // explicit instantiation
+template class RenameTable<SimpleImpl>;  // explicit instantiation
diff --git a/cpu/ozone/rename_table.hh b/cpu/ozone/rename_table.hh
new file mode 100644
index 000000000..afbf6ff32
--- /dev/null
+++ b/cpu/ozone/rename_table.hh
@@ -0,0 +1,25 @@
+#ifndef __CPU_OZONE_RENAME_TABLE_HH__
+#define __CPU_OZONE_RENAME_TABLE_HH__
+
+#include "arch/isa_traits.hh"
+
+/** Rename table that holds the rename of each architectural register to
+ *  producing DynInst. Needs to support copying from one table to another.
+ */
+
+template <class Impl>
+class RenameTable {
+  public:
+    typedef typename Impl::DynInstPtr DynInstPtr;
+    /** Sets every entry to NULL. */
+    RenameTable();
+    /** Overwrites every entry with the matching entry of the argument. */
+    void copyFrom(const RenameTable<Impl> &table_to_copy);
+    /** Returns the entry at index; the index is not bounds-checked. */
+    DynInstPtr &operator [] (int index)
+    { return table[index]; }
+    /** One entry per register, TheISA::TotalNumRegs in total. */
+    DynInstPtr table[TheISA::TotalNumRegs];
+};
+
+#endif // __CPU_OZONE_RENAME_TABLE_HH__
diff --git a/cpu/ozone/rename_table_impl.hh b/cpu/ozone/rename_table_impl.hh
new file mode 100644
index 000000000..86fc1cc55
--- /dev/null
+++ b/cpu/ozone/rename_table_impl.hh
@@ -0,0 +1,23 @@
+
+#include <cstdlib>  // For NULL (also defined by <cstddef>)
+#include "cpu/ozone/rename_table.hh"
+
+template <class Impl>
+RenameTable<Impl>::RenameTable()
+{
+    // Start with no producing instruction recorded for any register.
+    // Ideally each entry would instead hold a dummy dyn inst carrying the
+    // register's initial value, with its value forced to be initialized,
+    // so that every entry could be handled the same way.
+    for (int reg = 0; reg < TheISA::TotalNumRegs; ++reg)
+        table[reg] = NULL;
+}
+
+template <class Impl>
+void
+RenameTable<Impl>::copyFrom(const RenameTable<Impl> &table_to_copy)
+{
+    // Element-wise copy of all TheISA::TotalNumRegs entries.
+    for (int reg = 0; reg < TheISA::TotalNumRegs; ++reg)
+        table[reg] = table_to_copy.table[reg];
+}
diff --git a/cpu/ozone/simple_impl.hh b/cpu/ozone/simple_impl.hh
new file mode 100644
index 000000000..961bf2ea9
--- /dev/null
+++ b/cpu/ozone/simple_impl.hh
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_OZONE_SIMPLE_IMPL_HH__
+#define __CPU_OZONE_SIMPLE_IMPL_HH__
+
+#include "arch/isa_traits.hh"
+#include "cpu/o3/bpred_unit.hh"
+#include "cpu/ozone/cpu.hh"
+#include "cpu/ozone/front_end.hh"
+#include "cpu/ozone/inorder_back_end.hh"
+#include "cpu/ozone/null_predictor.hh"
+#include "cpu/ozone/dyn_inst.hh"
+#include "cpu/ozone/simple_params.hh"
+
+//template <class Impl>
+//class OzoneCPU;
+
+template <class Impl>
+class OzoneDynInst;
+
+// Impl traits for the simple Ozone CPU model: names the concrete parameter,
+// CPU, predictor, front-end, back-end and instruction types it is built from.
+struct SimpleImpl {
+    typedef SimpleParams Params;
+    // Qualified with :: so that the member typedef does not change what the
+    // name means part-way through the class (same for FrontEnd below).
+    typedef ::OzoneCPU<SimpleImpl> OzoneCPU;
+    typedef OzoneCPU FullCPU;
+
+    // Would like to put these into their own area.
+//    typedef NullPredictor BranchPred;
+    typedef TwobitBPredUnit<SimpleImpl> BranchPred;
+    typedef ::FrontEnd<SimpleImpl> FrontEnd;
+    // Will need IQ, LSQ eventually
+    typedef InorderBackEnd<SimpleImpl> BackEnd;
+
+    typedef OzoneDynInst<SimpleImpl> DynInst;
+    typedef RefCountingPtr<DynInst> DynInstPtr;
+    typedef uint64_t IssueStruct;
+    enum { MaxThreads = 1 };
+};
+
+#endif // __CPU_OZONE_SIMPLE_IMPL_HH__
diff --git a/cpu/ozone/simple_params.hh b/cpu/ozone/simple_params.hh
new file mode 100644
index 000000000..e503654aa
--- /dev/null
+++ b/cpu/ozone/simple_params.hh
@@ -0,0 +1,164 @@
+
+
+#ifndef __CPU_OZONE_SIMPLE_PARAMS_HH__
+#define __CPU_OZONE_SIMPLE_PARAMS_HH__
+
+#include "cpu/ozone/cpu.hh"
+
+//Forward declarations
+class AlphaDTB;
+class AlphaITB;
+class FUPool;
+class FunctionalMemory;
+class MemInterface;
+class PageTable;
+class Process;
+class System;
+
+/**
+ * SimpleParams collects the parameters that will be used for the OzoneCPU,
+ * in addition to those inherited from BaseCPU::Params.  This must be defined
+ * externally so that the Impl can have a params class defined that it can
+ * pass to all of the individual stages.  Every member is public data.
+ */
+class SimpleParams : public BaseCPU::Params
+{
+  public:
+    // Translation buffers (full system) or the processes to run (otherwise).
+#if FULL_SYSTEM
+    AlphaITB *itb; AlphaDTB *dtb;
+#else
+    std::vector<Process *> workload;
+//    Process *process;
+#endif // FULL_SYSTEM
+
+    // Page table.
+    PageTable *pTable;
+    // Functional memory object.
+    FunctionalMemory *mem;
+
+    //
+    // Caches: instruction- and data-cache memory interfaces (going by the
+    // member names).
+    MemInterface *icacheInterface;
+    MemInterface *dcacheInterface;
+
+    unsigned cachePorts;
+    unsigned width;
+    unsigned frontEndWidth;
+    unsigned backEndWidth;
+    unsigned backEndSquashLatency;
+    unsigned backEndLatency;
+    unsigned maxInstBufferSize;
+    unsigned numPhysicalRegs;
+    //
+    // Fetch.  NOTE(review): the XToYDelay members here and below look like
+    // delays on signals from stage X to stage Y -- confirm with their users.
+    unsigned decodeToFetchDelay;
+    unsigned renameToFetchDelay;
+    unsigned iewToFetchDelay;
+    unsigned commitToFetchDelay;
+    unsigned fetchWidth;
+
+    //
+    // Decode
+    //
+    unsigned renameToDecodeDelay;
+    unsigned iewToDecodeDelay;
+    unsigned commitToDecodeDelay;
+    unsigned fetchToDecodeDelay;
+    unsigned decodeWidth;
+
+    //
+    // Rename
+    //
+    unsigned iewToRenameDelay;
+    unsigned commitToRenameDelay;
+    unsigned decodeToRenameDelay;
+    unsigned renameWidth;
+
+    //
+    // IEW
+    //
+    unsigned commitToIEWDelay;
+    unsigned renameToIEWDelay;
+    unsigned issueToExecuteDelay;
+    unsigned issueWidth;
+    unsigned executeWidth;
+    unsigned executeIntWidth;
+    unsigned executeFloatWidth;
+    unsigned executeBranchWidth;
+    unsigned executeMemoryWidth;
+    FUPool *fuPool;
+
+    //
+    // Commit
+    //
+    unsigned iewToCommitDelay;
+    unsigned renameToROBDelay;
+    unsigned commitWidth;
+    unsigned squashWidth;
+
+    //
+    // Branch predictor (BP & BTB)
+    //
+    unsigned localPredictorSize;
+    unsigned localCtrBits;
+    unsigned localHistoryTableSize;
+    unsigned localHistoryBits;
+    unsigned globalPredictorSize;
+    unsigned globalCtrBits;
+    unsigned globalHistoryBits;
+    unsigned choicePredictorSize;
+    unsigned choiceCtrBits;
+
+    unsigned BTBEntries;
+    unsigned BTBTagSize;
+
+    unsigned RASSize;
+
+    //
+    // Load store queue
+    //
+    unsigned LQEntries;
+    unsigned SQEntries;
+
+    //
+    // Memory dependence
+    //
+    unsigned SSITSize;
+    unsigned LFSTSize;
+
+    //
+    // Miscellaneous
+    //
+    unsigned numPhysIntRegs;
+    unsigned numPhysFloatRegs;
+    unsigned numIQEntries;
+    unsigned numROBEntries;
+
+    bool decoupledFrontEnd;
+    int dispatchWidth;
+    int wbWidth;
+
+    // SMT parameters.
+    unsigned smtNumFetchingThreads;
+
+    std::string   smtFetchPolicy;
+
+    std::string   smtIQPolicy;
+    unsigned smtIQThreshold;
+
+    std::string   smtLSQPolicy;
+    unsigned smtLSQThreshold;
+
+    std::string   smtCommitPolicy;
+
+    std::string   smtROBPolicy;
+    unsigned smtROBThreshold;
+
+    // Probably can get this from somewhere.
+    unsigned instShiftAmt;
+};
+
+#endif // __CPU_OZONE_SIMPLE_PARAMS_HH__
diff --git a/cpu/ozone/thread_state.hh b/cpu/ozone/thread_state.hh
new file mode 100644
index 000000000..c6d23a63b
--- /dev/null
+++ b/cpu/ozone/thread_state.hh
@@ -0,0 +1,171 @@
+
+#ifndef __CPU_OZONE_THREAD_STATE_HH__
+#define __CPU_OZONE_THREAD_STATE_HH__
+
+#include "arch/faults.hh"
+#include "arch/isa_traits.hh"
+#include "cpu/exec_context.hh"
+#include "cpu/thread_state.hh"
+
+class Event;
+class Process;
+
+#if FULL_SYSTEM
+class EndQuiesceEvent;
+class FunctionProfile;
+class ProfileNode;
+#else
+class Process;
+class FunctionalMemory;
+#endif
+
+// Per-thread state for the OzoneCPU: status, PC/nextPC, the register file
+// and rename table, plus translation and misc-register accessors.  Basically
+// serves the same use as CPUExecContext: a central access point to thread
+// state that can be accessed normally (i.e. not in-flight stuff within an
+// OoO processor), which makes the ExecContext proxy easier and helps SMT.
+// Open questions: hold only committed state?  Need an XC proxy within it?
+template <class Impl>
+struct OzoneThreadState : public ThreadState {
+    typedef typename ExecContext::Status Status;
+    typedef typename Impl::FullCPU FullCPU;
+    typedef TheISA::MiscReg MiscReg;
+    // Every constructor records _cpu; full-system translation relies on it.
+#if FULL_SYSTEM
+    OzoneThreadState(FullCPU *_cpu, int _thread_num, FunctionalMemory *_mem)
+        : ThreadState(-1, _thread_num, _mem),
+          cpu(_cpu), inSyscall(0), trapPending(0)
+    {
+        memset(&regs, 0, sizeof(TheISA::RegFile));
+    }
+#else
+    OzoneThreadState(FullCPU *_cpu, int _thread_num, Process *_process, int _asid)
+        : ThreadState(-1, _thread_num, NULL, _process, _asid),
+          cpu(_cpu), inSyscall(0), trapPending(0)
+    {
+        memset(&regs, 0, sizeof(TheISA::RegFile));
+    }
+
+    OzoneThreadState(FullCPU *_cpu, int _thread_num, FunctionalMemory *_mem,
+                     int _asid)
+        : ThreadState(-1, _thread_num, _mem, NULL, _asid),
+          cpu(_cpu), inSyscall(0), trapPending(0)
+    {
+        memset(&regs, 0, sizeof(TheISA::RegFile));
+    }
+#endif
+    // Thread status (an ExecContext::Status value) and its accessors.
+    Status _status;
+
+    Status status() const { return _status; }
+
+    void setStatus(Status new_status) { _status = new_status; }
+
+    RenameTable<Impl> renameTable; // Should I include backend and frontend
+    // tables here?  For the ozone CPU, maybe, for the new full CPU, probably
+    // not...you wouldn't want threads just accessing the backend/frontend
+    // rename tables.
+    Addr PC; // What should these be set to?  Probably the committed ones.
+    Addr nextPC;
+
+    // Current instruction?
+    TheISA::MachInst inst;
+
+    TheISA::RegFile regs;
+    // Front end?  Back end?
+//    MemReqPtr memReq;
+
+    typename Impl::FullCPU *cpu;
+
+    bool inSyscall;
+
+    bool trapPending;
+    // Not set by the constructors; must be assigned before it is used.
+    ExecContext *xcProxy;
+
+    ExecContext *getXCProxy() { return xcProxy; }
+
+#if !FULL_SYSTEM
+    // Non-full-system translation: vaddr with the asid in its top 16 bits.
+    Fault dummyTranslation(MemReqPtr &req)
+    {
+#if 0
+        assert((req->vaddr >> 48 & 0xffff) == 0);
+#endif
+
+        // put the asid in the upper 16 bits of the paddr
+        req->paddr = req->vaddr & ~((Addr)0xffff << sizeof(Addr) * 8 - 16);
+        req->paddr = req->paddr | (Addr)req->asid << sizeof(Addr) * 8 - 16;
+        return NoFault;
+    }
+    Fault translateInstReq(MemReqPtr &req)
+    {
+        return dummyTranslation(req);
+    }
+    Fault translateDataReadReq(MemReqPtr &req)
+    {
+        return dummyTranslation(req);
+    }
+    Fault translateDataWriteReq(MemReqPtr &req)
+    {
+        return dummyTranslation(req);
+    }
+#else
+    Fault translateInstReq(MemReqPtr &req)
+    {
+        return cpu->itb->translate(req);
+    }
+
+    Fault translateDataReadReq(MemReqPtr &req)
+    {
+        return cpu->dtb->translate(req, false);
+    }
+
+    Fault translateDataWriteReq(MemReqPtr &req)
+    {
+        return cpu->dtb->translate(req, true);
+    }
+#endif
+    // Misc register access is forwarded to regs.miscRegs.
+    MiscReg readMiscReg(int misc_reg)
+    {
+        return regs.miscRegs.readReg(misc_reg);
+    }
+
+    MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault)
+    {
+        return regs.miscRegs.readRegWithEffect(misc_reg, fault, xcProxy);
+    }
+
+    Fault setMiscReg(int misc_reg, const MiscReg &val)
+    {
+        return regs.miscRegs.setReg(misc_reg, val);
+    }
+
+    Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val)
+    {
+        return regs.miscRegs.setRegWithEffect(misc_reg, val, xcProxy);
+    }
+
+    uint64_t readPC()
+    { return PC; }
+
+    void setPC(uint64_t val)
+    { PC = val; }
+
+    uint64_t readNextPC()
+    { return nextPC; }
+
+    void setNextPC(uint64_t val)
+    { nextPC = val; }
+
+    bool misspeculating() { return false; }
+
+    void setInst(TheISA::MachInst _inst) { inst = _inst; }
+
+    Counter readFuncExeInst() { return funcExeInst; }
+
+    void setFuncExeInst(Counter new_val) { funcExeInst = new_val; }
+};
+
+#endif // __CPU_OZONE_THREAD_STATE_HH__
-- 
cgit v1.2.3


From 6b4396111ba26fd16c7cf0047c4cb3e13036c298 Mon Sep 17 00:00:00 2001
From: Kevin Lim <ktlim@umich.edu>
Date: Sat, 22 Apr 2006 19:10:39 -0400
Subject: Updates for OzoneCPU.

cpu/static_inst.hh:
    Updates for new CPU, also include a classification of quiesce instructions.

--HG--
extra : convert_revision : a34cd56da88fe57d7de24674fbb375bbf13f887f
---
 cpu/ozone/back_end.cc         |    2 +-
 cpu/ozone/back_end.hh         |    7 +
 cpu/ozone/back_end_impl.hh    |   85 ++-
 cpu/ozone/cpu.hh              |   20 +-
 cpu/ozone/cpu_builder.cc      |   12 +-
 cpu/ozone/cpu_impl.hh         |   54 +-
 cpu/ozone/front_end.hh        |    4 +
 cpu/ozone/front_end_impl.hh   |   13 +-
 cpu/ozone/inorder_back_end.hh |    1 +
 cpu/ozone/lsq_unit.hh         |    9 +-
 cpu/ozone/lsq_unit_impl.hh    |    4 +-
 cpu/ozone/lw_back_end.cc      |    5 +
 cpu/ozone/lw_back_end.hh      |  503 ++++++++++++++
 cpu/ozone/lw_back_end_impl.hh | 1486 +++++++++++++++++++++++++++++++++++++++++
 cpu/ozone/lw_lsq.cc           |   34 +
 cpu/ozone/lw_lsq.hh           |  649 ++++++++++++++++++
 cpu/ozone/lw_lsq_impl.hh      |  766 +++++++++++++++++++++
 cpu/ozone/ozone_impl.hh       |    6 +-
 18 files changed, 3612 insertions(+), 48 deletions(-)
 create mode 100644 cpu/ozone/lw_back_end.cc
 create mode 100644 cpu/ozone/lw_back_end.hh
 create mode 100644 cpu/ozone/lw_back_end_impl.hh
 create mode 100644 cpu/ozone/lw_lsq.cc
 create mode 100644 cpu/ozone/lw_lsq.hh
 create mode 100644 cpu/ozone/lw_lsq_impl.hh

(limited to 'cpu/ozone')

diff --git a/cpu/ozone/back_end.cc b/cpu/ozone/back_end.cc
index dbab5435e..cb014e4cc 100644
--- a/cpu/ozone/back_end.cc
+++ b/cpu/ozone/back_end.cc
@@ -2,4 +2,4 @@
 #include "cpu/ozone/back_end_impl.hh"
 #include "cpu/ozone/ozone_impl.hh"
 
-template class BackEnd<OzoneImpl>;
+//template class BackEnd<OzoneImpl>;
diff --git a/cpu/ozone/back_end.hh b/cpu/ozone/back_end.hh
index 0713a0143..14b011ab8 100644
--- a/cpu/ozone/back_end.hh
+++ b/cpu/ozone/back_end.hh
@@ -125,6 +125,7 @@ class BackEnd
         InstList nonSpec;
         InstList replayList;
         ReadyInstQueue readyQueue;
+      public:
         int size;
         int numInsts;
         int width;
@@ -321,6 +322,12 @@ class BackEnd
     int numROBEntries;
     int numInsts;
 
+    bool squashPending;
+    InstSeqNum squashSeqNum;
+    Addr squashNextPC;
+
+    Fault faultFromFetch;
+
   private:
     typedef typename std::list<DynInstPtr>::iterator InstListIt;
 
diff --git a/cpu/ozone/back_end_impl.hh b/cpu/ozone/back_end_impl.hh
index 807afaf2e..0b0f04f59 100644
--- a/cpu/ozone/back_end_impl.hh
+++ b/cpu/ozone/back_end_impl.hh
@@ -100,6 +100,7 @@ BackEnd<Impl>::InstQueue::insert(DynInstPtr &inst)
     numInsts++;
     inst_count[0]++;
     if (!inst->isNonSpeculative()) {
+        DPRINTF(BE, "Instruction [sn:%lli] added to IQ\n", inst->seqNum);
         if (inst->readyToIssue()) {
             toBeScheduled.push_front(inst);
             inst->iqIt = toBeScheduled.begin();
@@ -110,6 +111,7 @@ BackEnd<Impl>::InstQueue::insert(DynInstPtr &inst)
             inst->iqItValid = true;
         }
     } else {
+        DPRINTF(BE, "Nonspeculative instruction [sn:%lli] added to IQ\n", inst->seqNum);
         nonSpec.push_front(inst);
         inst->iqIt = nonSpec.begin();
         inst->iqItValid = true;
@@ -159,6 +161,8 @@ BackEnd<Impl>::InstQueue::scheduleNonSpec(const InstSeqNum &sn)
 */
     DynInstPtr inst = nonSpec.back();
 
+    DPRINTF(BE, "Nonspeculative instruction [sn:%lli] scheduled\n", inst->seqNum);
+
     assert(inst->seqNum == sn);
 
     assert(find(NonSpec, inst->iqIt));
@@ -193,6 +197,7 @@ BackEnd<Impl>::InstQueue::squash(const InstSeqNum &sn)
     InstListIt iq_end_it = iq.end();
 
     while (iq_it != iq_end_it && (*iq_it)->seqNum > sn) {
+        DPRINTF(BE, "Instruction [sn:%lli] removed from IQ\n", (*iq_it)->seqNum);
         (*iq_it)->iqItValid = false;
         iq.erase(iq_it++);
         --numInsts;
@@ -202,6 +207,7 @@ BackEnd<Impl>::InstQueue::squash(const InstSeqNum &sn)
     iq_end_it = nonSpec.end();
 
     while (iq_it != iq_end_it && (*iq_it)->seqNum > sn) {
+        DPRINTF(BE, "Instruction [sn:%lli] removed from IQ\n", (*iq_it)->seqNum);
         (*iq_it)->iqItValid = false;
         nonSpec.erase(iq_it++);
         --numInsts;
@@ -212,6 +218,7 @@ BackEnd<Impl>::InstQueue::squash(const InstSeqNum &sn)
 
     while (iq_it != iq_end_it) {
         if ((*iq_it)->seqNum > sn) {
+            DPRINTF(BE, "Instruction [sn:%lli] removed from IQ\n", (*iq_it)->seqNum);
             (*iq_it)->iqItValid = false;
             replayList.erase(iq_it++);
             --numInsts;
@@ -243,20 +250,24 @@ BackEnd<Impl>::InstQueue::wakeDependents(DynInstPtr &inst)
     std::vector<DynInstPtr> &dependents = inst->getDependents();
     int num_outputs = dependents.size();
 
+    DPRINTF(BE, "Waking instruction [sn:%lli] dependents in IQ\n", inst->seqNum);
+
     for (int i = 0; i < num_outputs; i++) {
-        DynInstPtr inst = dependents[i];
-        inst->markSrcRegReady();
-        if (inst->readyToIssue() && inst->iqItValid) {
-            if (inst->isNonSpeculative()) {
-                assert(find(NonSpec, inst->iqIt));
-                nonSpec.erase(inst->iqIt);
+        DynInstPtr dep_inst = dependents[i];
+        dep_inst->markSrcRegReady();
+        DPRINTF(BE, "Marking source reg ready [sn:%lli] in IQ\n", dep_inst->seqNum);
+
+        if (dep_inst->readyToIssue() && dep_inst->iqItValid) {
+            if (dep_inst->isNonSpeculative()) {
+                assert(find(NonSpec, dep_inst->iqIt));
+                nonSpec.erase(dep_inst->iqIt);
             } else {
-                assert(find(IQ, inst->iqIt));
-                iq.erase(inst->iqIt);
+                assert(find(IQ, dep_inst->iqIt));
+                iq.erase(dep_inst->iqIt);
             }
 
-            toBeScheduled.push_front(inst);
-            inst->iqIt = toBeScheduled.begin();
+            toBeScheduled.push_front(dep_inst);
+            dep_inst->iqIt = toBeScheduled.begin();
         }
     }
     return num_outputs;
@@ -266,6 +277,7 @@ template <class Impl>
 void
 BackEnd<Impl>::InstQueue::rescheduleMemInst(DynInstPtr &inst)
 {
+    DPRINTF(BE, "Rescheduling memory instruction [sn:%lli]\n", inst->seqNum);
     assert(!inst->iqItValid);
     replayList.push_front(inst);
     inst->iqIt = replayList.begin();
@@ -277,11 +289,14 @@ template <class Impl>
 void
 BackEnd<Impl>::InstQueue::replayMemInst(DynInstPtr &inst)
 {
+    DPRINTF(BE, "Replaying memory instruction [sn:%lli]\n", inst->seqNum);
     assert(find(ReplayList, inst->iqIt));
     InstListIt iq_it = --replayList.end();
     InstListIt iq_end_it = replayList.end();
     while (iq_it != iq_end_it) {
         DynInstPtr rescheduled_inst = (*iq_it);
+
+        DPRINTF(BE, "Memory instruction [sn:%lli] also replayed\n", inst->seqNum);
         replayList.erase(iq_it--);
         toBeScheduled.push_front(rescheduled_inst);
         rescheduled_inst->iqIt = toBeScheduled.begin();
@@ -952,6 +967,9 @@ BackEnd<Impl>::tick()
 
     commitInsts();
 
+    DPRINTF(BE, "IQ entries in use: %i, ROB entries in use: %i, LSQ loads: %i, LSQ stores: %i\n",
+            IQ.numInsts, numInsts, LSQ.numLoads(), LSQ.numStores());
+
     assert(numInsts == instList.size());
 }
 
@@ -1034,11 +1052,11 @@ BackEnd<Impl>::dispatchInsts()
         // Get instruction from front of time buffer
         DynInstPtr inst = dispatch.front();
         dispatch.pop_front();
+        --dispatchSize;
 
         if (inst->isSquashed())
             continue;
 
-        --dispatchSize;
         ++numInsts;
         instList.push_back(inst);
 
@@ -1118,6 +1136,7 @@ template <class Impl>
 void
 BackEnd<Impl>::checkDispatchStatus()
 {
+    DPRINTF(BE, "Checking dispatch status\n");
     assert(dispatchStatus == Blocked);
     if (!IQ.isFull() && !LSQ.isFull() && !isFull()) {
         DPRINTF(BE, "Dispatch no longer blocked\n");
@@ -1526,6 +1545,24 @@ BackEnd<Impl>::commitInst(int inst_num)
     // Write the done sequence number here.
     toIEW->doneSeqNum = inst->seqNum;
 
+#if FULL_SYSTEM
+    int count = 0;
+    Addr oldpc;
+    do {
+        if (count == 0)
+            assert(!thread->inSyscall && !thread->trapPending);
+        oldpc = thread->readPC();
+        cpu->system->pcEventQueue.service(
+            thread->getXCProxy());
+        count++;
+    } while (oldpc != thread->readPC());
+    if (count > 1) {
+        DPRINTF(BE, "PC skip function event, stopping commit\n");
+//        completed_last_inst = false;
+//        squashPending = true;
+        return false;
+    }
+#endif
     return true;
 }
 
@@ -1566,7 +1603,11 @@ BackEnd<Impl>::squash(const InstSeqNum &sn)
 
     while (insts_it != dispatch_end && (*insts_it)->seqNum > sn)
     {
-        DPRINTF(BE, "Squashing instruction PC %#x, [sn:%lli].\n",
+        if ((*insts_it)->isSquashed()) {
+            --insts_it;
+            continue;
+        }
+        DPRINTF(BE, "Squashing instruction on dispatch list PC %#x, [sn:%lli].\n",
                 (*insts_it)->readPC(),
                 (*insts_it)->seqNum);
 
@@ -1576,9 +1617,12 @@ BackEnd<Impl>::squash(const InstSeqNum &sn)
 
         (*insts_it)->setCanCommit();
 
+        // Be careful with IPRs and such here
         for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) {
-            renameTable[(*insts_it)->destRegIdx(i)] =
-                (*insts_it)->getPrevDestInst(i);
+            DynInstPtr prev_dest = (*insts_it)->getPrevDestInst(i);
+            DPRINTF(BE, "Commit rename map setting register %i to [sn:%lli]\n",
+                    (int)(*insts_it)->destRegIdx(i), prev_dest);
+            renameTable[(*insts_it)->destRegIdx(i)] = prev_dest;
             ++freed_regs;
         }
 
@@ -1592,7 +1636,11 @@ BackEnd<Impl>::squash(const InstSeqNum &sn)
 
     while (!instList.empty() && (*insts_it)->seqNum > sn)
     {
-        DPRINTF(BE, "Squashing instruction PC %#x, [sn:%lli].\n",
+        if ((*insts_it)->isSquashed()) {
+            --insts_it;
+            continue;
+        }
+        DPRINTF(BE, "Squashing instruction on inst list PC %#x, [sn:%lli].\n",
                 (*insts_it)->readPC(),
                 (*insts_it)->seqNum);
 
@@ -1603,8 +1651,10 @@ BackEnd<Impl>::squash(const InstSeqNum &sn)
         (*insts_it)->setCanCommit();
 
         for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) {
-            renameTable[(*insts_it)->destRegIdx(i)] =
-                (*insts_it)->getPrevDestInst(i);
+            DynInstPtr prev_dest = (*insts_it)->getPrevDestInst(i);
+            DPRINTF(BE, "Commit rename map setting register %i to [sn:%lli]\n",
+                    (int)(*insts_it)->destRegIdx(i), prev_dest);
+            renameTable[(*insts_it)->destRegIdx(i)] = prev_dest;
             ++freed_regs;
         }
 
@@ -1649,6 +1699,7 @@ template <class Impl>
 void
 BackEnd<Impl>::fetchFault(Fault &fault)
 {
+    faultFromFetch = fault;
 }
 
 template <class Impl>
diff --git a/cpu/ozone/cpu.hh b/cpu/ozone/cpu.hh
index 200ced265..17e0f5c42 100644
--- a/cpu/ozone/cpu.hh
+++ b/cpu/ozone/cpu.hh
@@ -42,6 +42,7 @@
 #include "cpu/pc_event.hh"
 #include "cpu/static_inst.hh"
 #include "mem/mem_interface.hh"
+#include "mem/page_table.hh"
 #include "sim/eventq.hh"
 
 // forward declarations
@@ -427,34 +428,22 @@ class OzoneCPU : public BaseCPU
     int getInstAsid() { return thread.asid; }
     int getDataAsid() { return thread.asid; }
 
-    Fault dummyTranslation(MemReqPtr &req)
-    {
-#if 0
-        assert((req->vaddr >> 48 & 0xffff) == 0);
-#endif
-
-        // put the asid in the upper 16 bits of the paddr
-        req->paddr = req->vaddr & ~((Addr)0xffff << sizeof(Addr) * 8 - 16);
-        req->paddr = req->paddr | (Addr)req->asid << sizeof(Addr) * 8 - 16;
-        return NoFault;
-    }
-
     /** Translates instruction requestion in syscall emulation mode. */
     Fault translateInstReq(MemReqPtr &req)
     {
-        return dummyTranslation(req);
+        return this->pTable->translate(req);
     }
 
     /** Translates data read request in syscall emulation mode. */
     Fault translateDataReadReq(MemReqPtr &req)
     {
-        return dummyTranslation(req);
+        return this->pTable->translate(req);
     }
 
     /** Translates data write request in syscall emulation mode. */
     Fault translateDataWriteReq(MemReqPtr &req)
     {
-        return dummyTranslation(req);
+        return this->pTable->translate(req);
     }
 #endif
     /** CPU read function, forwards read to LSQ. */
@@ -500,6 +489,7 @@ class OzoneCPU : public BaseCPU
     bool inPalMode() { return AlphaISA::PcPAL(thread.PC); }
     bool inPalMode(Addr pc) { return AlphaISA::PcPAL(pc); }
     bool simPalCheck(int palFunc);
+    void processInterrupts();
 #else
     void syscall();
     void setSyscallReturn(SyscallReturn return_value, int tid);
diff --git a/cpu/ozone/cpu_builder.cc b/cpu/ozone/cpu_builder.cc
index 0146dd1bd..8ac6858b0 100644
--- a/cpu/ozone/cpu_builder.cc
+++ b/cpu/ozone/cpu_builder.cc
@@ -45,7 +45,7 @@ SimObjectParam<AlphaITB *> itb;
 SimObjectParam<AlphaDTB *> dtb;
 #else
 SimObjectVectorParam<Process *> workload;
-//SimObjectParam<PageTable *> page_table;
+SimObjectParam<PageTable *> page_table;
 #endif // FULL_SYSTEM
 
 SimObjectParam<FunctionalMemory *> mem;
@@ -159,7 +159,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU)
     INIT_PARAM(dtb, "Data translation buffer"),
 #else
     INIT_PARAM(workload, "Processes to run"),
-//    INIT_PARAM(page_table, "Page table"),
+    INIT_PARAM(page_table, "Page table"),
 #endif // FULL_SYSTEM
 
     INIT_PARAM_DFLT(mem, "Memory", NULL),
@@ -310,7 +310,7 @@ CREATE_SIM_OBJECT(DerivOzoneCPU)
     params->dtb = dtb;
 #else
     params->workload = workload;
-//    params->pTable = page_table;
+    params->pTable = page_table;
 #endif // FULL_SYSTEM
 
     params->mem = mem;
@@ -440,7 +440,7 @@ SimObjectParam<AlphaITB *> itb;
 SimObjectParam<AlphaDTB *> dtb;
 #else
 SimObjectVectorParam<Process *> workload;
-//SimObjectParam<PageTable *> page_table;
+SimObjectParam<PageTable *> page_table;
 #endif // FULL_SYSTEM
 
 SimObjectParam<FunctionalMemory *> mem;
@@ -554,7 +554,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(SimpleOzoneCPU)
     INIT_PARAM(dtb, "Data translation buffer"),
 #else
     INIT_PARAM(workload, "Processes to run"),
-//    INIT_PARAM(page_table, "Page table"),
+    INIT_PARAM(page_table, "Page table"),
 #endif // FULL_SYSTEM
 
     INIT_PARAM_DFLT(mem, "Memory", NULL),
@@ -705,7 +705,7 @@ CREATE_SIM_OBJECT(SimpleOzoneCPU)
     params->dtb = dtb;
 #else
     params->workload = workload;
-//    params->pTable = page_table;
+    params->pTable = page_table;
 #endif // FULL_SYSTEM
 
     params->mem = mem;
diff --git a/cpu/ozone/cpu_impl.hh b/cpu/ozone/cpu_impl.hh
index 36ec30b2c..c205ad319 100644
--- a/cpu/ozone/cpu_impl.hh
+++ b/cpu/ozone/cpu_impl.hh
@@ -765,7 +765,7 @@ void
 OzoneCPU<Impl>::squashFromXC()
 {
     thread.inSyscall = true;
-    backEnd->squashFromXC();
+    backEnd->generateXCEvent();
 }
 
 #if !FULL_SYSTEM
@@ -832,6 +832,58 @@ OzoneCPU<Impl>::hwrei()
     return NoFault;
 }
 
+template <class Impl>
+void
+OzoneCPU<Impl>::processInterrupts()
+{
+    // Check for interrupts here and, if one outranks the value in IPR_IPLR,
+    // invoke an InterruptFault on the thread.  For now can copy the code
+    // that exists within isa_fullsys_traits.hh.  Also assume that thread 0
+    // is the one that handles the interrupts.
+    // ipl is the level that will be delivered (0 means nothing pending);
+    // summary collects one bit per pending level.
+    int ipl = 0;
+    int summary = 0;
+    // NOTE(review): int summary receives ULL(1) << i bits; confirm i < 31.
+    checkInterrupts = false;
+
+    if (thread.readMiscReg(IPR_ASTRR))
+        panic("asynchronous traps not implemented\n");
+    // Software interrupt requests: the highest set bit in range sets ipl.
+    if (thread.readMiscReg(IPR_SIRR)) {
+        for (int i = INTLEVEL_SOFTWARE_MIN;
+             i < INTLEVEL_SOFTWARE_MAX; i++) {
+            if (thread.readMiscReg(IPR_SIRR) & (ULL(1) << i)) {
+                // See table 4-19 of the 21164 hardware reference
+                ipl = (i - INTLEVEL_SOFTWARE_MIN) + 1;
+                summary |= (ULL(1) << i);
+            }
+        }
+    }
+    // External interrupts: any level found here replaces the software one.
+    uint64_t interrupts = intr_status();
+
+    if (interrupts) {
+        for (int i = INTLEVEL_EXTERNAL_MIN;
+             i < INTLEVEL_EXTERNAL_MAX; i++) {
+            if (interrupts & (ULL(1) << i)) {
+                // See table 4-19 of the 21164 hardware reference
+                ipl = i;
+                summary |= (ULL(1) << i);
+            }
+        }
+    }
+    // Deliver only when something is pending and it outranks IPR_IPLR.
+    if (ipl && ipl > thread.readMiscReg(IPR_IPLR)) {
+        thread.setMiscReg(IPR_ISR, summary);
+        thread.setMiscReg(IPR_INTID, ipl);
+        Fault fault = new InterruptFault;
+        fault->invoke(thread.getXCProxy());
+        DPRINTF(Flow, "Interrupt! IPLR=%d ipl=%d summary=%x\n",
+                thread.readMiscReg(IPR_IPLR), ipl, summary);
+    }
+}
+
 template <class Impl>
 bool
 OzoneCPU<Impl>::simPalCheck(int palFunc)
diff --git a/cpu/ozone/front_end.hh b/cpu/ozone/front_end.hh
index 5e257b506..251f4200c 100644
--- a/cpu/ozone/front_end.hh
+++ b/cpu/ozone/front_end.hh
@@ -76,6 +76,10 @@ class FrontEnd
     bool processBarriers(DynInstPtr &inst);
 
     void handleFault(Fault &fault);
+  public:
+    Fault getFault() { return fetchFault; }  // Current value of fetchFault.
+  private:
+    Fault fetchFault;
 
     // Align an address (typically a PC) to the start of an I-cache block.
     // We fold in the PISA 64- to 32-bit conversion here as well.
diff --git a/cpu/ozone/front_end_impl.hh b/cpu/ozone/front_end_impl.hh
index 0136d0ef0..af452fe95 100644
--- a/cpu/ozone/front_end_impl.hh
+++ b/cpu/ozone/front_end_impl.hh
@@ -48,6 +48,7 @@ FrontEnd<Impl>::FrontEnd(Params *params)
 #if !FULL_SYSTEM
     pTable = params->pTable;
 #endif
+    fetchFault = NoFault;
 }
 
 template <class Impl>
@@ -273,6 +274,7 @@ FrontEnd<Impl>::tick()
             Fault fault = fetchCacheLine();
             if (fault != NoFault) {
                 handleFault(fault);
+                fetchFault = fault;
                 return;
             }
             fetchCacheLineNextCycle = false;
@@ -349,7 +351,7 @@ FrontEnd<Impl>::fetchCacheLine()
     // Read a cache line, based on the current PC.
 #if FULL_SYSTEM
     // Flag to say whether or not address is physical addr.
-    unsigned flags = cpu->inPalMode() ? PHYSICAL : 0;
+    unsigned flags = cpu->inPalMode(PC) ? PHYSICAL : 0;
 #else
     unsigned flags = 0;
 #endif // FULL_SYSTEM
@@ -503,6 +505,9 @@ FrontEnd<Impl>::squash(const InstSeqNum &squash_num, const Addr &next_PC,
     DPRINTF(FE, "Squashing from [sn:%lli], setting PC to %#x\n",
             squash_num, next_PC);
 
+    if (fetchFault != NoFault)
+        fetchFault = NoFault;
+
     while (!instBuffer.empty() &&
            instBuffer.back()->seqNum > squash_num) {
         DynInstPtr inst = instBuffer.back();
@@ -604,9 +609,13 @@ FrontEnd<Impl>::addFreeRegs(int num_freed)
         status = Running;
     }
 
+    DPRINTF(FE, "Adding %i freed registers\n", num_freed);
+
     freeRegs+= num_freed;
 
-    assert(freeRegs <= numPhysRegs);
+//    assert(freeRegs <= numPhysRegs);
+    if (freeRegs > numPhysRegs)
+        freeRegs = numPhysRegs;
 }
 
 template <class Impl>
diff --git a/cpu/ozone/inorder_back_end.hh b/cpu/ozone/inorder_back_end.hh
index e621f6c01..6519b79e5 100644
--- a/cpu/ozone/inorder_back_end.hh
+++ b/cpu/ozone/inorder_back_end.hh
@@ -54,6 +54,7 @@ class InorderBackEnd
     void squash(const InstSeqNum &squash_num, const Addr &next_PC);
 
     void squashFromXC();
+    void generateXCEvent() { }  // No-op in this back end.
 
     bool robEmpty() { return instList.empty(); }
 
diff --git a/cpu/ozone/lsq_unit.hh b/cpu/ozone/lsq_unit.hh
index 3c3e3988c..4b600af67 100644
--- a/cpu/ozone/lsq_unit.hh
+++ b/cpu/ozone/lsq_unit.hh
@@ -567,8 +567,11 @@ OzoneLSQ<Impl>::read(MemReqPtr &req, T &data, int load_idx)
         req->data = new uint8_t[64];
 
         assert(!req->completionEvent);
-        req->completionEvent =
-            new typename BackEnd::LdWritebackEvent(loadQueue[load_idx], be);
+        typedef typename BackEnd::LdWritebackEvent LdWritebackEvent;
+
+        LdWritebackEvent *wb = new LdWritebackEvent(loadQueue[load_idx], be);
+
+        req->completionEvent = wb;
 
         // Do Cache Access
         MemAccessResult result = dcacheInterface->access(req);
@@ -586,6 +589,8 @@ OzoneLSQ<Impl>::read(MemReqPtr &req, T &data, int load_idx)
 
             _status = DcacheMissStall;
 
+            wb->setDcacheMiss();
+
         } else {
 //            DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n",
 //                    inst->seqNum);
diff --git a/cpu/ozone/lsq_unit_impl.hh b/cpu/ozone/lsq_unit_impl.hh
index 6c7977250..726348d76 100644
--- a/cpu/ozone/lsq_unit_impl.hh
+++ b/cpu/ozone/lsq_unit_impl.hh
@@ -698,7 +698,7 @@ OzoneLSQ<Impl>::squash(const InstSeqNum &squashed_num)
             stallingLoadIdx = 0;
         }
 
-        loadQueue[load_idx]->squashed = true;
+//        loadQueue[load_idx]->squashed = true;
         loadQueue[load_idx] = NULL;
         --loads;
 
@@ -728,7 +728,7 @@ OzoneLSQ<Impl>::squash(const InstSeqNum &squashed_num)
             stallingStoreIsn = 0;
         }
 
-        storeQueue[store_idx].inst->squashed = true;
+//        storeQueue[store_idx].inst->squashed = true;
         storeQueue[store_idx].inst = NULL;
         storeQueue[store_idx].canWB = 0;
 
diff --git a/cpu/ozone/lw_back_end.cc b/cpu/ozone/lw_back_end.cc
new file mode 100644
index 000000000..8e9a56ef5
--- /dev/null
+++ b/cpu/ozone/lw_back_end.cc
@@ -0,0 +1,5 @@
+
+#include "cpu/ozone/lw_back_end_impl.hh"
+#include "cpu/ozone/ozone_impl.hh"
+
+template class LWBackEnd<OzoneImpl>;
diff --git a/cpu/ozone/lw_back_end.hh b/cpu/ozone/lw_back_end.hh
new file mode 100644
index 000000000..b89957aad
--- /dev/null
+++ b/cpu/ozone/lw_back_end.hh
@@ -0,0 +1,503 @@
+
+#ifndef __CPU_OZONE_LW_BACK_END_HH__
+#define __CPU_OZONE_LW_BACK_END_HH__
+
+#include <list>
+#include <queue>
+#include <set>
+#include <string>
+
+#include "arch/faults.hh"
+#include "base/timebuf.hh"
+#include "cpu/inst_seq.hh"
+#include "cpu/ozone/rename_table.hh"
+#include "cpu/ozone/thread_state.hh"
+#include "mem/functional/functional.hh"
+#include "mem/mem_interface.hh"
+#include "mem/mem_req.hh"
+#include "sim/eventq.hh"
+
+class ExecContext;
+
+template <class Impl>
+class OzoneThreadState;
+
+template <class Impl>
+class LWBackEnd  // Ozone CPU back end: dispatch, execute, writeback and commit
+{
+  public:
+    typedef OzoneThreadState<Impl> Thread;
+
+    typedef typename Impl::Params Params;
+    typedef typename Impl::DynInst DynInst;
+    typedef typename Impl::DynInstPtr DynInstPtr;
+    typedef typename Impl::FullCPU FullCPU;
+    typedef typename Impl::FrontEnd FrontEnd;
+    typedef typename Impl::FullCPU::CommStruct CommStruct;
+
+    /** Element type shared by the four TimeBuffers typedef'd below. */
+    struct SizeStruct {
+        int size;
+    };
+
+    typedef SizeStruct DispatchToIssue;
+    typedef SizeStruct IssueToExec;
+    typedef SizeStruct ExecToCommit;
+    typedef SizeStruct Writeback;
+
+    TimeBuffer<DispatchToIssue> d2i;
+    typename TimeBuffer<DispatchToIssue>::wire instsToDispatch;
+    TimeBuffer<IssueToExec> i2e;
+    typename TimeBuffer<IssueToExec>::wire instsToExecute;
+    TimeBuffer<ExecToCommit> e2c;
+    TimeBuffer<Writeback> numInstsToWB;
+
+    TimeBuffer<CommStruct> *comm;
+    typename TimeBuffer<CommStruct>::wire toIEW;
+    typename TimeBuffer<CommStruct>::wire fromCommit;
+
+    /** Event created by generateTrapEvent(); process() sets trapSquash. */
+    class TrapEvent : public Event {
+      private:
+        LWBackEnd<Impl> *be;
+
+      public:
+        TrapEvent(LWBackEnd<Impl> *_be);
+
+        void process();
+        const char *description();
+    };
+
+    /** LdWriteback event for a load completion. */
+    class LdWritebackEvent : public Event {
+      private:
+        /** Instruction that is writing back data to the register file. */
+        DynInstPtr inst;
+        /** Pointer to the back end that receives the writeback. */
+        LWBackEnd *be;
+        /** True once setDcacheMiss() has been called on this event. */
+        bool dcacheMiss;
+
+      public:
+        /** Constructs a load writeback event. */
+        LdWritebackEvent(DynInstPtr &_inst, LWBackEnd *be);
+
+        /** Processes writeback event. */
+        virtual void process();
+        /** Returns the description of the writeback event. */
+        virtual const char *description();
+        /** Flags the load as a D-cache miss and records it in the back end. */
+        void setDcacheMiss() { dcacheMiss = true; be->addDcacheMiss(inst); }
+    };
+
+    LWBackEnd(Params *params);
+
+    std::string name() const;
+
+    void regStats();
+
+    void setCPU(FullCPU *cpu_ptr)
+    { cpu = cpu_ptr; }
+
+    void setFrontEnd(FrontEnd *front_end_ptr)
+    { frontEnd = front_end_ptr; }
+
+    void setXC(ExecContext *xc_ptr)
+    { xc = xc_ptr; }
+
+    void setThreadState(Thread *thread_ptr)
+    { thread = thread_ptr; }
+
+    void setCommBuffer(TimeBuffer<CommStruct> *_comm);
+
+    void tick();
+    void squash();
+    void generateXCEvent() { xcSquash = true; }  // only records the request
+    void squashFromXC();
+    void squashFromTrap();
+    void checkInterrupts();
+    bool trapSquash;  // set by TrapEvent::process()
+    bool xcSquash;    // set by generateXCEvent()
+
+    template <class T>
+    Fault read(MemReqPtr &req, T &data, int load_idx);
+
+    template <class T>
+    Fault write(MemReqPtr &req, T &data, int store_idx);
+
+    Addr readCommitPC() { return commitPC; }
+
+    Addr commitPC;
+
+    bool robEmpty() { return instList.empty(); }
+
+    bool isFull() { return numInsts >= numROBEntries; }
+    bool isBlocked() { return status == Blocked || dispatchStatus == Blocked; }
+
+    void fetchFault(Fault &fault);
+
+    int wakeDependents(DynInstPtr &inst);
+
+    /** Puts a memory instruction on the replay list so that it can be
+     * re-executed later. It will re-execute once replayMemInst() is called.
+     */
+    void rescheduleMemInst(DynInstPtr &inst);
+
+    /** Moves all rescheduled memory instructions back to the exeList. */
+    void replayMemInst(DynInstPtr &inst);
+
+    /** Completes memory instruction (currently an empty stub). */
+    void completeMemInst(DynInstPtr &inst) { }
+    /** Records inst in waitingMemOps because of a D-cache miss. */
+    void addDcacheMiss(DynInstPtr &inst)
+    {
+        waitingMemOps.insert(inst->seqNum);
+        numWaitingMemOps++;
+        DPRINTF(BE, "Adding a Dcache miss mem op [sn:%lli], total %i\n",
+                inst->seqNum, numWaitingMemOps);
+    }
+    /** Removes inst from waitingMemOps; asserts that it was recorded. */
+    void removeDcacheMiss(DynInstPtr &inst)
+    {
+        assert(waitingMemOps.find(inst->seqNum) != waitingMemOps.end());
+        waitingMemOps.erase(inst->seqNum);
+        numWaitingMemOps--;
+        DPRINTF(BE, "Removing a Dcache miss mem op [sn:%lli], total %i\n",
+                inst->seqNum, numWaitingMemOps);
+    }
+    /** Records inst in waitingMemOps as a waiting memory op. */
+    void addWaitingMemOp(DynInstPtr &inst)
+    {
+        waitingMemOps.insert(inst->seqNum);
+        numWaitingMemOps++;
+        DPRINTF(BE, "Adding a waiting mem op [sn:%lli], total %i\n",
+                inst->seqNum, numWaitingMemOps);
+    }
+    /** Removes inst from waitingMemOps; asserts that it was recorded. */
+    void removeWaitingMemOp(DynInstPtr &inst)
+    {
+        assert(waitingMemOps.find(inst->seqNum) != waitingMemOps.end());
+        waitingMemOps.erase(inst->seqNum);
+        numWaitingMemOps--;
+        DPRINTF(BE, "Removing a waiting mem op [sn:%lli], total %i\n",
+                inst->seqNum, numWaitingMemOps);
+    }
+
+    void instToCommit(DynInstPtr &inst);
+
+  private:
+    void generateTrapEvent(Tick latency = 0);
+    void handleFault(Fault &fault, Tick latency = 0);
+    void updateStructures();
+    void dispatchInsts();
+    void dispatchStall();
+    void checkDispatchStatus();
+    void executeInsts();
+    void commitInsts();
+    void addToLSQ(DynInstPtr &inst);
+    void writebackInsts();
+    bool commitInst(int inst_num);
+    void squash(const InstSeqNum &sn);
+    void squashDueToBranch(DynInstPtr &inst);
+    void squashDueToMemViolation(DynInstPtr &inst);
+    void squashDueToMemBlocked(DynInstPtr &inst);
+    void updateExeInstStats(DynInstPtr &inst);
+    void updateComInstStats(DynInstPtr &inst);
+
+  public:
+    FullCPU *cpu;
+
+    FrontEnd *frontEnd;
+
+    ExecContext *xc;
+
+    Thread *thread;
+
+    enum Status {
+        Running,
+        Idle,
+        DcacheMissStall,
+        DcacheMissComplete,
+        Blocked,
+        TrapPending
+    };
+
+    Status status;
+
+    Status dispatchStatus;
+
+    Status commitStatus;
+
+    Counter funcExeInst;
+
+  private:
+//    typedef typename Impl::InstQueue InstQueue;
+
+//    InstQueue IQ;
+
+    typedef typename Impl::LdstQueue LdstQueue;
+
+    LdstQueue LSQ;
+  public:
+    RenameTable<Impl> commitRenameTable;
+
+    RenameTable<Impl> renameTable;
+  private:
+    class DCacheCompletionEvent : public Event
+    {
+      private:
+        LWBackEnd *be;
+
+      public:
+        DCacheCompletionEvent(LWBackEnd *_be);
+
+        virtual void process();
+        virtual const char *description();
+    };
+
+    friend class DCacheCompletionEvent;
+
+    DCacheCompletionEvent cacheCompletionEvent;
+
+    MemInterface *dcacheInterface;
+
+    MemReqPtr memReq;
+
+    // General back end width. Used if the more specific isn't given.
+    int width;
+
+    // Dispatch width.
+    int dispatchWidth;
+    int numDispatchEntries;
+    int dispatchSize;
+
+    int waitingInsts;
+
+    int issueWidth;
+
+    // Writeback width
+    int wbWidth;
+
+    // Commit width
+    int commitWidth;
+
+    /** Index into queue of instructions being written back. */
+    unsigned wbNumInst;
+
+    /** Cycle number within the queue of instructions being written
+     * back.  Used in case there are too many instructions writing
+     * back at the current cycle and writebacks need to be scheduled
+     * for the future. See comments in instToCommit().
+     */
+    unsigned wbCycle;
+
+    int numROBEntries;
+    int numInsts;
+
+    std::set<InstSeqNum> waitingMemOps;
+    typedef std::set<InstSeqNum>::iterator MemIt;
+    int numWaitingMemOps;
+    unsigned maxOutstandingMemOps;
+
+    bool squashPending;
+    InstSeqNum squashSeqNum;
+    Addr squashNextPC;
+
+    Fault faultFromFetch;
+    bool fetchHasFault;
+
+  private:
+    struct pqCompare {  // "greater" on seqNum, so the lowest seqNum is on top
+        bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
+        {
+            return lhs->seqNum > rhs->seqNum;
+        }
+    };
+
+    typedef typename std::priority_queue<DynInstPtr, std::vector<DynInstPtr>, pqCompare> ReadyInstQueue;
+    ReadyInstQueue exeList;
+
+    typedef typename std::list<DynInstPtr>::iterator InstListIt;
+
+    std::list<DynInstPtr> instList;
+    std::list<DynInstPtr> waitingList;
+    std::list<DynInstPtr> replayList;
+    std::list<DynInstPtr> writeback;
+
+    int latency;
+
+    int squashLatency;
+
+    bool exactFullStall;
+
+    bool fetchRedirect[Impl::MaxThreads];
+
+    // number of cycles stalled for D-cache misses
+/*    Stats::Scalar<> dcacheStallCycles;
+      Counter lastDcacheStall;
+*/
+    Stats::Vector<> rob_cap_events;
+    Stats::Vector<> rob_cap_inst_count;
+    Stats::Vector<> iq_cap_events;
+    Stats::Vector<> iq_cap_inst_count;
+    // total number of instructions executed
+    Stats::Vector<> exe_inst;
+    Stats::Vector<> exe_swp;
+    Stats::Vector<> exe_nop;
+    Stats::Vector<> exe_refs;
+    Stats::Vector<> exe_loads;
+    Stats::Vector<> exe_branches;
+
+    Stats::Vector<> issued_ops;
+
+    // total number of loads forwarded from LSQ stores
+    Stats::Vector<> lsq_forw_loads;
+
+    // total number of loads ignored due to invalid addresses
+    Stats::Vector<> inv_addr_loads;
+
+    // total number of software prefetches ignored due to invalid addresses
+    Stats::Vector<> inv_addr_swpfs;
+    // ready loads blocked due to memory disambiguation
+    Stats::Vector<> lsq_blocked_loads;
+
+    Stats::Scalar<> lsqInversion;
+
+    Stats::Vector<> n_issued_dist;
+    Stats::VectorDistribution<> issue_delay_dist;
+
+    Stats::VectorDistribution<> queue_res_dist;
+/*
+    Stats::Vector<> stat_fu_busy;
+    Stats::Vector2d<> stat_fuBusy;
+    Stats::Vector<> dist_unissued;
+    Stats::Vector2d<> stat_issued_inst_type;
+
+    Stats::Formula misspec_cnt;
+    Stats::Formula misspec_ipc;
+    Stats::Formula issue_rate;
+    Stats::Formula issue_stores;
+    Stats::Formula issue_op_rate;
+    Stats::Formula fu_busy_rate;
+    Stats::Formula commit_stores;
+    Stats::Formula commit_ipc;
+    Stats::Formula commit_ipb;
+    Stats::Formula lsq_inv_rate;
+*/
+    Stats::Vector<> writeback_count;
+    Stats::Vector<> producer_inst;
+    Stats::Vector<> consumer_inst;
+    Stats::Vector<> wb_penalized;
+
+    Stats::Formula wb_rate;
+    Stats::Formula wb_fanout;
+    Stats::Formula wb_penalized_rate;
+
+    // total number of instructions committed
+    Stats::Vector<> stat_com_inst;
+    Stats::Vector<> stat_com_swp;
+    Stats::Vector<> stat_com_refs;
+    Stats::Vector<> stat_com_loads;
+    Stats::Vector<> stat_com_membars;
+    Stats::Vector<> stat_com_branches;
+
+    Stats::Distribution<> n_committed_dist;
+
+    Stats::Scalar<> commit_eligible_samples;
+    Stats::Vector<> commit_eligible;
+
+    Stats::Scalar<> ROB_fcount;
+    Stats::Formula ROB_full_rate;
+
+    Stats::Vector<>  ROB_count;	 // cumulative ROB occupancy
+    Stats::Formula ROB_occ_rate;
+    Stats::VectorDistribution<> ROB_occ_dist;
+  public:
+    void dumpInsts();
+};
+
+template <class Impl>
+template <class T>
+Fault
+LWBackEnd<Impl>::read(MemReqPtr &req, T &data, int load_idx)
+{   // Forwards the load to the LSQ; the code in the comments below is disabled.
+/*    memReq->reset(addr, sizeof(T), flags);
+
+    // translate to physical address
+    Fault fault = cpu->translateDataReadReq(memReq);
+
+    // if we have a cache, do cache access too
+    if (fault == NoFault && dcacheInterface) {
+        memReq->cmd = Read;
+        memReq->completionEvent = NULL;
+        memReq->time = curTick;
+        memReq->flags &= ~INST_READ;
+        MemAccessResult result = dcacheInterface->access(memReq);
+
+        // Ugly hack to get an event scheduled *only* if the access is
+        // a miss.  We really should add first-class support for this
+        // at some point.
+        if (result != MA_HIT && dcacheInterface->doEvents()) {
+            // Fix this hack for keeping funcExeInst correct with loads that
+            // are executed twice.
+            --funcExeInst;
+
+            memReq->completionEvent = &cacheCompletionEvent;
+            lastDcacheStall = curTick;
+//	    unscheduleTickEvent();
+//	    status = DcacheMissStall;
+            DPRINTF(OzoneCPU, "Dcache miss stall!\n");
+        } else {
+            // do functional access
+            fault = thread->mem->read(memReq, data);
+
+        }
+    }
+*/
+/*
+    if (!dcacheInterface && (memReq->flags & UNCACHEABLE))
+        recordEvent("Uncached Read");
+*/
+    return LSQ.read(req, data, load_idx);  // the only live statement
+}
+
+template <class Impl>
+template <class T>
+Fault
+LWBackEnd<Impl>::write(MemReqPtr &req, T &data, int store_idx)
+{   // Forwards the store to the LSQ; the code in the comments below is disabled.
+/*
+    memReq->reset(addr, sizeof(T), flags);
+
+    // translate to physical address
+    Fault fault = cpu->translateDataWriteReq(memReq);
+
+    if (fault == NoFault && dcacheInterface) {
+        memReq->cmd = Write;
+        memcpy(memReq->data,(uint8_t *)&data,memReq->size);
+        memReq->completionEvent = NULL;
+        memReq->time = curTick;
+        memReq->flags &= ~INST_READ;
+        MemAccessResult result = dcacheInterface->access(memReq);
+
+        // Ugly hack to get an event scheduled *only* if the access is
+        // a miss.  We really should add first-class support for this
+        // at some point.
+        if (result != MA_HIT && dcacheInterface->doEvents()) {
+            memReq->completionEvent = &cacheCompletionEvent;
+            lastDcacheStall = curTick;
+//	    unscheduleTickEvent();
+//	    status = DcacheMissStall;
+            DPRINTF(OzoneCPU, "Dcache miss stall!\n");
+        }
+    }
+
+    if (res && (fault == NoFault))
+        *res = memReq->result;
+        */
+/*
+    if (!dcacheInterface && (memReq->flags & UNCACHEABLE))
+        recordEvent("Uncached Write");
+*/
+    return LSQ.write(req, data, store_idx);  // the only live statement
+}
+
+#endif // __CPU_OZONE_LW_BACK_END_HH__
diff --git a/cpu/ozone/lw_back_end_impl.hh b/cpu/ozone/lw_back_end_impl.hh
new file mode 100644
index 000000000..115821787
--- /dev/null
+++ b/cpu/ozone/lw_back_end_impl.hh
@@ -0,0 +1,1486 @@
+
+#include "encumbered/cpu/full/op_class.hh"
+#include "cpu/ozone/lw_back_end.hh"
+
+// Creates a TrapEvent, schedules it `latency` ticks from the current tick and
+// marks the thread as having a trap pending.
+template <class Impl>
+void LWBackEnd<Impl>::generateTrapEvent(Tick latency)
+{
+    DPRINTF(BE, "Generating trap event\n");
+    // TrapEvent sets Event::AutoDelete on itself; no delete is issued here.
+    TrapEvent *trap_event = new TrapEvent(this);
+    const Tick when = curTick + latency;
+    trap_event->schedule(when);
+    thread->trapPending = true;
+}
+
+template <class Impl>
+int
+LWBackEnd<Impl>::wakeDependents(DynInstPtr &inst)
+{   // Marks a source ready on every dependent of inst; returns their count.
+    assert(!inst->isSquashed());
+    std::vector<DynInstPtr> &dependents = inst->getDependents();
+    int num_outputs = dependents.size();
+
+    DPRINTF(BE, "Waking instruction [sn:%lli] dependents in IQ\n", inst->seqNum);
+
+    for (int i = 0; i < num_outputs; i++) {
+        DynInstPtr dep_inst = dependents[i];
+        dep_inst->markSrcRegReady();
+        DPRINTF(BE, "Marking source reg ready [sn:%lli] in IQ\n", dep_inst->seqNum);
+        // Only ready, in-ROB, speculatively executable insts go to exeList.
+        if (dep_inst->readyToIssue() && dep_inst->isInROB() &&
+            !dep_inst->isNonSpeculative()) {
+            DPRINTF(BE, "Adding instruction to exeList [sn:%lli]\n",
+                    dep_inst->seqNum);
+            exeList.push(dep_inst);
+            if (dep_inst->iqItValid) {  // iqIt still points into waitingList
+                DPRINTF(BE, "Removing instruction from waiting list\n");
+                waitingList.erase(dep_inst->iqIt);
+                waitingInsts--;
+                dep_inst->iqItValid = false;
+                assert(waitingInsts >= 0);
+            }
+            if (dep_inst->isMemRef()) {  // asserts it was a waiting mem op
+                removeWaitingMemOp(dep_inst);
+                DPRINTF(BE, "Issued a waiting mem op [sn:%lli]\n",
+                        dep_inst->seqNum);
+            }
+        }
+    }
+    return num_outputs;
+}
+
+// Puts inst at the front of replayList; replayMemInst() drains that list.
+template <class Impl>
+void LWBackEnd<Impl>::rescheduleMemInst(DynInstPtr &inst)
+{
+    replayList.push_front(inst);
+}
+
+// Creates a trap event for back end _be on mainEventQueue at CPU_Tick_Pri
+// and flags it Event::AutoDelete.
+template <class Impl>
+LWBackEnd<Impl>::TrapEvent::TrapEvent(LWBackEnd<Impl> *_be)
+    : Event(&mainEventQueue, CPU_Tick_Pri), be(_be)
+{ this->setFlags(Event::AutoDelete); }
+
+// Raises the back end's trapSquash flag; the squash itself is not done here.
+template <class Impl>
+void LWBackEnd<Impl>::TrapEvent::process()
+{
+    be->trapSquash = true;
+}
+
+// Fixed human-readable label for this event type.
+template <class Impl>
+const char *LWBackEnd<Impl>::TrapEvent::description()
+{
+    return "Trap event";
+}
+
+// Moves every instruction on replayList onto exeList, leaving the list empty.
+// inst is expected to be one of them (checked with an assert).
+template <class Impl>
+void LWBackEnd<Impl>::replayMemInst(DynInstPtr &inst)
+{
+    bool saw_inst = false;
+    for (; !replayList.empty(); replayList.pop_front()) {
+        DynInstPtr &head = replayList.front();
+        exeList.push(head);
+        if (head == inst)
+            saw_inst = true;
+    }
+    assert(saw_inst);
+}
+
+// Creates a writeback event for load _inst on back end _be; dcacheMiss starts
+// out false and the event is flagged Event::AutoDelete.
+template<class Impl>
+LWBackEnd<Impl>::LdWritebackEvent::LdWritebackEvent(DynInstPtr &_inst,
+                                                  LWBackEnd<Impl> *_be)
+    : Event(&mainEventQueue), inst(_inst), be(_be), dcacheMiss(false)
+{ this->setFlags(Event::AutoDelete); }
+
+template<class Impl>
+void
+LWBackEnd<Impl>::LdWritebackEvent::process()
+{   // Completes the load (unless squashed) and hands it to the back end.
+    DPRINTF(BE, "Load writeback event [sn:%lli]\n", inst->seqNum);
+//    DPRINTF(Activity, "Activity: Ld Writeback event [sn:%lli]\n", inst->seqNum);
+
+    //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum);
+
+//    iewStage->wakeCPU();
+    // A load marked via setDcacheMiss() is removed from the miss bookkeeping.
+    if (dcacheMiss) {
+        be->removeDcacheMiss(inst);
+    }
+    // A squashed load is dropped: clear our pointer and do not write back.
+    if (inst->isSquashed()) {
+        inst = NULL;
+        return;
+    }
+
+    if (!inst->isExecuted()) {
+        inst->setExecuted();
+
+        // Execute again to copy data to proper place.
+        inst->completeAcc();
+    }
+
+    // Need to insert instruction into queue to commit
+    be->instToCommit(inst);
+
+    //wroteToTimeBuffer = true;
+//    iewStage->activityThisCycle();
+    // Clear our pointer now that the instruction has been handed off.
+    inst = NULL;
+}
+
+// Fixed human-readable label for this event type.
+template<class Impl>
+const char *LWBackEnd<Impl>::LdWritebackEvent::description()
+{
+    return "Load writeback event";
+}
+
+
+// Cache completion event for back end _be, on mainEventQueue at CPU_Tick_Pri.
+template <class Impl>
+LWBackEnd<Impl>::DCacheCompletionEvent::DCacheCompletionEvent(LWBackEnd *_be)
+    : Event(&mainEventQueue, CPU_Tick_Pri), be(_be)
+{}
+
+// Empty handler: this event performs no work when it fires.
+template <class Impl>
+void LWBackEnd<Impl>::DCacheCompletionEvent::process()
+{
+}
+
+// Fixed human-readable label for this event type.
+template <class Impl>
+const char *LWBackEnd<Impl>::DCacheCompletionEvent::description()
+{
+    return "Cache completion event";
+}
+
+// Builds the back end from params.  trapSquash, fetchHasFault, status and
+// commitStatus are read by tick()/isBlocked(), so they get defined values here.
+template <class Impl>
+LWBackEnd<Impl>::LWBackEnd(Params *params)
+    : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(5, 5),
+      trapSquash(false), xcSquash(false), cacheCompletionEvent(this),
+      dcacheInterface(params->dcacheInterface), width(params->backEndWidth),
+      exactFullStall(true)
+{
+    numROBEntries = params->numROBEntries;
+    numInsts = 0;
+    numDispatchEntries = 32;
+    maxOutstandingMemOps = 4;
+    numWaitingMemOps = 0;
+    waitingInsts = 0;
+    fetchHasFault = false;
+    LSQ.setBE(this);
+
+    // Wires at offset -1 into the dispatch->issue and issue->exec buffers.
+    instsToDispatch = d2i.getWire(-1);
+    instsToExecute = i2e.getWire(-1);
+
+    // A per-stage width of zero falls back to the general back end width.
+    dispatchWidth = params->dispatchWidth ? params->dispatchWidth : width;
+    issueWidth = params->issueWidth ? params->issueWidth : width;
+    wbWidth = params->wbWidth ? params->wbWidth : width;
+    commitWidth = params->commitWidth ? params->commitWidth : width;
+
+    LSQ.init(params, params->LQEntries, params->SQEntries, 0);
+
+    status = dispatchStatus = commitStatus = Running;
+}
+
+// Returns the CPU's name with ".backend" appended.
+template <class Impl>
+std::string LWBackEnd<Impl>::name() const
+{
+    return cpu->name() + ".backend";
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::regStats()
+{   // Sets up name, description and flags for each back end statistic.
+    using namespace Stats;
+    rob_cap_events
+        .init(cpu->number_of_threads)
+        .name(name() + ".ROB:cap_events")
+        .desc("number of cycles where ROB cap was active")
+        .flags(total)
+        ;
+
+    rob_cap_inst_count
+        .init(cpu->number_of_threads)
+        .name(name() + ".ROB:cap_inst")
+        .desc("number of instructions held up by ROB cap")
+        .flags(total)
+        ;
+
+    iq_cap_events
+        .init(cpu->number_of_threads)
+        .name(name() +".IQ:cap_events" )
+        .desc("number of cycles where IQ cap was active")
+        .flags(total)
+        ;
+
+    iq_cap_inst_count
+        .init(cpu->number_of_threads)
+        .name(name() + ".IQ:cap_inst")
+        .desc("number of instructions held up by IQ cap")
+        .flags(total)
+        ;
+
+    // Per-thread counters registered under the "ISSUE:" prefix.
+    exe_inst
+        .init(cpu->number_of_threads)
+        .name(name() + ".ISSUE:count")
+        .desc("number of insts issued")
+        .flags(total)
+        ;
+
+    exe_swp
+        .init(cpu->number_of_threads)
+        .name(name() + ".ISSUE:swp")
+        .desc("number of swp insts issued")
+        .flags(total)
+        ;
+
+    exe_nop
+        .init(cpu->number_of_threads)
+        .name(name() + ".ISSUE:nop")
+        .desc("number of nop insts issued")
+        .flags(total)
+        ;
+
+    exe_refs
+        .init(cpu->number_of_threads)
+        .name(name() + ".ISSUE:refs")
+        .desc("number of memory reference insts issued")
+        .flags(total)
+        ;
+
+    exe_loads
+        .init(cpu->number_of_threads)
+        .name(name() + ".ISSUE:loads")
+        .desc("number of load insts issued")
+        .flags(total)
+        ;
+
+    exe_branches
+        .init(cpu->number_of_threads)
+        .name(name() + ".ISSUE:branches")
+        .desc("Number of branches issued")
+        .flags(total)
+        ;
+
+    issued_ops
+        .init(cpu->number_of_threads)
+        .name(name() + ".ISSUE:op_count")
+        .desc("number of insts issued")
+        .flags(total)
+        ;
+
+/*
+    for (int i=0; i<Num_OpClasses; ++i) {
+        stringstream subname;
+        subname << opClassStrings[i] << "_delay";
+        issue_delay_dist.subname(i, subname.str());
+    }
+*/
+    //
+    //  LSQ, invalid-address, issue-distribution and queue-residency stats
+    //
+    lsq_forw_loads
+        .init(cpu->number_of_threads)
+        .name(name() + ".LSQ:forw_loads")
+        .desc("number of loads forwarded via LSQ")
+        .flags(total)
+        ;
+
+    inv_addr_loads
+        .init(cpu->number_of_threads)
+        .name(name() + ".ISSUE:addr_loads")
+        .desc("number of invalid-address loads")
+        .flags(total)
+        ;
+
+    inv_addr_swpfs
+        .init(cpu->number_of_threads)
+        .name(name() + ".ISSUE:addr_swpfs")
+        .desc("number of invalid-address SW prefetches")
+        .flags(total)
+        ;
+
+    lsq_blocked_loads
+        .init(cpu->number_of_threads)
+        .name(name() + ".LSQ:blocked_loads")
+        .desc("number of ready loads not issued due to memory disambiguation")
+        .flags(total)
+        ;
+
+    lsqInversion
+        .name(name() + ".ISSUE:lsq_invert")
+        .desc("Number of times LSQ instruction issued early")
+        ;
+
+    n_issued_dist
+        .init(issueWidth + 1)
+        .name(name() + ".ISSUE:issued_per_cycle")
+        .desc("Number of insts issued each cycle")
+        .flags(total | pdf | dist)
+        ;
+    issue_delay_dist
+        .init(Num_OpClasses,0,99,2)
+        .name(name() + ".ISSUE:")
+        .desc("cycles from operands ready to issue")
+        .flags(pdf | cdf)
+        ;
+
+    queue_res_dist
+        .init(Num_OpClasses, 0, 99, 2)
+        .name(name() + ".IQ:residence:")
+        .desc("cycles from dispatch to issue")
+        .flags(total | pdf | cdf )
+        ;
+    for (int i = 0; i < Num_OpClasses; ++i) {
+        queue_res_dist.subname(i, opClassStrings[i]);
+    }
+
+    writeback_count
+        .init(cpu->number_of_threads)
+        .name(name() + ".WB:count")
+        .desc("cumulative count of insts written-back")
+        .flags(total)
+        ;
+
+    producer_inst
+        .init(cpu->number_of_threads)
+        .name(name() + ".WB:producers")
+        .desc("num instructions producing a value")
+        .flags(total)
+        ;
+
+    consumer_inst
+        .init(cpu->number_of_threads)
+        .name(name() + ".WB:consumers")
+        .desc("num instructions consuming a value")
+        .flags(total)
+        ;
+
+    wb_penalized
+        .init(cpu->number_of_threads)
+        .name(name() + ".WB:penalized")
+        .desc("number of instrctions required to write to 'other' IQ")
+        .flags(total)
+        ;
+
+    // Formulas computed from the writeback counters registered above.
+    wb_penalized_rate
+        .name(name() + ".WB:penalized_rate")
+        .desc ("fraction of instructions written-back that wrote to 'other' IQ")
+        .flags(total)
+        ;
+
+    wb_penalized_rate = wb_penalized / writeback_count;
+
+    wb_fanout
+        .name(name() + ".WB:fanout")
+        .desc("average fanout of values written-back")
+        .flags(total)
+        ;
+
+    wb_fanout = producer_inst / consumer_inst;
+
+    wb_rate
+        .name(name() + ".WB:rate")
+        .desc("insts written-back per cycle")
+        .flags(total)
+        ;
+    wb_rate = writeback_count / cpu->numCycles;
+
+    stat_com_inst
+        .init(cpu->number_of_threads)
+        .name(name() + ".COM:count")
+        .desc("Number of instructions committed")
+        .flags(total)
+        ;
+
+    stat_com_swp
+        .init(cpu->number_of_threads)
+        .name(name() + ".COM:swp_count")
+        .desc("Number of s/w prefetches committed")
+        .flags(total)
+        ;
+
+    stat_com_refs
+        .init(cpu->number_of_threads)
+        .name(name() +  ".COM:refs")
+        .desc("Number of memory references committed")
+        .flags(total)
+        ;
+
+    stat_com_loads
+        .init(cpu->number_of_threads)
+        .name(name() +  ".COM:loads")
+        .desc("Number of loads committed")
+        .flags(total)
+        ;
+
+    stat_com_membars
+        .init(cpu->number_of_threads)
+        .name(name() +  ".COM:membars")
+        .desc("Number of memory barriers committed")
+        .flags(total)
+        ;
+
+    stat_com_branches
+        .init(cpu->number_of_threads)
+        .name(name() + ".COM:branches")
+        .desc("Number of branches committed")
+        .flags(total)
+        ;
+    n_committed_dist
+        .init(0,commitWidth,1)
+        .name(name() + ".COM:committed_per_cycle")
+        .desc("Number of insts commited each cycle")
+        .flags(pdf)
+        ;
+
+    //
+    //  Commit-Eligible instructions...
+    //
+    //  -> The number of instructions eligible to commit in those
+    //  cycles where we reached our commit BW limit (less the number
+    //  actually committed)
+    //
+    //  -> The average value is computed over ALL CYCLES... not just
+    //  the BW limited cycles
+    //
+    //  -> The standard deviation is computed only over cycles where
+    //  we reached the BW limit
+    //
+    commit_eligible
+        .init(cpu->number_of_threads)
+        .name(name() + ".COM:bw_limited")
+        .desc("number of insts not committed due to BW limits")
+        .flags(total)
+        ;
+
+    commit_eligible_samples
+        .name(name() + ".COM:bw_lim_events")
+        .desc("number cycles where commit BW limit reached")
+        ;
+
+    ROB_fcount
+        .name(name() + ".ROB:full_count")
+        .desc("number of cycles where ROB was full")
+        ;
+
+    ROB_count
+        .init(cpu->number_of_threads)
+        .name(name() + ".ROB:occupancy")
+        .desc(name() + ".ROB occupancy (cumulative)")
+        .flags(total)
+        ;
+
+    ROB_full_rate
+        .name(name() + ".ROB:full_rate")
+        .desc("ROB full per cycle")
+        ;
+    ROB_full_rate = ROB_fcount / cpu->numCycles;
+
+    ROB_occ_rate
+        .name(name() + ".ROB:occ_rate")
+        .desc("ROB occupancy rate")
+        .flags(total)
+        ;
+    ROB_occ_rate = ROB_count / cpu->numCycles;
+
+    ROB_occ_dist
+        .init(cpu->number_of_threads,0,numROBEntries,2)
+        .name(name() + ".ROB:occ_dist")
+        .desc("ROB Occupancy per cycle")
+        .flags(total | cdf)
+        ;
+
+//    IQ.regStats();
+}
+
+// Stores the comm buffer and takes wire 0 as toIEW and wire -1 as fromCommit.
+template <class Impl>
+void LWBackEnd<Impl>::setCommBuffer(TimeBuffer<CommStruct> *_comm)
+{
+    comm = _comm;
+    toIEW = _comm->getWire(0);
+    fromCommit = _comm->getWire(-1);
+}
+
+#if FULL_SYSTEM
+// Asks the CPU whether an interrupt can be taken now; if so, lets the CPU
+// process it, marks commit as TrapPending and schedules a trap squash event.
+template <class Impl>
+void LWBackEnd<Impl>::checkInterrupts()
+{
+    // Nothing to do unless an interrupt is both flagged and pending.
+    if (!cpu->checkInterrupts || !cpu->check_interrupts())
+        return;
+
+    // No interrupt is taken in PAL mode or while a squash is outstanding.
+    if (cpu->inPalMode(thread->readPC()) || trapSquash || xcSquash)
+        return;
+
+    // Every instruction in flight will need to be squashed so that the
+    // interrupt handler restarts at the last non-committed instruction.
+    // processInterrupts() decides whether an interrupt is really taken.
+
+    // It is not settled which thread should be interrupted; for now the
+    // back end's own thread (thread 0) is always used.
+    assert(!thread->inSyscall);
+
+    // Enter state update mode while the CPU implements the interrupt.
+    thread->inSyscall = true;
+    cpu->processInterrupts();
+
+    // Record that a squash is needed this cycle.
+    commitStatus = TrapPending;
+
+    // Leave state update mode to avoid accidental updating.
+    thread->inSyscall = false;
+
+    // Generate trap squash event.
+    generateTrapEvent();
+    DPRINTF(BE, "Interrupt detected.\n");
+}
+
+// Invokes fault on the thread's XC proxy, marks commit as TrapPending and
+// schedules the trap squash event `latency` ticks from now.
+template <class Impl>
+void
+LWBackEnd<Impl>::handleFault(Fault &fault, Tick latency)
+{
+    DPRINTF(BE, "Handling fault!\n");
+
+    assert(!thread->inSyscall);
+    thread->inSyscall = true;
+
+    // Consider holding onto the trap and waiting until the trap event
+    // happens for this to be executed.
+    fault->invoke(thread->getXCProxy());
+
+    // Exit state update mode to avoid accidental updating.
+    thread->inSyscall = false;
+
+    commitStatus = TrapPending;
+    // Generate trap squash event.
+    generateTrapEvent(latency);
+}
+#endif
+
+template <class Impl>
+void
+LWBackEnd<Impl>::tick()
+{   // One back end cycle: squash/fault handling, dispatch, execute, commit.
+    DPRINTF(BE, "Ticking back end\n");
+    // Add this cycle's instruction count to the cumulative ROB occupancy.
+    ROB_count[0]+= numInsts;
+
+    wbCycle = 0;
+
+#if FULL_SYSTEM
+    checkInterrupts();
+    // At most one of: trap squash, XC squash, or pending fetch fault.
+    if (trapSquash) {
+        assert(!xcSquash);
+        squashFromTrap();
+    } else if (xcSquash) {
+        squashFromXC();
+    } else if (fetchHasFault && robEmpty() && frontEnd->isEmpty()) {
+        DPRINTF(BE, "ROB and front end empty, handling fetch fault\n");
+        Fault fetch_fault = frontEnd->getFault();
+        if (fetch_fault == NoFault) {
+            DPRINTF(BE, "Fetch no longer has a fault, cancelling out.\n");
+            fetchHasFault = false;
+        } else {
+            handleFault(fetch_fault);
+            fetchHasFault = false;
+        }
+    }
+#endif
+
+    // Read in any done instruction information and update the IQ or LSQ.
+    updateStructures();
+
+    if (dispatchStatus != Blocked) {
+        dispatchInsts();
+    } else {
+        checkDispatchStatus();
+    }
+    // Execute and commit are skipped while a trap is pending.
+    if (commitStatus != TrapPending) {
+        executeInsts();
+
+        commitInsts();
+    }
+
+    LSQ.writebackStores();
+
+    DPRINTF(BE, "Waiting insts: %i, mem ops: %i, ROB entries in use: %i, "
+            "LSQ loads: %i, LSQ stores: %i\n",
+            waitingInsts, numWaitingMemOps, numInsts,
+            LSQ.numLoads(), LSQ.numStores());
+
+#ifdef DEBUG
+    assert(numInsts == instList.size());
+    assert(waitingInsts == waitingList.size());
+    assert(numWaitingMemOps == waitingMemOps.size());
+#endif
+}
+
+/**
+ * Reads the information sent back through fromCommit.  A non-zero
+ * doneSeqNum is handed to the LSQ's commitLoads() and commitStores();
+ * the non-speculative handling below is currently commented out.
+ */
+template <class Impl>
+void
+LWBackEnd<Impl>::updateStructures()
+{
+    if (fromCommit->doneSeqNum) {
+        LSQ.commitLoads(fromCommit->doneSeqNum);
+        LSQ.commitStores(fromCommit->doneSeqNum);
+    }
+
+    if (!fromCommit->nonSpecSeqNum)
+        return;
+
+    // Both branches are placeholders; the calls they would make are
+    // kept here commented out.
+    if (!fromCommit->uncached) {
+//            IQ.scheduleNonSpec(
+//                fromCommit->nonSpecSeqNum);
+    } else {
+//            LSQ.executeLoad(fromCommit->lqIdx);
+    }
+}
+
+/**
+ * Inserts an instruction into the LSQ.
+ *
+ * @param inst The instruction to insert.
+ */
+template <class Impl>
+void
+LWBackEnd<Impl>::addToLSQ(DynInstPtr &inst)
+{
+    // Do anything LSQ specific here?
+    LSQ.insert(inst);
+}
+
+/**
+ * Pulls instructions from the front end into the ROB (instList) while
+ * there is ROB space and room for more outstanding memory ops.  Ready,
+ * speculative instructions go straight to exeList; everything else is
+ * parked on waitingList.  Memory references are inserted into the LSQ
+ * either way.
+ */
+template <class Impl>
+void
+LWBackEnd<Impl>::dispatchInsts()
+{
+    DPRINTF(BE, "Trying to dispatch instructions.\n");
+
+    while (numInsts < numROBEntries &&
+           numWaitingMemOps < maxOutstandingMemOps) {
+        DynInstPtr inst = frontEnd->getInst();
+
+        // The front end has nothing more to hand over this cycle.
+        if (!inst)
+            break;
+
+        // Squashed instructions are dropped without taking a ROB entry.
+        if (inst->isSquashed())
+            continue;
+
+        // Newest instructions live at the front of instList.
+        ++numInsts;
+        instList.push_front(inst);
+
+        inst->setInROB();
+
+        DPRINTF(BE, "Dispatching instruction [sn:%lli] PC:%#x\n",
+                inst->seqNum, inst->readPC());
+
+        // This instruction is now the latest producer of each of its
+        // destination registers.
+        for (int dest = 0; dest < inst->numDestRegs(); ++dest) {
+            renameTable[inst->destRegIdx(dest)] = inst;
+        }
+
+        const bool issue_now =
+            inst->readyToIssue() && !inst->isNonSpeculative();
+
+        if (issue_now) {
+            DPRINTF(BE, "Instruction [sn:%lli] ready, addding to exeList.\n",
+                    inst->seqNum);
+            exeList.push(inst);
+            if (inst->isMemRef()) {
+                LSQ.insert(inst);
+            }
+            continue;
+        }
+
+        // Not issuable yet: either operands are missing or the
+        // instruction is non-speculative.
+        if (inst->isNonSpeculative()) {
+            inst->setCanCommit();
+            DPRINTF(BE, "Adding non speculative instruction\n");
+        }
+
+        if (inst->isMemRef()) {
+            addWaitingMemOp(inst);
+            LSQ.insert(inst);
+        }
+
+        DPRINTF(BE, "Instruction [sn:%lli] not ready, addding to "
+                "waitingList.\n",
+                inst->seqNum);
+
+        // Record the list position on the instruction so it can be
+        // erased from waitingList later without searching.
+        waitingList.push_front(inst);
+        inst->iqIt = waitingList.begin();
+        inst->iqItValid = true;
+        waitingInsts++;
+    }
+
+    // Check if IQ or LSQ is full.  If so we'll need to break and stop
+    // removing instructions.  Also update the number of insts to remove
+    // from the queue.  Check here if we don't care about exact stall
+    // conditions.
+/*
+    bool stall = false;
+    if (IQ.isFull()) {
+        DPRINTF(BE, "IQ is full!\n");
+        stall = true;
+    } else if (LSQ.isFull()) {
+        DPRINTF(BE, "LSQ is full!\n");
+        stall = true;
+    } else if (isFull()) {
+        DPRINTF(BE, "ROB is full!\n");
+        stall = true;
+        ROB_fcount++;
+    }
+    if (stall) {
+        d2i.advance();
+        dispatchStall();
+        return;
+    }
+*/
+}
+
+/**
+ * Marks dispatch as blocked.  Notifying a non-decoupled front end is not
+ * implemented yet.
+ */
+template <class Impl>
+void
+LWBackEnd<Impl>::dispatchStall()
+{
+    dispatchStatus = Blocked;
+
+    if (cpu->decoupledFrontEnd)
+        return;
+
+    // Tell front end to stall here through a timebuffer, or just tell
+    // it directly.
+}
+
+/**
+ * Called while dispatch is blocked: once neither the LSQ nor the ROB is
+ * full, dispatch is switched back to Running and resumed immediately.
+ */
+template <class Impl>
+void
+LWBackEnd<Impl>::checkDispatchStatus()
+{
+    DPRINTF(BE, "Checking dispatch status\n");
+    assert(dispatchStatus == Blocked);
+
+    // Still out of room somewhere; stay blocked.
+    if (LSQ.isFull() || isFull())
+        return;
+
+    DPRINTF(BE, "Dispatch no longer blocked\n");
+    dispatchStatus = Running;
+    dispatchInsts();
+}
+
+/**
+ * Executes instructions from exeList until the list is empty or
+ * issueWidth instructions have been processed this cycle.
+ *
+ * Squashed instructions are marked executed and committable and count
+ * towards the width.  Memory references are handed to the LSQ; the loop
+ * stops early (leaving the instruction on exeList) if the dcache is
+ * blocked.  Everything else is executed directly and sent to commit.
+ * A mispredicted instruction triggers a branch squash and ends the loop;
+ * an LSQ violation triggers a squash but the loop keeps going.
+ */
+template <class Impl>
+void
+LWBackEnd<Impl>::executeInsts()
+{
+    DPRINTF(BE, "Trying to execute instructions\n");
+
+    int num_executed = 0;
+    while (!exeList.empty() && num_executed < issueWidth) {
+        // Peek only; the instruction is popped once it has been handled.
+        DynInstPtr inst = exeList.top();
+
+        DPRINTF(BE, "Executing inst [sn:%lli] PC: %#x\n",
+                inst->seqNum, inst->readPC());
+
+        // Check if the instruction is squashed; if so then skip it
+        // and don't count it towards the FU usage.
+        if (inst->isSquashed()) {
+            DPRINTF(BE, "Execute: Instruction was squashed.\n");
+
+            // Not sure how to handle this plus the method of sending # of
+            // instructions to use.  Probably will just have to count it
+            // towards the bandwidth usage, but not the FU usage.
+            ++num_executed;
+
+            // Consider this instruction executed so that commit can go
+            // ahead and retire the instruction.
+            inst->setExecuted();
+
+            // Not sure if I should set this here or just let commit try to
+            // commit any squashed instructions.  I like the latter a bit more.
+            inst->setCanCommit();
+
+//            ++iewExecSquashedInsts;
+            exeList.pop();
+
+            continue;
+        }
+
+        Fault fault = NoFault;
+
+        // Execute instruction.
+        // Note that if the instruction faults, it will be handled
+        // at the commit stage.
+        if (inst->isMemRef() &&
+            (!inst->isDataPrefetch() && !inst->isInstPrefetch())) {
+            if (dcacheInterface->isBlocked()) {
+                // Should I move the instruction aside?
+                // The instruction is not popped, so it stays at the top of
+                // exeList for a later cycle.
+                DPRINTF(BE, "Execute: dcache is blocked\n");
+                break;
+            }
+            DPRINTF(BE, "Execute: Initiating access for memory "
+                    "reference.\n");
+
+            if (inst->isLoad()) {
+                // Loads are only started here; they are not marked
+                // executed or sent to commit in this function.
+                LSQ.executeLoad(inst);
+            } else if (inst->isStore()) {
+                LSQ.executeStore(inst);
+                // Stores with a request that is not LOCKED are marked
+                // executed and sent to commit right away.
+                if (inst->req && !(inst->req->flags & LOCKED)) {
+                    inst->setExecuted();
+
+                    instToCommit(inst);
+                }
+            } else {
+                panic("Unknown mem type!");
+            }
+        } else {
+            inst->execute();
+
+            inst->setExecuted();
+
+            instToCommit(inst);
+        }
+
+        updateExeInstStats(inst);
+
+        ++funcExeInst;
+        ++num_executed;
+        // keep an instruction count
+        thread->numInst++;
+        thread->numInsts++;
+
+        exeList.pop();
+
+        if (inst->mispredicted()) {
+            // Nothing younger should execute after a misprediction, so
+            // stop executing for this cycle.
+            squashDueToBranch(inst);
+            break;
+        } else if (LSQ.violation()) {
+            // Get the DynInst that caused the violation.  Note that this
+            // clears the violation signal.
+            DynInstPtr violator;
+            violator = LSQ.getMemDepViolator();
+
+            DPRINTF(BE, "LDSTQ detected a violation.  Violator PC: "
+                    "%#x, inst PC: %#x.  Addr is: %#x.\n",
+                    violator->readPC(), inst->readPC(), inst->physEffAddr);
+
+            // Squash.  Note that the squash point is the instruction that
+            // just executed, not the violator; the violator is only used
+            // for the trace message above.
+            squashDueToMemViolation(inst);
+        }
+    }
+
+    // Record how many instructions were processed this cycle.
+    issued_ops[0]+= num_executed;
+    n_issued_dist[num_executed]++;
+}
+
+/**
+ * Hands an instruction over to commit.  A non-squashed instruction is
+ * marked committable; if it has also executed it is marked completed and
+ * its dependents are woken.  The writeback counter is bumped in every
+ * case, squashed or not.
+ *
+ * @param inst The instruction being sent to commit.
+ */
+template<class Impl>
+void
+LWBackEnd<Impl>::instToCommit(DynInstPtr &inst)
+{
+    DPRINTF(BE, "Sending instructions to commit [sn:%lli] PC %#x.\n",
+            inst->seqNum, inst->readPC());
+
+    // Squashed instructions only contribute to the writeback count.
+    if (inst->isSquashed()) {
+        writeback_count[0]++;
+        return;
+    }
+
+    DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n",
+            inst->seqNum, inst->readPC());
+
+    inst->setCanCommit();
+
+    if (inst->isExecuted()) {
+        inst->setCompleted();
+
+        // Producer/consumer stats are only touched when at least one
+        // dependent was woken.
+        const int num_woken = wakeDependents(inst);
+        if (num_woken != 0) {
+            producer_inst[0]++;
+            consumer_inst[0] += num_woken;
+        }
+    }
+
+    writeback_count[0]++;
+}
+
+/**
+ * Drains up to wbWidth instructions from the writeback list.  Each
+ * non-squashed instruction is marked committable and completed, and wakes
+ * its dependents if it has executed.  Afterwards the LSQ writes back
+ * stores and the writeback statistics are updated.
+ */
+template <class Impl>
+void
+LWBackEnd<Impl>::writebackInsts()
+{
+    // Using this method I'm not quite sure how to prevent an
+    // instruction from waking its own dependents multiple times,
+    // without the guarantee that commit always has enough bandwidth
+    // to accept all instructions being written back.  This guarantee
+    // might not be too unrealistic.
+    const int max_writebacks = wbWidth;
+    int num_written_back = 0;
+    int total_woken = 0;
+
+    InstListIt cur_it = writeback.begin();
+    InstListIt last_it = writeback.end();
+
+    while (num_written_back < max_writebacks && cur_it != last_it) {
+        DynInstPtr inst = (*cur_it);
+
+        // Some instructions will be sent to commit without having
+        // executed because they need commit to handle them.
+        // E.g. Uncached loads have not actually executed when they
+        // are first sent to commit.  Instead commit must tell the LSQ
+        // when it's ready to execute the uncached load.
+        if (!inst->isSquashed()) {
+            DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n",
+                    inst->seqNum, inst->readPC());
+
+            inst->setCanCommit();
+            inst->setCompleted();
+
+            if (inst->isExecuted()) {
+                const int num_woken = wakeDependents(inst);
+                if (num_woken != 0) {
+                    producer_inst[0]++;
+                    total_woken += num_woken;
+                }
+            }
+        }
+
+        // Advance before erasing so the iterator stays valid.
+        writeback.erase(cur_it++);
+        ++num_written_back;
+    }
+
+    LSQ.writebackStores();
+
+    consumer_inst[0] += total_woken;
+    writeback_count[0] += num_written_back;
+}
+
+/**
+ * Tries to commit the instruction at the head of the ROB, i.e. the back
+ * of instList.
+ *
+ * @param inst_num Number passed in by commitInsts() for this attempt.  It
+ *                 is only read in the !FULL_SYSTEM build, where a
+ *                 non-speculative instruction is held back unless
+ *                 inst_num is 0 and the LSQ has no stores to write back.
+ * @return true if the instruction was retired and commit may continue.
+ *         false if the head is not ready, had to be sent off to execute
+ *         first, or faulted (FULL_SYSTEM).  In FULL_SYSTEM false is also
+ *         returned after the instruction was retired when a PC event
+ *         changed the PC.
+ */
+template <class Impl>
+bool
+LWBackEnd<Impl>::commitInst(int inst_num)
+{
+    // Read instruction from the head of the ROB
+    DynInstPtr inst = instList.back();
+
+    // Make sure instruction is valid
+    assert(inst);
+
+    if (!inst->readyToCommit())
+        return false;
+
+    DPRINTF(BE, "Trying to commit instruction [sn:%lli] PC:%#x\n",
+            inst->seqNum, inst->readPC());
+
+    // The thread's PC state is updated before the instruction is known to
+    // retire, so it is in place for the fault path below.
+    thread->setPC(inst->readPC());
+    thread->setNextPC(inst->readNextPC());
+    inst->reachedCommit = true;
+
+    // If the instruction is not executed yet, then it is a non-speculative
+    // or store inst.  Signal backwards that it should be executed.
+    if (!inst->isExecuted()) {
+        if (inst->isNonSpeculative()) {
+#if !FULL_SYSTEM
+            // Hack to make sure syscalls aren't executed until all stores
+            // write back their data.  This direct communication shouldn't
+            // be used for anything other than this.
+            if (inst_num > 0 || LSQ.hasStoresToWB())
+#else
+            if ((inst->isMemBarrier() || inst->isWriteBarrier() ||
+                    inst->isQuiesce()) &&
+                LSQ.hasStoresToWB())
+#endif
+            {
+                DPRINTF(BE, "Waiting for all stores to writeback.\n");
+                return false;
+            }
+
+            DPRINTF(BE, "Encountered a store or non-speculative "
+                    "instruction at the head of the ROB, PC %#x.\n",
+                    inst->readPC());
+
+            // Take the instruction off the waiting list, if it is still
+            // on it, before queueing it for execution.
+            if (inst->iqItValid) {
+                DPRINTF(BE, "Removing instruction from waiting list\n");
+                waitingList.erase(inst->iqIt);
+                inst->iqItValid = false;
+                waitingInsts--;
+                assert(waitingInsts >= 0);
+                if (inst->isStore())
+                    removeWaitingMemOp(inst);
+            }
+
+            exeList.push(inst);
+
+            // Change the instruction so it won't try to commit again until
+            // it is executed.
+            inst->clearCanCommit();
+
+//            ++commitNonSpecStalls;
+
+            return false;
+        } else if (inst->isLoad()) {
+            DPRINTF(BE, "[sn:%lli]: Uncached load, PC %#x.\n",
+                    inst->seqNum, inst->readPC());
+
+            // Take the load off the waiting list, if it is still on it,
+            // and have it replayed.  Maybe just tell the lsq to
+            // re-execute the load.
+            if (inst->iqItValid) {
+                DPRINTF(BE, "Removing instruction from waiting list\n");
+                waitingList.erase(inst->iqIt);
+                inst->iqItValid = false;
+                waitingInsts--;
+                assert(waitingInsts >= 0);
+                removeWaitingMemOp(inst);
+            }
+            replayMemInst(inst);
+
+            // As above: do not retry the commit until it has executed.
+            inst->clearCanCommit();
+
+            return false;
+        } else {
+            panic("Trying to commit un-executed instruction "
+                  "of unknown type!\n");
+        }
+    }
+
+    // Now check if it's one of the special trap or barrier or
+    // serializing instructions.
+    if (inst->isThreadSync())
+    {
+        // Not handled for now.
+        panic("Thread sync instructions are not handled yet.\n");
+    }
+
+    // Check if the instruction caused a fault.  If so, trap.
+    Fault inst_fault = inst->getFault();
+
+    if (inst_fault != NoFault) {
+        // Faults on nops are ignored and the nop retires normally.
+        if (!inst->isNop()) {
+            DPRINTF(BE, "Inst [sn:%lli] PC %#x has a fault\n",
+                    inst->seqNum, inst->readPC());
+            thread->setInst(
+                static_cast<TheISA::MachInst>(inst->staticInst->machInst));
+#if FULL_SYSTEM
+            // The faulting instruction stays at the head of the ROB.
+            handleFault(inst_fault);
+            return false;
+#else // !FULL_SYSTEM
+            panic("fault (%d) detected @ PC %08p", inst_fault,
+                  inst->PC);
+#endif // FULL_SYSTEM
+        }
+    }
+
+    if (inst->isControl()) {
+//        ++commitCommittedBranches;
+    }
+
+    int freed_regs = 0;
+
+    // Record this instruction in the thread's rename table as the
+    // producer of each of its destination registers.
+    for (int i = 0; i < inst->numDestRegs(); ++i) {
+        DPRINTF(BE, "Commit rename map setting reg %i to [sn:%lli]\n",
+                (int)inst->destRegIdx(i), inst->seqNum);
+        thread->renameTable[inst->destRegIdx(i)] = inst;
+        ++freed_regs;
+    }
+
+    if (inst->traceData) {
+        inst->traceData->finalize();
+        inst->traceData = NULL;
+    }
+
+    inst->clearDependents();
+
+    frontEnd->addFreeRegs(freed_regs);
+
+    // The instruction retires: remove it from the ROB and update counts.
+    instList.pop_back();
+
+    --numInsts;
+    cpu->numInst++;
+    thread->numInsts++;
+    ++thread->funcExeInst;
+    // Maybe move this to where the fault is handled; if the fault is handled,
+    // don't try to set this myself as the fault will set it.  If not, then
+    // I set thread->PC = thread->nextPC.
+    // NOTE(review): only the PC is advanced here; nextPC keeps the value
+    // assigned from the instruction above.
+    thread->setPC(thread->readNextPC());
+    updateComInstStats(inst);
+
+    // Write the done sequence number here.
+//    LSQ.commitLoads(inst->seqNum);
+//    LSQ.commitStores(inst->seqNum);
+    toIEW->doneSeqNum = inst->seqNum;
+
+#if FULL_SYSTEM
+    // Service PC events until the PC stops changing.  More than one pass
+    // means an event redirected the PC, so commit stops and an XC squash
+    // is requested.
+    int count = 0;
+    Addr oldpc;
+    do {
+        if (count == 0)
+            assert(!thread->inSyscall && !thread->trapPending);
+        oldpc = thread->readPC();
+        cpu->system->pcEventQueue.service(
+            thread->getXCProxy());
+        count++;
+    } while (oldpc != thread->readPC());
+    if (count > 1) {
+        DPRINTF(BE, "PC skip function event, stopping commit\n");
+        xcSquash = true;
+        return false;
+    }
+#endif
+    return true;
+}
+
+/**
+ * Commits instructions from the head of the ROB (the back of instList)
+ * until the ROB is empty, the commit width is reached, or commitInst()
+ * asks to stop.  Squashed instructions at the head are discarded without
+ * counting towards the width.  The number of instructions actually
+ * retired is sampled into n_committed_dist.
+ */
+template <class Impl>
+void
+LWBackEnd<Impl>::commitInsts()
+{
+    // A commitWidth of 0 means "use the general machine width".
+    int commit_width = commitWidth ? commitWidth : width;
+
+    // Number of instructions retired so far this cycle.  It is also what
+    // commitInst() receives as its inst_num argument.
+    int inst_num = 0;
+    while (!instList.empty() && inst_num < commit_width) {
+        if (instList.back()->isSquashed()) {
+            instList.back()->clearDependents();
+            instList.pop_back();
+            --numInsts;
+            continue;
+        }
+
+        const int rob_insts_before = numInsts;
+        const bool keep_committing = commitInst(inst_num);
+
+        // Count the instruction only if it really left the ROB.
+        // commitInst() decrements numInsts when it retires the head; it
+        // can return false both without retiring (head not ready, fault)
+        // and after retiring (PC event), so the return value alone is not
+        // enough to keep the statistic honest.
+        if (numInsts < rob_insts_before)
+            ++inst_num;
+
+        if (!keep_committing) {
+            // The ROB may have just been emptied, in which case there is
+            // no head left to report on.
+            if (!instList.empty()) {
+                DPRINTF(BE, "Can't commit, Instruction [sn:%lli] PC "
+                        "%#x is head of ROB and not ready\n",
+                        instList.back()->seqNum, instList.back()->readPC());
+            }
+            break;
+        }
+    }
+    n_committed_dist.sample(inst_num);
+}
+
+/**
+ * Squashes every instruction in the back end that is younger than the
+ * given sequence number, i.e. has seqNum > sn.
+ *
+ * The LSQ is squashed first.  Then younger instructions are removed from
+ * waitingList and from instList, with the rename table rolled back to
+ * each squashed instruction's previous producer.  Finally any remaining
+ * squashed entries are swept from waitingList and the freed registers
+ * are reported to the front end.
+ *
+ * @param sn Sequence number of the youngest instruction to keep.
+ */
+template <class Impl>
+void
+LWBackEnd<Impl>::squash(const InstSeqNum &sn)
+{
+    LSQ.squash(sn);
+
+    int freed_regs = 0;
+    InstListIt waiting_list_end = waitingList.end();
+    InstListIt insts_it = waitingList.begin();
+
+    // Both lists hold the newest instructions at the front, so walking
+    // from begin() visits the youngest entries first.
+    while (insts_it != waiting_list_end && (*insts_it)->seqNum > sn)
+    {
+        // Already-squashed entries are left for the sweep at the end.
+        if ((*insts_it)->isSquashed()) {
+            ++insts_it;
+            continue;
+        }
+        DPRINTF(BE, "Squashing instruction on waitingList PC %#x, [sn:%lli].\n",
+                (*insts_it)->readPC(),
+                (*insts_it)->seqNum);
+
+        if ((*insts_it)->isMemRef()) {
+            DPRINTF(BE, "Squashing a waiting mem op [sn:%lli]\n",
+                    (*insts_it)->seqNum);
+            removeWaitingMemOp((*insts_it));
+        }
+
+        waitingList.erase(insts_it++);
+        waitingInsts--;
+    }
+    assert(waitingInsts >= 0);
+
+    insts_it = instList.begin();
+
+    // Compare against end() rather than only testing for an empty list:
+    // skipping already-squashed entries can walk the iterator to end()
+    // while the list still has elements, and end() must not be
+    // dereferenced.
+    while (insts_it != instList.end() && (*insts_it)->seqNum > sn)
+    {
+        if ((*insts_it)->isSquashed()) {
+            ++insts_it;
+            continue;
+        }
+        DPRINTF(BE, "Squashing instruction on inst list PC %#x, [sn:%lli].\n",
+                (*insts_it)->readPC(),
+                (*insts_it)->seqNum);
+
+        // Mark the instruction as squashed, and ready to commit so that
+        // it can drain out of the pipeline.
+        (*insts_it)->setSquashed();
+
+        (*insts_it)->setCanCommit();
+
+        (*insts_it)->removeInROB();
+
+        // Point each destination register back at the instruction that
+        // produced it before this one was dispatched.
+        for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) {
+            DynInstPtr prev_dest = (*insts_it)->getPrevDestInst(i);
+            DPRINTF(BE, "Commit rename map setting reg %i to [sn:%lli]\n",
+                    (int)(*insts_it)->destRegIdx(i), prev_dest->seqNum);
+            renameTable[(*insts_it)->destRegIdx(i)] = prev_dest;
+            ++freed_regs;
+        }
+
+        (*insts_it)->clearDependents();
+
+        instList.erase(insts_it++);
+        --numInsts;
+    }
+
+    // Sweep waitingList for entries that were squashed but skipped above.
+    // NOTE(review): this skips entries with seqNum < sn, whereas the
+    // loops above treat seqNum == sn as not squashed; an un-squashed
+    // entry with seqNum == sn on waitingList would trip the assert.
+    // Confirm that such an entry can never be on the list here.
+    insts_it = waitingList.begin();
+    while (!waitingList.empty() && insts_it != waitingList.end()) {
+        if ((*insts_it)->seqNum < sn) {
+            ++insts_it;
+            continue;
+        }
+        assert((*insts_it)->isSquashed());
+
+        waitingList.erase(insts_it++);
+        waitingInsts--;
+    }
+
+    frontEnd->addFreeRegs(freed_regs);
+}
+
+/**
+ * Performs the squash requested through xcSquash: squashes the whole ROB
+ * and the front end, restarts the front end at the thread's current PC
+ * and returns commit to Running.
+ */
+template <class Impl>
+void
+LWBackEnd<Impl>::squashFromXC()
+{
+    // Squash from one below the ROB head so that the head itself is
+    // squashed too; with an empty ROB the squash point is 0.
+    InstSeqNum squash_point = 0;
+    if (!robEmpty()) {
+        squash_point = instList.back()->seqNum - 1;
+    }
+
+    squash(squash_point);
+    frontEnd->squash(squash_point, thread->readPC(),
+                     false, false);
+
+    // Clear the pending state now that the squash has been carried out.
+    xcSquash = false;
+    commitStatus = Running;
+    thread->inSyscall = false;
+    thread->trapPending = false;
+}
+
+/**
+ * Performs the squash requested through trapSquash: squashes the whole
+ * ROB and the front end, restarts the front end at the thread's current
+ * PC and returns commit to Running.
+ */
+template <class Impl>
+void
+LWBackEnd<Impl>::squashFromTrap()
+{
+    // Squash from one below the ROB head so that the head itself is
+    // squashed too; with an empty ROB the squash point is 0.
+    InstSeqNum squash_point = 0;
+    if (!robEmpty()) {
+        squash_point = instList.back()->seqNum - 1;
+    }
+
+    squash(squash_point);
+    frontEnd->squash(squash_point, thread->readPC(),
+                     false, false);
+
+    // Clear the pending state now that the squash has been carried out.
+    trapSquash = false;
+    commitStatus = Running;
+    thread->inSyscall = false;
+    thread->trapPending = false;
+}
+
+/**
+ * Squashes everything younger than a mispredicted branch and restarts
+ * the front end at the branch's next PC.
+ *
+ * @param inst The branch that triggered the squash; it is kept.
+ */
+template <class Impl>
+void
+LWBackEnd<Impl>::squashDueToBranch(DynInstPtr &inst)
+{
+    const InstSeqNum &branch_sn = inst->seqNum;
+
+    // Update the branch predictor state I guess
+    DPRINTF(BE, "Squashing due to branch [sn:%lli], will restart at PC %#x\n",
+            branch_sn, inst->readNextPC());
+
+    // Back end first, then the front end, which is told this squash came
+    // from a branch.
+    squash(branch_sn);
+    frontEnd->squash(branch_sn, inst->readNextPC(),
+                     true, inst->mispredicted());
+}
+
+/**
+ * Squashes everything younger than the given instruction after the LSQ
+ * reported a violation, and restarts the front end at that instruction's
+ * next PC.
+ *
+ * @param inst The instruction to squash behind; it is kept.
+ */
+template <class Impl>
+void
+LWBackEnd<Impl>::squashDueToMemViolation(DynInstPtr &inst)
+{
+    const InstSeqNum &squash_sn = inst->seqNum;
+
+    DPRINTF(BE, "Squashing due to violation [sn:%lli], will restart at PC %#x\n",
+            squash_sn, inst->readNextPC());
+
+    // Same sequence as a branch squash, except that the front end is
+    // told this squash did not come from a branch.
+    squash(squash_sn);
+    frontEnd->squash(squash_sn, inst->readNextPC(),
+                     false, inst->mispredicted());
+}
+
+/**
+ * Squashes the given instruction and everything younger than it, and
+ * restarts the front end at the instruction's own PC.
+ *
+ * @param inst The instruction that could not proceed.
+ */
+template <class Impl>
+void
+LWBackEnd<Impl>::squashDueToMemBlocked(DynInstPtr &inst)
+{
+    DPRINTF(IEW, "Memory blocked, squashing load and younger insts, "
+            "PC: %#x [sn:%i].\n", inst->readPC(), inst->seqNum);
+
+    // Squashing from seqNum - 1 includes the instruction itself.
+    const InstSeqNum squash_point = inst->seqNum - 1;
+
+    squash(squash_point);
+    frontEnd->squash(squash_point, inst->readPC());
+}
+
+/**
+ * Records a fault reported by fetch and raises fetchHasFault, which
+ * tick() checks in full-system builds.
+ *
+ * @param fault The fault to remember.
+ */
+template <class Impl>
+void
+LWBackEnd<Impl>::fetchFault(Fault &fault)
+{
+    fetchHasFault = true;
+    faultFromFetch = fault;
+}
+
+/**
+ * Updates the per-thread execute statistics for one instruction.
+ *
+ * @param inst The instruction that was just executed.
+ */
+template <class Impl>
+void
+LWBackEnd<Impl>::updateExeInstStats(DynInstPtr &inst)
+{
+    int tid = inst->threadNumber;
+
+    // On Alpha, software prefetches are counted separately from
+    // ordinary instructions.
+#ifdef TARGET_ALPHA
+    if (inst->isDataPrefetch()) {
+        exe_swp[tid]++;
+    } else {
+        exe_inst[tid]++;
+    }
+#else
+    exe_inst[tid]++;
+#endif
+
+    // Control operations.
+    if (inst->isControl()) {
+        exe_branches[tid]++;
+    }
+
+    // Memory operations; loads are additionally counted on their own.
+    if (!inst->isMemRef())
+        return;
+
+    exe_refs[tid]++;
+
+    if (inst->isLoad()) {
+        exe_loads[tid]++;
+    }
+}
+
+/**
+ * Updates the per-thread commit statistics for one instruction.
+ *
+ * @param inst The instruction that was just committed.
+ */
+template <class Impl>
+void
+LWBackEnd<Impl>::updateComInstStats(DynInstPtr &inst)
+{
+    unsigned tid = inst->threadNumber;
+
+    // On Alpha, software prefetches are counted separately from
+    // ordinary instructions.
+#ifdef TARGET_ALPHA
+    if (inst->isDataPrefetch())
+        stat_com_swp[tid]++;
+    else
+        stat_com_inst[tid]++;
+#else
+    stat_com_inst[tid]++;
+#endif
+
+    // Control instructions.
+    if (inst->isControl())
+        stat_com_branches[tid]++;
+
+    // Memory references; loads are additionally counted on their own.
+    if (inst->isMemRef()) {
+        stat_com_refs[tid]++;
+
+        if (inst->isLoad())
+            stat_com_loads[tid]++;
+    }
+
+    // Memory barriers.
+    if (inst->isMemBarrier())
+        stat_com_membars[tid]++;
+}
+
+/**
+ * Helper for LWBackEnd::dumpInsts(): prints every entry of an
+ * instruction list, starting with the last element and ending with the
+ * first.
+ *
+ * @param insts     The list to print.
+ * @param num       Running entry index; advanced once per entry printed.
+ * @param valid_num Running count of non-squashed entries that are either
+ *                  not issued, or are memory references whose memOpDone
+ *                  is not set.
+ */
+template <class InstListType>
+void
+dumpInstListBackToFront(InstListType &insts, int &num, int &valid_num)
+{
+    typename InstListType::iterator inst_it = insts.end();
+
+    // Walk backwards by stepping down from end() until begin() has been
+    // visited.  This never decrements begin() and does nothing for an
+    // empty list.
+    while (inst_it != insts.begin()) {
+        --inst_it;
+
+        cprintf("Instruction:%i\n",
+                num);
+        if (!(*inst_it)->isSquashed()) {
+            if (!(*inst_it)->isIssued()) {
+                ++valid_num;
+                cprintf("Count:%i\n", valid_num);
+            } else if ((*inst_it)->isMemRef() &&
+                       !(*inst_it)->memOpDone) {
+                // Loads that have not been marked as executed still count
+                // towards the total instructions.
+                ++valid_num;
+                cprintf("Count:%i\n", valid_num);
+            }
+        }
+
+        cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
+                "Issued:%i\nSquashed:%i\n",
+                (*inst_it)->readPC(),
+                (*inst_it)->seqNum,
+                (*inst_it)->threadNumber,
+                (*inst_it)->isIssued(),
+                (*inst_it)->isSquashed());
+
+        if ((*inst_it)->isMemRef()) {
+            cprintf("MemOpDone:%i\n", (*inst_it)->memOpDone);
+        }
+
+        cprintf("\n");
+
+        ++num;
+    }
+}
+
+/**
+ * Debugging aid: prints the sizes of exeList, instList, waitingList and
+ * waitingMemOps, every instruction on instList and waitingList (last
+ * element first), and the contents of waitingMemOps.  The entry index
+ * and the valid count run on from instList into waitingList.
+ */
+template <class Impl>
+void
+LWBackEnd<Impl>::dumpInsts()
+{
+    int num = 0;
+    int valid_num = 0;
+
+    cprintf("ExeList size: %i\n", exeList.size());
+
+    cprintf("Inst list size: %i\n", instList.size());
+
+    dumpInstListBackToFront(instList, num, valid_num);
+
+    cprintf("Waiting list size: %i\n", waitingList.size());
+
+    dumpInstListBackToFront(waitingList, num, valid_num);
+
+    cprintf("waitingMemOps list size: %i\n", waitingMemOps.size());
+
+    // waitingMemOps entries are printed directly as sequence numbers.
+    for (MemIt waiting_it = waitingMemOps.begin();
+         waiting_it != waitingMemOps.end(); ++waiting_it) {
+        cprintf("[sn:%lli] ", (*waiting_it));
+    }
+    cprintf("\n");
+}
diff --git a/cpu/ozone/lw_lsq.cc b/cpu/ozone/lw_lsq.cc
new file mode 100644
index 000000000..922228b09
--- /dev/null
+++ b/cpu/ozone/lw_lsq.cc
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cpu/ozone/ozone_impl.hh"
+#include "cpu/ozone/lw_lsq_impl.hh"
+
+// Explicitly instantiate OzoneLWLSQ for each Impl we build (only OzoneImpl).
+template class OzoneLWLSQ<OzoneImpl>;
+
diff --git a/cpu/ozone/lw_lsq.hh b/cpu/ozone/lw_lsq.hh
new file mode 100644
index 000000000..2b2c25b58
--- /dev/null
+++ b/cpu/ozone/lw_lsq.hh
@@ -0,0 +1,649 @@
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_OZONE_LW_LSQ_HH__
+#define __CPU_OZONE_LW_LSQ_HH__
+
+#include <list>
+#include <map>
+#include <queue>
+#include <algorithm>
+
+#include "arch/faults.hh"
+#include "arch/isa_traits.hh"
+#include "config/full_system.hh"
+#include "base/hashmap.hh"
+#include "cpu/inst_seq.hh"
+#include "mem/mem_interface.hh"
+//#include "mem/page_table.hh"
+#include "sim/sim_object.hh"
+
+class PageTable;
+
+/**
+ * Class that implements the actual LQ and SQ for each specific thread.
+ * Both queues are std::lists with the youngest entry at the front and the
+ * oldest at the back. The LSQ tracks memory ordering violations, and also
+ * detects partial store to load forwarding cases (a store only has part of
+ * a load's data) that require the load to wait until the store writes
+ * back. In the former case it holds onto the violating load until
+ * getMemDepViolator() is called; in the latter it marks the LSQ as stalled
+ * and asks the back end to reschedule the load.
+ * init() must be called before the queues are used.
+ */
+template <class Impl>
+class OzoneLWLSQ {
+  public:
+    typedef typename Impl::Params Params;
+    typedef typename Impl::FullCPU FullCPU;
+    typedef typename Impl::BackEnd BackEnd;
+    typedef typename Impl::DynInstPtr DynInstPtr;
+    typedef typename Impl::IssueStruct IssueStruct;
+
+    typedef TheISA::IntReg IntReg;
+
+    typedef typename std::map<InstSeqNum, DynInstPtr>::iterator LdMapIt;
+
+  private:
+    class StoreCompletionEvent : public Event {
+      public:
+        /** Constructs a store completion event. */
+        StoreCompletionEvent(DynInstPtr &inst, BackEnd *be,
+                             Event *wb_event, OzoneLWLSQ *lsq_ptr);
+
+        /** Processes the store completion event. */
+        void process();
+
+        /** Returns the description of this event. */
+        const char *description();
+
+      private:
+        /** The store instruction being written back. */
+        DynInstPtr inst;
+        /** The back-end stage that is told when the store's miss ends. */
+        BackEnd *be;
+        /** The writeback event for the store.  Needed for store
+         * conditionals.
+         */
+        Event *wbEvent;
+        /** The pointer to the LSQ unit that issued the store. */
+        OzoneLWLSQ<Impl> *lsqPtr;
+    };
+
+  public:
+    /** Constructs an LSQ unit. init() must be called prior to use. */
+    OzoneLWLSQ();
+
+    /** Initializes the LSQ unit with the specified number of entries. */
+    void init(Params *params, unsigned maxLQEntries,
+              unsigned maxSQEntries, unsigned id);
+
+    /** Returns the name of the LSQ unit. */
+    std::string name() const;
+
+    /** Sets the CPU pointer. */
+    void setCPU(FullCPU *cpu_ptr)
+    { cpu = cpu_ptr; }
+
+    /** Sets the back-end stage pointer. */
+    void setBE(BackEnd *be_ptr)
+    { be = be_ptr; }
+
+    /** Sets the page table pointer. */
+    void setPageTable(PageTable *pt_ptr);
+
+    /** Ticks the LSQ unit, which in this case only resets the number of
+     * used cache ports.
+     * @todo: Move the number of used ports up to the LSQ level so it can
+     * be shared by all LSQ units.
+     */
+    void tick() { usedPorts = 0; }
+
+    /** Inserts an instruction. */
+    void insert(DynInstPtr &inst);
+    /** Inserts a load instruction. */
+    void insertLoad(DynInstPtr &load_inst);
+    /** Inserts a store instruction. */
+    void insertStore(DynInstPtr &store_inst);
+
+    /** Executes a load instruction. */
+    Fault executeLoad(DynInstPtr &inst);
+
+//    Fault executeLoad(int lq_idx);
+    /** Executes a store instruction. */
+    Fault executeStore(DynInstPtr &inst);
+
+    /** Commits the head load. */
+    void commitLoad();
+    /** Commits loads up to and including a specific sequence number. */
+    void commitLoads(InstSeqNum &youngest_inst);
+
+    /** Commits stores older than a specific sequence number. */
+    void commitStores(InstSeqNum &youngest_inst);
+
+    /** Writes back stores. */
+    void writebackStores();
+
+    // @todo: Include stats in the LSQ unit.
+    //void regStats();
+
+    /** Clears all the entries in the LQ. */
+    void clearLQ();
+
+    /** Clears all the entries in the SQ. */
+    void clearSQ();
+
+    /** Resizes the LQ to a given size. */
+    void resizeLQ(unsigned size);
+
+    /** Resizes the SQ to a given size. */
+    void resizeSQ(unsigned size);
+
+    /** Squashes all instructions younger than a specific sequence number. */
+    void squash(const InstSeqNum &squashed_num);
+
+    /** Returns if there is a memory ordering violation. Value is reset upon
+     * call to getMemDepViolator().
+     */
+    bool violation() { return memDepViolator; }
+
+    /** Returns the memory ordering violator. */
+    DynInstPtr getMemDepViolator();
+
+    /** Returns if a load became blocked due to the memory system.  The
+     *  flag is not cleared here; clearLoadBlocked() has to be called.
+     */
+    bool loadBlocked()
+    { return isLoadBlocked; }
+    /** Clears the blocked-load flag. */
+    void clearLoadBlocked()
+    { isLoadBlocked = false; }
+    /** Returns if the current blocked load has been marked as handled. */
+    bool isLoadBlockedHandled()
+    { return loadBlockedHandled; }
+    /** Marks the current blocked load as handled. */
+    void setLoadBlockedHandled()
+    { loadBlockedHandled = true; }
+
+    /** Returns the number of free entries (min of free LQ and SQ entries). */
+    unsigned numFreeEntries();
+
+    /** Returns the number of loads ready to execute. */
+    int numLoadsReady();
+
+    /** Returns the number of loads in the LQ. */
+    int numLoads() { return loads; }
+
+    /** Returns the number of stores in the SQ. */
+    int numStores() { return stores; }
+
+    /** Returns if either the LQ or SQ is full. */
+    bool isFull() { return lqFull() || sqFull(); }
+
+    /** Returns if the LQ is full. */
+    bool lqFull() { return loads >= (LQEntries - 1); }
+
+    /** Returns if the SQ is full. */
+    bool sqFull() { return stores >= (SQEntries - 1); }
+
+    /** Debugging function to dump instructions in the LSQ. */
+    void dumpInsts();
+
+    /** Returns the number of instructions in the LSQ. */
+    unsigned getCount() { return loads + stores; }
+
+    /** Returns if there are any stores to writeback. */
+    bool hasStoresToWB() { return storesToWB; }
+
+    /** Returns the number of stores to writeback. */
+    int numStoresToWB() { return storesToWB; }
+
+    /** Returns if the oldest store will writeback this cycle (never if empty). */
+    bool willWB() { return !storeQueue.empty() && storeQueue.back().canWB &&
+                        !storeQueue.back().completed &&
+                        !dcacheInterface->isBlocked(); }
+
+  private:
+    /** Completes the store at the specified index. */
+    void completeStore(int store_idx);
+
+  private:
+    /** Pointer to the CPU. */
+    FullCPU *cpu;
+
+    /** Pointer to the back-end stage. */
+    BackEnd *be;
+
+    /** Pointer to the D-cache. */
+    MemInterface *dcacheInterface;
+
+    /** Pointer to the page table. */
+    PageTable *pTable;
+
+  public:
+    struct SQEntry {
+        /** Constructs an empty store queue entry. */
+        SQEntry()
+            : inst(NULL), req(NULL), size(0), data(0),
+              canWB(0), committed(0), completed(0), lqIt()
+        { }
+
+        /** Constructs a store queue entry for a given instruction. */
+        SQEntry(DynInstPtr &_inst)
+            : inst(_inst), req(NULL), size(0), data(0),
+              canWB(0), committed(0), completed(0), lqIt()
+        { }
+
+        /** The store instruction. */
+        DynInstPtr inst;
+        /** The memory request for the store. */
+        MemReqPtr req;
+        /** The size of the store. */
+        int size;
+        /** The store data. */
+        IntReg data;
+        /** Whether or not the store can writeback. */
+        bool canWB;
+        /** Whether or not the store is committed. */
+        bool committed;
+        /** Whether or not the store is completed. */
+        bool completed;
+        /** Load queue position recorded when the store was inserted. */
+        typename std::list<DynInstPtr>::iterator lqIt;
+    };
+
+    enum Status {
+        Running,
+        Idle,
+        DcacheMissStall,
+        DcacheMissSwitch
+    };
+
+  private:
+    /** The OzoneLWLSQ thread id. */
+    unsigned lsqID;
+
+    /** The status of the LSQ unit. */
+    Status _status;
+
+    /** The store queue. */
+//    std::vector<SQEntry> storeQueue;
+    std::list<SQEntry> storeQueue;
+    /** The load queue. */
+//    std::vector<DynInstPtr> loadQueue;
+    std::list<DynInstPtr> loadQueue;
+
+    typedef typename std::list<SQEntry>::iterator SQIt;
+    typedef typename std::list<DynInstPtr>::iterator LQIt;
+
+    /** Hash functor for the integer LQ/SQ indices. */
+    struct HashFn {
+        size_t operator() (const int a) const
+        {
+            unsigned hash = (((a >> 14) ^ ((a >> 2) & 0xffff))) & 0x7FFFFFFF;
+
+            return hash;
+        }
+    };
+
+    m5::hash_map<int, SQIt, HashFn> SQItHash;
+    std::queue<int> SQIndices;
+    m5::hash_map<int, LQIt, HashFn> LQItHash;
+    std::queue<int> LQIndices;
+
+    typedef typename m5::hash_map<int, LQIt, HashFn>::iterator LQHashIt;
+    typedef typename m5::hash_map<int, SQIt, HashFn>::iterator SQHashIt;
+    // Consider making these 16 bits
+    /** The number of LQ entries. */
+    unsigned LQEntries;
+    /** The number of SQ entries. */
+    unsigned SQEntries;
+
+    /** The number of load instructions in the LQ. */
+    int loads;
+    /** The number of store instructions in the SQ (excludes those waiting to
+     * writeback).
+     */
+    int stores;
+    /** The number of stores that have been flagged as able to writeback. */
+    int storesToWB;
+
+    /// @todo Consider moving to a more advanced model with write vs read ports
+    /** The number of cache ports available each cycle. */
+    int cachePorts;
+
+    /** The number of used cache ports in this cycle. */
+    int usedPorts;
+
+    //list<InstSeqNum> mshrSeqNums;
+
+     //Stats::Scalar<> dcacheStallCycles;
+    Counter lastDcacheStall;
+
+    // Make these per thread?
+    /** Whether or not the LSQ is stalled. */
+    bool stalled;
+    /** The store that causes the stall due to partial store to load
+     * forwarding.
+     */
+    InstSeqNum stallingStoreIsn;
+    /** The load that is stalled on the above store. */
+//    int stallingLoadIdx;
+    LQIt stallingLoad;
+
+    /** Whether or not a load is blocked due to the memory system.  It is
+     *  read via loadBlocked() and cleared via clearLoadBlocked().
+     */
+    bool isLoadBlocked;
+    /** Whether or not the current blocked load has been handled. */
+    bool loadBlockedHandled;
+    /** The sequence number of the load that is currently blocked. */
+    InstSeqNum blockedLoadSeqNum;
+
+    /** The oldest faulting load instruction. */
+    DynInstPtr loadFaultInst;
+    /** The oldest faulting store instruction. */
+    DynInstPtr storeFaultInst;
+
+    /** The oldest load that caused a memory ordering violation. */
+    DynInstPtr memDepViolator;
+
+    // Will also need how many read/write ports the Dcache has.  Or keep track
+    // of that in stage that is one level up, and only call executeLoad/Store
+    // the appropriate number of times.
+
+  public:
+    /** Executes the load at the given index. */
+    template <class T>
+    Fault read(MemReqPtr &req, T &data, int load_idx);
+
+    /** Executes the store at the given index. */
+    template <class T>
+    Fault write(MemReqPtr &req, T &data, int store_idx);
+
+    /** Returns the index of the head load instruction. */
+//    int getLoadHead() { return loadHead; }
+    /** Returns the sequence number of the head load instruction. */
+    InstSeqNum getLoadHeadSeqNum()
+    {
+        if (!loadQueue.empty()) {
+            return loadQueue.back()->seqNum;
+        } else {
+            return 0;
+        }
+
+    }
+
+    /** Returns the index of the head store instruction. */
+//    int getStoreHead() { return storeHead; }
+    /** Returns the sequence number of the head store instruction. */
+    InstSeqNum getStoreHeadSeqNum()
+    {
+        if (!storeQueue.empty()) {
+            return storeQueue.back().inst->seqNum;
+        } else {
+            return 0;
+        }
+
+    }
+
+    /** Returns whether or not the LSQ unit is stalled. */
+    bool isStalled()  { return stalled; }
+};
+
+template <class Impl>
+template <class T>
+Fault
+OzoneLWLSQ<Impl>::read(MemReqPtr &req, T &data, int load_idx)
+{
+    // Executes the load with index load_idx: forwards from an older store
+    // when one covers the whole request, otherwise sends req to the D-cache.
+    // Note a squashed load may still get here (issue2execute delay).
+    typename m5::hash_map<int, LQIt, HashFn>::iterator
+        lq_hash_it = LQItHash.find(load_idx);
+    assert(lq_hash_it != LQItHash.end());
+    DynInstPtr inst = (*(*lq_hash_it).second);
+
+    if (inst->isExecuted()) {
+        panic("Should not reach this point with split ops!");
+        // NOTE(review): unreachable if panic() never returns - confirm.
+        memcpy(&data,req->data,req->size);
+
+        return NoFault;
+    }
+
+    // Make sure this isn't an uncacheable access
+    // A bit of a hackish way to get uncached accesses to work only if they're
+    // at the head of the LSQ and are ready to commit (at the head of the ROB
+    // too).
+    // @todo: Fix uncached accesses.
+    if (req->flags & UNCACHEABLE &&
+        (inst != loadQueue.back() || !inst->reachedCommit)) {
+        DPRINTF(OzoneLSQ, "[sn:%lli] Uncached load and not head of "
+                "commit/LSQ!\n",
+                inst->seqNum);
+        be->rescheduleMemInst(inst);
+        return TheISA::genMachineCheckFault();
+    }
+
+    // Check the SQ for any previous stores that might lead to forwarding
+    SQIt sq_it = storeQueue.begin();
+    int store_size = 0;
+
+    DPRINTF(OzoneLSQ, "Read called, load idx: %i addr: %#x\n",
+            load_idx, req->paddr);
+    // Skip past the stores that are younger than this load.
+    while (sq_it != storeQueue.end() && (*sq_it).inst->seqNum > inst->seqNum)
+        ++sq_it;
+
+    while (1) {
+        // Stop once every remaining (older) store has been examined.
+        if (sq_it == storeQueue.end()) {
+            break;
+        }
+
+        assert((*sq_it).inst);
+
+        store_size = (*sq_it).size;
+        // A zero size means write() has not filled in this entry; skip it.
+        if (store_size == 0) {
+            sq_it++;
+            continue;
+        }
+
+        // Check if the store data is within the lower and upper bounds of
+        // addresses that the request needs.
+        bool store_has_lower_limit =
+            req->vaddr >= (*sq_it).inst->effAddr;
+        bool store_has_upper_limit =
+            (req->vaddr + req->size) <= ((*sq_it).inst->effAddr +
+                                         store_size);
+        bool lower_load_has_store_part =
+            req->vaddr < ((*sq_it).inst->effAddr +
+                           store_size);
+        bool upper_load_has_store_part =
+            (req->vaddr + req->size) > (*sq_it).inst->effAddr;
+
+        // If the store's data has all of the data needed, we can forward.
+        if (store_has_lower_limit && store_has_upper_limit) {
+
+            int shift_amt = req->vaddr & (store_size - 1);
+            // Assumes byte addressing
+            shift_amt = shift_amt << 3;
+
+            // Cast this to type T?
+            data = (*sq_it).data >> shift_amt;
+
+            req->cmd = Read;
+            assert(!req->completionEvent);
+            req->completionEvent = NULL;
+            req->time = curTick;
+            assert(!req->data);
+            req->data = new uint8_t[64];
+
+            memcpy(req->data, &data, req->size);
+
+            DPRINTF(OzoneLSQ, "Forwarding from store [sn:%lli] to load to "
+                    "[sn:%lli] addr %#x, data %#x\n",
+                    (*sq_it).inst->seqNum, inst->seqNum, req->vaddr, *(req->data));
+
+            typename BackEnd::LdWritebackEvent *wb =
+                new typename BackEnd::LdWritebackEvent(inst,
+                                                       be);
+
+            // NOTE(review): a 1 cycle load-store forwarding latency was
+            // intended, but the event is scheduled at curTick (no delay).
+            // FIXME - Need to make this a parameter.
+            wb->schedule(curTick);
+
+            // Should keep track of stat for forwarded data
+            return NoFault;
+        } else if ((store_has_lower_limit && lower_load_has_store_part) ||
+                   (store_has_upper_limit && upper_load_has_store_part) ||
+                   (lower_load_has_store_part && upper_load_has_store_part)) {
+            // This is the partial store-load forwarding case where a store
+            // has only part of the load's data.
+
+            // If it's already been written back, then don't worry about
+            // stalling on it.
+            if ((*sq_it).completed) {
+                sq_it++;
+                break;
+            }
+
+            // Must stall load and force it to retry, so long as it's the oldest
+            // load that needs to do so.
+            if (!stalled ||
+                (stalled &&
+                 inst->seqNum <
+                 (*stallingLoad)->seqNum)) {
+                stalled = true;
+                stallingStoreIsn = (*sq_it).inst->seqNum;
+                stallingLoad = (*lq_hash_it).second;
+            }
+
+            // Tell IQ/mem dep unit that this instruction will need to be
+            // rescheduled eventually
+            be->rescheduleMemInst(inst);
+
+            DPRINTF(OzoneLSQ, "Load-store forwarding mis-match. "
+                    "Store [sn:%lli] to load addr %#x\n",
+                    (*sq_it).inst->seqNum, req->vaddr);
+
+            return NoFault;
+        }
+        sq_it++;
+    }
+
+
+    // If there's no forwarding case, then go access memory
+    ++usedPorts;
+
+    // if we have a cache, do cache access too
+    if (dcacheInterface) {
+        if (dcacheInterface->isBlocked()) {
+            // There's an older load that's already going to squash.
+            if (isLoadBlocked && blockedLoadSeqNum < inst->seqNum)
+                return NoFault;
+
+            isLoadBlocked = true;
+            loadBlockedHandled = false;
+            blockedLoadSeqNum = inst->seqNum;
+            // No fault occurred, even though the interface is blocked.
+            return NoFault;
+        }
+
+        DPRINTF(OzoneLSQ, "D-cache: PC:%#x reading from paddr:%#x "
+                "vaddr:%#x flags:%i\n",
+                inst->readPC(), req->paddr, req->vaddr, req->flags);
+
+        // Setup MemReq pointer
+        req->cmd = Read;
+        req->completionEvent = NULL;
+        req->time = curTick;
+        assert(!req->data);
+        req->data = new uint8_t[64];
+        // NOTE(review): cannot fire; completionEvent was set to NULL above.
+        assert(!req->completionEvent);
+        req->completionEvent =
+            new typename BackEnd::LdWritebackEvent(inst, be);
+
+        // Do Cache Access
+        MemAccessResult result = dcacheInterface->access(req);
+
+        // Ugly hack to get an event scheduled *only* if the access is
+        // a miss.  We really should add first-class support for this
+        // at some point.
+        // @todo: Probably should support having no events
+        if (result != MA_HIT) {
+            DPRINTF(OzoneLSQ, "D-cache miss!\n");
+            DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n",
+                    inst->seqNum);
+
+            lastDcacheStall = curTick;
+
+            _status = DcacheMissStall;
+
+        } else {
+//            DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n",
+//                    inst->seqNum);
+
+            DPRINTF(OzoneLSQ, "D-cache hit!\n");
+        }
+    } else {
+        fatal("Must use D-cache with new memory system");
+    }
+
+    return NoFault;
+}
+
+template <class Impl>
+template <class T>
+Fault
+OzoneLWLSQ<Impl>::write(MemReqPtr &req, T &data, int store_idx)
+{
+    // Look up the store queue entry registered under this index.
+    SQHashIt found = SQItHash.find(store_idx);
+    assert(found != SQItHash.end());
+
+    SQEntry &entry = *((*found).second);
+    assert(entry.inst);
+
+    DPRINTF(OzoneLSQ, "Doing write to store idx %i, addr %#x data %#x"
+            " | [sn:%lli]\n",
+            store_idx, req->paddr, data, entry.inst->seqNum);
+
+    // Only the queue entry is updated here; memory is not accessed, so
+    // no fault can happen.
+    entry.req = req;
+    entry.size = sizeof(T);
+    entry.data = data;
+    return NoFault;
+}
+
+#endif // __CPU_OZONE_LW_LSQ_HH__
diff --git a/cpu/ozone/lw_lsq_impl.hh b/cpu/ozone/lw_lsq_impl.hh
new file mode 100644
index 000000000..54d7ead6c
--- /dev/null
+++ b/cpu/ozone/lw_lsq_impl.hh
@@ -0,0 +1,766 @@
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "arch/isa_traits.hh"
+#include "base/str.hh"
+#include "cpu/ozone/lw_lsq.hh"
+
+/** Records the store, its owners and the optional writeback event. */
+template <class Impl>
+OzoneLWLSQ<Impl>::StoreCompletionEvent::StoreCompletionEvent(
+    DynInstPtr &_inst, BackEnd *_be, Event *wb_event,
+    OzoneLWLSQ<Impl> *lsq_ptr)
+    : Event(&mainEventQueue),
+      inst(_inst), be(_be),
+      wbEvent(wb_event), lsqPtr(lsq_ptr)
+{
+    // AutoDelete: presumably the event queue frees this event once it has
+    // been processed, so it is not deleted anywhere in this file.
+    this->setFlags(Event::AutoDelete);
+}
+
+/** Finishes a store once its cache access has completed. */
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::StoreCompletionEvent::process()
+{
+    DPRINTF(OzoneLSQ, "Cache miss complete for store [sn:%lli]\n",
+            inst->seqNum);
+
+    // Run and then free the optional writeback event.
+    if (wbEvent != NULL) {
+        wbEvent->process();
+        delete wbEvent;
+    }
+
+    // Complete the store queue entry, then drop the miss in the back end.
+    lsqPtr->completeStore(inst->sqIdx);
+    be->removeDcacheMiss(inst);
+}
+
+/** Returns the constant description string of this event type; the
+ *  returned pointer refers to a string literal. */
+template <class Impl>
+const char *
+OzoneLWLSQ<Impl>::StoreCompletionEvent::description()
+{ return "LSQ store completion event"; }
+
+/** Starts with zeroed counters, no stall and no blocked load; see init(). */
+template <class Impl>
+OzoneLWLSQ<Impl>::OzoneLWLSQ()
+    : loads(0), stores(0), storesToWB(0),
+      stalled(false), isLoadBlocked(false), loadBlockedHandled(false)
+{ }
+
+/**
+ * Configures the unit: records its id and queue capacities, fills the
+ * pools of free LQ/SQ indices, and copies the cache port count and the
+ * D-cache interface out of params.
+ */
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::init(Params *params, unsigned maxLQEntries,
+                       unsigned maxSQEntries, unsigned id)
+{
+    DPRINTF(OzoneLSQ, "Creating OzoneLWLSQ%i object.\n",id);
+
+    lsqID = id;
+
+    LQEntries = maxLQEntries;
+    SQEntries = maxSQEntries;
+
+    // Each pool holds ten indices per queue entry.  Size the SQ pool from
+    // SQEntries; it used to be sized from LQEntries as well, which could
+    // starve stores of indices when the SQ is much larger than the LQ.
+    for (unsigned i = 0; i < LQEntries * 10; ++i)
+        LQIndices.push(i);
+    for (unsigned i = 0; i < SQEntries * 10; ++i)
+        SQIndices.push(i);
+
+    usedPorts = 0;
+    cachePorts = params->cachePorts;
+    dcacheInterface = params->dcacheInterface;
+
+    // No faulting instruction, ordering violation or blocked load yet.
+    loadFaultInst = storeFaultInst = memDepViolator = NULL;
+    blockedLoadSeqNum = 0;
+}
+
+/** Returns the name used for this unit; it is the same constant for
+ *  every instance and does not include the id passed to init(). */
+template <class Impl>
+std::string
+OzoneLWLSQ<Impl>::name() const
+{ return "lsqunit"; }
+
+/** Removes every load from the load queue.  Only the list is cleared;
+ *  the loads counter and the index bookkeeping are left as they are. */
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::clearLQ()
+{ loadQueue.clear(); }
+
+/** Removes every store from the store queue.  Only the list is cleared;
+ *  the stores counters and the index bookkeeping are left as they are. */
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::clearSQ()
+{ storeQueue.clear(); }
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::setPageTable(PageTable *pt_ptr)
+{
+    pTable = pt_ptr;    // only stored; nothing here dereferences it
+    DPRINTF(OzoneLSQ, "Setting the page table pointer.\n");
+}
+
+/**
+ * Changes the LQ capacity to size, which must not be smaller than the
+ * current capacity.  The queue is a std::list, so there is no storage to
+ * grow: only the LQEntries limit changes.  The previous version pushed
+ * null placeholder instructions onto the list (and bumped LQEntries once
+ * per placeholder), which left null entries at the head of the queue for
+ * commitLoad() and numLoadsReady() to dereference.
+ */
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::resizeLQ(unsigned size)
+{
+    assert(size >= LQEntries);
+
+    // Loads already in the queue are unaffected.
+    LQEntries = size;
+}
+
+/**
+ * Changes the SQ capacity to size; unlike resizeLQ() this may also shrink
+ * the limit.  Only SQEntries changes: the queue is a std::list, so no
+ * placeholder entries are needed (the previous version pushed empty
+ * SQEntry objects, which have a null inst, onto the live queue).
+ */
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::resizeSQ(unsigned size)
+{
+    // Stores already in the queue are left alone, even if size is
+    // smaller than the number currently held.
+    SQEntries = size;
+}
+
+/** Adds a memory instruction to the load queue or to the store queue. */
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::insert(DynInstPtr &inst)
+{
+    // Only memory references belong here, and each one has to be either
+    // a load or a store.
+    assert(inst->isMemRef());
+    assert(inst->isLoad() || inst->isStore());
+
+    // Anything that is not a load is treated as a store.
+    if (!inst->isLoad()) {
+        insertStore(inst);
+        return;
+    }
+
+    insertLoad(inst);
+}
+
+/** Places a load at the young end of the LQ under a fresh index. */
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::insertLoad(DynInstPtr &load_inst)
+{
+    // Take the next free index out of the pool.
+    assert(!LQIndices.empty());
+    const int idx = LQIndices.front();
+    LQIndices.pop();
+
+    DPRINTF(OzoneLSQ, "Inserting load PC %#x, idx:%i [sn:%lli]\n",
+            load_inst->readPC(), idx, load_inst->seqNum);
+
+    // Newest loads sit at the front; remember where this one lives.
+    load_inst->lqIdx = idx;
+    LQItHash[idx] = loadQueue.insert(loadQueue.begin(), load_inst);
+    ++loads;
+}
+
+/** Places a store at the young end of the SQ under a fresh index. */
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::insertStore(DynInstPtr &store_inst)
+{
+    // Make sure it is not full before inserting an instruction.
+    assert(stores - storesToWB < SQEntries);
+
+    // Take the next free index out of the pool.
+    assert(!SQIndices.empty());
+    const int idx = SQIndices.front();
+    SQIndices.pop();
+
+    DPRINTF(OzoneLSQ, "Inserting store PC %#x, idx:%i [sn:%lli]\n",
+            store_inst->readPC(), idx, store_inst->seqNum);
+
+    store_inst->sqIdx = idx;
+
+    // Remember the youngest load present at insertion time.  For an empty
+    // load queue begin() equals end(), so no special case is required.
+    SQEntry entry(store_inst);
+    entry.lqIt = loadQueue.begin();
+
+    // Newest stores sit at the front; remember where this one lives.
+    SQItHash[idx] = storeQueue.insert(storeQueue.begin(), entry);
+    ++stores;
+}
+
+/** Hands back the recorded violating load and forgets it afterwards. */
+template <class Impl>
+typename Impl::DynInstPtr
+OzoneLWLSQ<Impl>::getMemDepViolator()
+{
+    // Copy first: the member is cleared before returning.
+    DynInstPtr violator(memDepViolator);
+    memDepViolator = NULL;
+    return violator;
+}
+
+/**
+ * Returns how many more instructions fit: the smaller of the free LQ and
+ * free SQ slots, less the one slot each queue keeps in reserve (see
+ * lqFull()/sqFull()).  Clamped to zero so a full or over-full queue no
+ * longer wraps around to a huge unsigned count.
+ */
+template <class Impl>
+unsigned
+OzoneLWLSQ<Impl>::numFreeEntries()
+{
+    const int free_lq = static_cast<int>(LQEntries) - loads - 1;
+    const int free_sq = static_cast<int>(SQEntries) - stores - 1;
+    const int free_min = std::min(free_lq, free_sq);
+    return free_min > 0 ? free_min : 0;
+}
+
+/** Counts the loads in the LQ that report readyToIssue(). */
+template <class Impl>
+int
+OzoneLWLSQ<Impl>::numLoadsReady()
+{
+    int retval = 0;
+
+    // Advance the iterator every pass; the loop never terminated before.
+    for (LQIt lq_it = loadQueue.begin(); lq_it != loadQueue.end(); ++lq_it) {
+        if ((*lq_it)->readyToIssue()) {
+            ++retval;
+        }
+    }
+
+    return retval;
+}
+
+/** Executes a load through initiateAcc(); faulting loads go to commit. */
+template <class Impl>
+Fault
+OzoneLWLSQ<Impl>::executeLoad(DynInstPtr &inst)
+{
+    DPRINTF(OzoneLSQ, "Executing load PC %#x, [sn:%lli]\n",
+            inst->readPC(),inst->seqNum);
+
+    // Make sure it's really in the list.
+    // Normally it should always be in the list.  However,
+    /* due to a syscall it may not be the list.
+#ifdef DEBUG
+    int i = loadHead;
+    while (1) {
+        if (i == loadTail && !find(inst)) {
+            assert(0 && "Load not in the queue!");
+        } else if (loadQueue[i] == inst) {
+            break;
+        }
+
+        i = i + 1;
+        if (i >= LQEntries) {
+            i = 0;
+        }
+    }
+#endif // DEBUG*/
+
+    // Execute this specific load.
+    Fault load_fault = inst->initiateAcc();
+
+    if (load_fault != NoFault) {
+        // Might want to make sure that this does not overwrite a
+        // previously faulting instruction that hasn't been checked yet;
+        // the oldest faulting load is probably the one that matters.
+        DPRINTF(OzoneLSQ, "Load [sn:%lli] has a fault\n", inst->seqNum);
+
+        // Maybe just set it as can commit here, although that might cause
+        // some other problems with sending traps to the ROB too quickly.
+        be->instToCommit(inst);
+    }
+
+    return load_fault;
+}
+
+/** Executes a store and checks younger loads for an ordering violation. */
+template <class Impl>
+Fault
+OzoneLWLSQ<Impl>::executeStore(DynInstPtr &store_inst)
+{
+    // Make sure that a store exists.
+    assert(stores != 0);
+
+    int store_idx = store_inst->sqIdx;
+    SQHashIt sq_hash_it = SQItHash.find(store_idx);
+    assert(sq_hash_it != SQItHash.end());
+    DPRINTF(OzoneLSQ, "Executing store PC %#x [sn:%lli]\n",
+            store_inst->readPC(), store_inst->seqNum);
+
+    SQIt sq_it = (*sq_hash_it).second;
+
+    Fault store_fault = store_inst->initiateAcc();
+
+    // Store size should now be available.  Use it to get proper offset for
+    // addr comparisons.
+    int size = (*sq_it).size;
+
+    if (size == 0) {
+        DPRINTF(OzoneLSQ,"Fault on Store PC %#x, [sn:%lli],Size = 0\n",
+                store_inst->readPC(),store_inst->seqNum);
+
+        return store_fault;
+    }
+
+    assert(store_fault == NoFault);
+
+    if (!storeFaultInst) {
+        if (store_fault != NoFault) {
+            panic("Fault in a store instruction!");
+            storeFaultInst = store_inst;
+        } else if (store_inst->isNonSpeculative()) {
+            // Nonspeculative accesses (namely store conditionals)
+            // need to set themselves as able to writeback if we
+            // haven't had a fault by here.
+            (*sq_it).canWB = true;
+
+            ++storesToWB;
+            DPRINTF(OzoneLSQ, "Nonspeculative store! storesToWB:%i\n",
+                    storesToWB);
+        }
+    }
+
+    // A violation that has not been collected yet is left in place.
+    if (memDepViolator) {
+        return store_fault;
+    }
+
+    // Walk the loads from oldest (back) to youngest (front).  Reverse
+    // iterators are used because the previous loop started at
+    // --loadQueue.end() and stepped below begin(), which is undefined
+    // behaviour (and on an empty queue it decremented end() itself).
+    typedef typename std::list<DynInstPtr>::reverse_iterator LQRevIt;
+
+    for (LQRevIt lq_it = loadQueue.rbegin();
+         lq_it != loadQueue.rend(); ++lq_it) {
+        // Loads older than the store cannot have passed it.
+        if ((*lq_it)->seqNum < store_inst->seqNum) {
+            continue;
+        }
+
+        // Actually should only check loads that have actually executed.
+        // Might be safe because effAddr is set to InvalAddr when the
+        // dyn inst is created.
+        // @todo: Fix this, magic number being used here: addresses are
+        // compared in 256-byte granules rather than by exact overlap.
+        if (((*lq_it)->effAddr >> 8) == (store_inst->effAddr >> 8)) {
+            // A load incorrectly passed this store.  Squash and refetch.
+            // For now return a fault to show that it was unsuccessful.
+            memDepViolator = (*lq_it);
+
+            return TheISA::genMachineCheckFault();
+        }
+    }
+
+    return store_fault;
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::commitLoad()
+{
+    // Retires the load at the back of the load queue and recycles its index.
+    assert(!loadQueue.empty());
+    DPRINTF(OzoneLSQ, "[sn:%lli] Committing head load instruction, PC %#x\n",
+            loadQueue.back()->seqNum, loadQueue.back()->readPC());
+
+    // Free the slot and drop its hash entry before the entry is popped.
+    const int freed_idx = loadQueue.back()->lqIdx;
+    LQIndices.push(freed_idx);
+    LQItHash.erase(freed_idx);
+    loadQueue.pop_back();
+    loads--;
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::commitLoads(InstSeqNum &youngest_inst)
+{
+    // Commits loads from the back of the queue up to and including
+    // youngest_inst.
+    assert(loads == 0 || !loadQueue.empty());
+    while (loads != 0) {
+        if (loadQueue.back()->seqNum > youngest_inst) break;
+        commitLoad();
+    }
+}
+
+/**
+ * Marks stores as able to write back, walking from the back of the store
+ * queue, until the first unmarked store newer than youngest_inst is found.
+ */
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::commitStores(InstSeqNum &youngest_inst)
+{
+    assert(stores == 0 || !storeQueue.empty());
+
+    // Decrement only while past begin(): this stays well defined when the
+    // queue is empty and never steps an iterator before begin().
+    SQIt sq_it = storeQueue.end();
+    while (sq_it != storeQueue.begin()) {
+        --sq_it;
+        assert((*sq_it).inst);
+        if ((*sq_it).canWB)
+            continue;
+        if ((*sq_it).inst->seqNum > youngest_inst)
+            break;
+        ++storesToWB;
+        DPRINTF(OzoneLSQ, "Marking store as able to write back, PC "
+                "%#x [sn:%lli], storesToWB:%i\n",
+                (*sq_it).inst->readPC(), (*sq_it).inst->seqNum, storesToWB);
+        (*sq_it).canWB = true;
+    }
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::writebackStores()
+{
+    SQIt sq_it = --(storeQueue.end());  // NOTE(review): no empty() check
+    while (storesToWB > 0 &&
+           sq_it != storeQueue.end() &&
+           (*sq_it).inst &&
+           (*sq_it).canWB &&
+           usedPorts < cachePorts) {
+        // Walk from the back while writable stores and ports remain.
+        DynInstPtr inst = (*sq_it).inst;
+        // Zero-sized stores are completed without touching the cache.
+        if ((*sq_it).size == 0 && !(*sq_it).completed) {
+            sq_it--;  // move on first: completeStore() erases this entry
+            completeStore(inst->sqIdx);
+
+            continue;
+        }
+        // Skip prefetches and stores that are already marked committed.
+        if (inst->isDataPrefetch() || (*sq_it).committed) {
+            sq_it--;
+            continue;
+        }
+
+        if (dcacheInterface && dcacheInterface->isBlocked()) {
+            DPRINTF(OzoneLSQ, "Unable to write back any more stores, cache"
+                    " is blocked!\n");
+            break;
+        }
+
+        ++usedPorts;
+
+        assert((*sq_it).req);
+        assert(!(*sq_it).committed);
+
+        MemReqPtr req = (*sq_it).req;
+        (*sq_it).committed = true;
+        // Fill in the write request; the data buffer is always 64 bytes.
+        req->cmd = Write;
+        req->completionEvent = NULL;
+        req->time = curTick;
+        assert(!req->data);
+        req->data = new uint8_t[64];
+        memcpy(req->data, (uint8_t *)&(*sq_it).data, req->size);
+
+        DPRINTF(OzoneLSQ, "D-Cache: Writing back store idx:%i PC:%#x "
+                "to Addr:%#x, data:%#x [sn:%lli]\n",
+                inst->sqIdx,inst->readPC(),
+                req->paddr, *(req->data),
+                inst->seqNum);
+
+        if (dcacheInterface) {
+            MemAccessResult result = dcacheInterface->access(req);
+            // Release a load that was stalled waiting on this store.
+            if (isStalled() &&
+                inst->seqNum == stallingStoreIsn) {
+                DPRINTF(OzoneLSQ, "Unstalling, stalling store [sn:%lli] "
+                        "load [sn:%lli]\n",
+                        stallingStoreIsn, (*stallingLoad)->seqNum);
+                stalled = false;
+                stallingStoreIsn = 0;
+                be->replayMemInst((*stallingLoad));
+            }
+
+            if (result != MA_HIT && dcacheInterface->doEvents()) {
+//                Event *wb = NULL;
+                // wb is only created for LOCKED requests, NULL otherwise.
+                typename BackEnd::LdWritebackEvent *wb = NULL;
+                if (req->flags & LOCKED) {
+                    // Stx_C does not generate a system port transaction.
+                    req->result=1;
+                    wb = new typename BackEnd::LdWritebackEvent(inst,
+                                                            be);
+                }
+
+                DPRINTF(OzoneLSQ,"D-Cache Write Miss!\n");
+
+//                DPRINTF(Activity, "Active st accessing mem miss [sn:%lli]\n",
+//                        inst->seqNum);
+
+                // Will stores need their own kind of writeback events?
+                // Do stores even need writeback events?
+                assert(!req->completionEvent);
+                req->completionEvent = new
+                    StoreCompletionEvent(inst, be, wb, this);
+                be->addDcacheMiss(inst);
+
+                lastDcacheStall = curTick;
+
+                _status = DcacheMissStall;
+
+                // Increment stat here or something
+
+                sq_it--;  // the entry stays queued; it is not completed here
+            } else {
+                DPRINTF(OzoneLSQ,"D-Cache: Write Hit on idx:%i !\n",
+                        inst->sqIdx);
+
+//                DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n",
+//                        inst->seqNum);
+
+                if (req->flags & LOCKED) {
+                    // Stx_C does not generate a system port transaction.
+                    if (req->flags & UNCACHEABLE) {
+                        req->result = 2;
+                    } else {
+                        req->result = 1;
+                    }
+
+                    typename BackEnd::LdWritebackEvent *wb =
+                        new typename BackEnd::LdWritebackEvent(inst,
+                                                               be);
+                    wb->schedule(curTick);
+                }
+                sq_it--;  // move on first: completeStore() erases this entry
+                completeStore(inst->sqIdx);
+            }
+        } else {
+            panic("Must HAVE DCACHE!!!!!\n");
+        }
+    }
+
+    // Not sure this should set it to 0.
+    usedPorts = 0;
+
+    assert(stores >= 0 && storesToWB >= 0);
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::squash(const InstSeqNum &squashed_num)
+{
+    DPRINTF(OzoneLSQ, "Squashing until [sn:%lli]!"
+            "(Loads:%i Stores:%i)\n",squashed_num,loads,stores);
+    // Removes every load and store whose sequence number is greater than
+    // squashed_num, working from the front of each queue.
+    LQIt lq_it = loadQueue.begin();
+
+    while (loads != 0 && (*lq_it)->seqNum > squashed_num) {
+        assert(!loadQueue.empty());
+        // Clear the smart pointer to make sure it is decremented.
+        DPRINTF(OzoneLSQ,"Load Instruction PC %#x squashed, "
+                "[sn:%lli]\n",
+                (*lq_it)->readPC(),
+                (*lq_it)->seqNum);
+        // Squashing the stalled load also ends the stall.
+        if (isStalled() && lq_it == stallingLoad) {
+            stalled = false;
+            stallingStoreIsn = 0;
+            stallingLoad = NULL;
+        }
+
+        --loads;
+
+        // Inefficient!  Drop the hash entry and recycle the load index.
+        LQHashIt lq_hash_it = LQItHash.find((*lq_it)->lqIdx);
+        assert(lq_hash_it != LQItHash.end());
+        LQItHash.erase(lq_hash_it);
+        LQIndices.push((*lq_it)->lqIdx);
+        loadQueue.erase(lq_it++);  // advance before the entry is erased
+    }
+    // Clear the blocked-load state when the blocked load is newer than the
+    // squash point.
+    if (isLoadBlocked) {
+        if (squashed_num < blockedLoadSeqNum) {
+            isLoadBlocked = false;
+            loadBlockedHandled = false;
+            blockedLoadSeqNum = 0;
+        }
+    }
+
+    SQIt sq_it = storeQueue.begin();
+
+    while (stores != 0 && (*sq_it).inst->seqNum > squashed_num) {
+        assert(!storeQueue.empty());
+        // Clear the smart pointer to make sure it is decremented.
+        DPRINTF(OzoneLSQ,"Store Instruction PC %#x idx:%i squashed [sn:%lli]\n",
+                (*sq_it).inst->readPC(), (*sq_it).inst->sqIdx,
+                (*sq_it).inst->seqNum);
+
+        // I don't think this can happen.  It should have been cleared by the
+        // stalling load.
+        if (isStalled() &&
+            (*sq_it).inst->seqNum == stallingStoreIsn) {
+            panic("Is stalled should have been cleared by stalling load!\n");
+            stalled = false;
+            stallingStoreIsn = 0;
+        }
+        // Drop the hash entry, recycle the store index and clear the entry.
+        SQHashIt sq_hash_it = SQItHash.find((*sq_it).inst->sqIdx);
+        assert(sq_hash_it != SQItHash.end());
+        SQItHash.erase(sq_hash_it);
+        SQIndices.push((*sq_it).inst->sqIdx);
+        (*sq_it).inst = NULL;
+        (*sq_it).canWB = 0;
+        // A squashed store must not have a completion event attached.
+        if ((*sq_it).req) {
+            assert(!(*sq_it).req->completionEvent);
+        }
+        (*sq_it).req = NULL;
+        --stores;
+        storeQueue.erase(sq_it++);  // advance before the entry is erased
+    }
+}
+
+/**
+ * Prints both queues from back to front.  Safe to call on empty queues;
+ * printing of a queue stops early at the first entry with no instruction.
+ */
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::dumpInsts()
+{
+    cprintf("Load store queue: Dumping instructions.\n");
+    cprintf("Load queue size: %i\n", loads);
+    cprintf("Load queue: ");
+
+    // Decrement only while past begin() so an empty queue is never
+    // dereferenced and begin() is never stepped before.
+    for (LQIt lq_it = loadQueue.end(); lq_it != loadQueue.begin(); ) {
+        --lq_it;
+        if (!(*lq_it))
+            break;
+        cprintf("[sn:%lli] %#x ", (*lq_it)->seqNum, (*lq_it)->readPC());
+    }
+
+    cprintf("\nStore queue size: %i\n", stores);
+    cprintf("Store queue: ");
+
+    for (SQIt sq_it = storeQueue.end(); sq_it != storeQueue.begin(); ) {
+        --sq_it;
+        if (!(*sq_it).inst)
+            break;
+        cprintf("[sn:%lli]\nPC:%#x\nSize:%i\nCommitted:%i\nCompleted:%i\ncanWB:%i\n",
+                (*sq_it).inst->seqNum, (*sq_it).inst->readPC(),
+                (*sq_it).size, (*sq_it).committed,
+                (*sq_it).completed, (*sq_it).canWB);
+    }
+
+    cprintf("\n");
+}
+
+/**
+ * Marks the store in slot store_idx as completed, replays a load that was
+ * stalled on it, and removes the store from the queue, recycling its index.
+ */
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::completeStore(int store_idx)
+{
+    SQHashIt hash_entry = SQItHash.find(store_idx);
+    assert(hash_entry != SQItHash.end());
+    SQIt entry = (*hash_entry).second;
+
+    assert((*entry).inst);
+    (*entry).completed = true;
+    DynInstPtr inst = (*entry).inst;
+    --storesToWB;
+
+    // A load may be waiting on exactly this store; if so release it.
+    if (isStalled() && inst->seqNum == stallingStoreIsn) {
+        DPRINTF(OzoneLSQ, "Unstalling, stalling store [sn:%lli] "
+                "load [sn:%lli]\n",
+                stallingStoreIsn, (*stallingLoad)->seqNum);
+        stalled = false;
+        stallingStoreIsn = 0;
+        be->replayMemInst((*stallingLoad));
+    }
+
+    DPRINTF(OzoneLSQ, "Completing store idx:%i [sn:%lli], storesToWB:%i\n",
+            inst->sqIdx, inst->seqNum, storesToWB);
+
+    // Drop the hash entry, recycle the index, then unlink the queue entry.
+    assert(!storeQueue.empty());
+    SQItHash.erase(hash_entry);
+    SQIndices.push(inst->sqIdx);
+    storeQueue.erase(entry);
+    stores--;
+/*
+    SQIt oldest_store_it = --(storeQueue.end());
+    if (sq_it == oldest_store_it) {
+        do {
+            inst = (*oldest_store_it).inst;
+            sq_hash_it = SQItHash.find(inst->sqIdx);
+            assert(sq_hash_it != SQItHash.end());
+            SQItHash.erase(sq_hash_it);
+            SQIndices.push(inst->sqIdx);
+            storeQueue.erase(oldest_store_it--);
+
+            --stores;
+        } while ((*oldest_store_it).completed &&
+                 oldest_store_it != storeQueue.end());
+
+//        be->updateLSQNextCycle = true;
+    }
+*/
+}
diff --git a/cpu/ozone/ozone_impl.hh b/cpu/ozone/ozone_impl.hh
index a2c706c60..1f543ec6e 100644
--- a/cpu/ozone/ozone_impl.hh
+++ b/cpu/ozone/ozone_impl.hh
@@ -35,6 +35,8 @@
 #include "cpu/ozone/front_end.hh"
 #include "cpu/ozone/inst_queue.hh"
 #include "cpu/ozone/lsq_unit.hh"
+#include "cpu/ozone/lw_lsq.hh"
+#include "cpu/ozone/lw_back_end.hh"
 #include "cpu/ozone/null_predictor.hh"
 #include "cpu/ozone/dyn_inst.hh"
 #include "cpu/ozone/simple_params.hh"
@@ -55,10 +57,10 @@ struct OzoneImpl {
     typedef TwobitBPredUnit<OzoneImpl> BranchPred;
     typedef FrontEnd<OzoneImpl> FrontEnd;
     // Will need IQ, LSQ eventually
-    typedef BackEnd<OzoneImpl> BackEnd;
+    typedef LWBackEnd<OzoneImpl> BackEnd;
 
     typedef InstQueue<OzoneImpl> InstQueue;
-    typedef OzoneLSQ<OzoneImpl> LdstQueue;
+    typedef OzoneLWLSQ<OzoneImpl> LdstQueue;
 
     typedef OzoneDynInst<OzoneImpl> DynInst;
     typedef RefCountingPtr<DynInst> DynInstPtr;
-- 
cgit v1.2.3


From e704960c80033dd008907caa7c24742a1020d302 Mon Sep 17 00:00:00 2001
From: Kevin Lim <ktlim@umich.edu>
Date: Mon, 24 Apr 2006 17:10:06 -0400
Subject: Updates to Ozone model for quiesce, store conditionals.

--HG--
extra : convert_revision : 72ddd75ad0b5783aca9484e7d178c2915ee8e355
---
 cpu/ozone/cpu.hh              | 122 +++++++++++++++++++++++++++++++++++++++---
 cpu/ozone/cpu_impl.hh         |  62 +++++----------------
 cpu/ozone/dyn_inst_impl.hh    |   1 +
 cpu/ozone/front_end.hh        |  10 ++--
 cpu/ozone/front_end_impl.hh   |  69 +++++++++++++++++++-----
 cpu/ozone/lw_back_end.hh      |   6 ++-
 cpu/ozone/lw_back_end_impl.hh |  61 ++++++++++++---------
 cpu/ozone/lw_lsq.hh           |  25 +++++----
 cpu/ozone/lw_lsq_impl.hh      |  32 +++++++++--
 9 files changed, 272 insertions(+), 116 deletions(-)

(limited to 'cpu/ozone')

diff --git a/cpu/ozone/cpu.hh b/cpu/ozone/cpu.hh
index 17e0f5c42..d37d3360c 100644
--- a/cpu/ozone/cpu.hh
+++ b/cpu/ozone/cpu.hh
@@ -42,7 +42,6 @@
 #include "cpu/pc_event.hh"
 #include "cpu/static_inst.hh"
 #include "mem/mem_interface.hh"
-#include "mem/page_table.hh"
 #include "sim/eventq.hh"
 
 // forward declarations
@@ -59,7 +58,6 @@ class GDBListener;
 
 #else
 
-class PageTable;
 class Process;
 
 #endif // FULL_SYSTEM
@@ -349,9 +347,8 @@ class OzoneCPU : public BaseCPU
     // L1 data cache
     MemInterface *dcacheInterface;
 
-#if !FULL_SYSTEM
-    PageTable *pTable;
-#endif
+    /** Pointer to memory. */
+    FunctionalMemory *mem;
 
     FrontEnd *frontEnd;
 
@@ -428,24 +425,62 @@ class OzoneCPU : public BaseCPU
     int getInstAsid() { return thread.asid; }
     int getDataAsid() { return thread.asid; }
 
+    Fault dummyTranslation(MemReqPtr &req)
+    {
+#if 0
+        assert((req->vaddr >> 48 & 0xffff) == 0);
+#endif
+        // paddr is the vaddr with its upper 16 bits replaced by the asid.
+        const int asid_shift = sizeof(Addr) * 8 - 16;
+        const Addr low_bits = req->vaddr & ~((Addr)0xffff << asid_shift);
+        req->paddr = low_bits | ((Addr)req->asid << asid_shift);
+        return NoFault;
+    }
+
     /** Translates instruction requestion in syscall emulation mode. */
     Fault translateInstReq(MemReqPtr &req)
     {
-        return this->pTable->translate(req);
+        return dummyTranslation(req);
     }
 
     /** Translates data read request in syscall emulation mode. */
     Fault translateDataReadReq(MemReqPtr &req)
     {
-        return this->pTable->translate(req);
+        return dummyTranslation(req);
     }
 
     /** Translates data write request in syscall emulation mode. */
     Fault translateDataWriteReq(MemReqPtr &req)
     {
-        return this->pTable->translate(req);
+        return dummyTranslation(req);
     }
 #endif
+
+    /**
+     * Old CPU read function: reads directly from memory.  A LOCKED request
+     * also sets lockFlag; the value read is passed through gtoh().
+     */
+    template <class T>
+    Fault read(MemReqPtr &req, T &data)
+    {
+#if 0
+#if FULL_SYSTEM && defined(TARGET_ALPHA)
+        if (req->flags & LOCKED) {
+            req->xc->setMiscReg(TheISA::Lock_Addr_DepTag, req->paddr);
+            req->xc->setMiscReg(TheISA::Lock_Flag_DepTag, true);
+        }
+#endif
+#endif
+        // Only the flag is tracked; the locked address is not recorded.
+        if (req->flags & LOCKED)
+            lockFlag = true;
+
+        Fault error = this->mem->read(req, data);
+        data = gtoh(data);
+        return error;
+    }
+
+
     /** CPU read function, forwards read to LSQ. */
     template <class T>
     Fault read(MemReqPtr &req, T &data, int load_idx)
@@ -453,6 +488,75 @@ class OzoneCPU : public BaseCPU
         return backEnd->read(req, data, load_idx);
     }
 
+    /** Old CPU write function: writes htog(data) directly to memory. */
+    template <class T>
+    Fault write(MemReqPtr &req, T &data)
+    {
+#if 0
+#if FULL_SYSTEM && defined(TARGET_ALPHA)
+        ExecContext *xc;
+
+        // If this is a store conditional, act appropriately
+        if (req->flags & LOCKED) {
+            xc = req->xc;
+
+            if (req->flags & UNCACHEABLE) {
+                // Don't update result register (see stq_c in isa_desc)
+                req->result = 2;
+                xc->setStCondFailures(0);//Needed? [RGD]
+            } else {
+                bool lock_flag = xc->readMiscReg(TheISA::Lock_Flag_DepTag);
+                Addr lock_addr = xc->readMiscReg(TheISA::Lock_Addr_DepTag);
+                req->result = lock_flag;
+                if (!lock_flag ||
+                    ((lock_addr & ~0xf) != (req->paddr & ~0xf))) {
+                    xc->setMiscReg(TheISA::Lock_Flag_DepTag, false);
+                    xc->setStCondFailures(xc->readStCondFailures() + 1);
+                    if (((xc->readStCondFailures()) % 100000) == 0) {
+                        std::cerr << "Warning: "
+                                  << xc->readStCondFailures()
+                                  << " consecutive store conditional failures "
+                                  << "on cpu " << req->xc->readCpuId()
+                                  << std::endl;
+                    }
+                    return NoFault;
+                }
+                else xc->setStCondFailures(0);
+            }
+        }
+
+        // Need to clear any locked flags on other processors for
+        // this address.  Only do this for successful Store Conditionals
+        // and all other stores (WH64?).  Unsuccessful Store
+        // Conditionals would have returned above, and wouldn't fall
+        // through.
+        for (int i = 0; i < this->system->execContexts.size(); i++){
+            xc = this->system->execContexts[i];
+            if ((xc->readMiscReg(TheISA::Lock_Addr_DepTag) & ~0xf) ==
+                (req->paddr & ~0xf)) {
+                xc->setMiscReg(TheISA::Lock_Flag_DepTag, false);
+            }
+        }
+
+#endif
+#endif
+        // Store conditionals: result is 2 when uncacheable, 1 on success,
+        // and 0 on failure, in which case nothing is written to memory.
+        if (req->flags & LOCKED) {
+            if (req->flags & UNCACHEABLE) {
+                req->result = 2;
+            } else if (!this->lockFlag) {
+                // The lock address is not compared, only the flag.
+                req->result = 0;
+                return NoFault;
+            } else {
+                req->result = 1;
+            }
+        }
+
+        return this->mem->write(req, (T)htog(data));
+    }
+
     /** CPU write function, forwards write to LSQ. */
     template <class T>
     Fault write(MemReqPtr &req, T &data, int store_idx)
@@ -507,6 +611,8 @@ class OzoneCPU : public BaseCPU
         bool stall;
     };
     TimeBuffer<CommStruct> comm;
+
+    bool lockFlag;
 };
 
 #endif // __CPU_OZONE_CPU_HH__
diff --git a/cpu/ozone/cpu_impl.hh b/cpu/ozone/cpu_impl.hh
index c205ad319..a7bc61603 100644
--- a/cpu/ozone/cpu_impl.hh
+++ b/cpu/ozone/cpu_impl.hh
@@ -149,12 +149,14 @@ OzoneCPU<Impl>::DCacheCompletionEvent::description()
 template <class Impl>
 OzoneCPU<Impl>::OzoneCPU(Params *p)
 #if FULL_SYSTEM
-    : BaseCPU(p), thread(this, 0, p->mem), tickEvent(this, p->width),
+    : BaseCPU(p), thread(this, 0, p->mem), tickEvent(this, p->width), mem(p->mem),
 #else
     : BaseCPU(p), thread(this, 0, p->workload[0], 0), tickEvent(this, p->width),
+      mem(p->workload[0]->getMemory()),
 #endif
       comm(5, 5)
 {
+
     frontEnd = new FrontEnd(p);
     backEnd = new BackEnd(p);
 
@@ -245,51 +247,7 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
     globalSeqNum = 1;
 
     checkInterrupts = false;
-/*
-    fetchRedirBranch = true;
-    fetchRedirExcp = true;
-
-    // Need to initialize the rename maps, and the head and tail pointers.
-    robHeadPtr = new DynInst(this);
-    robTailPtr = new DynInst(this);
-
-    robHeadPtr->setNextInst(robTailPtr);
-//    robHeadPtr->setPrevInst(NULL);
-//    robTailPtr->setNextInst(NULL);
-    robTailPtr->setPrevInst(robHeadPtr);
-
-    robHeadPtr->setCompleted();
-    robTailPtr->setCompleted();
-
-    for (int i = 0; i < ISA::TotalNumRegs; ++i) {
-        renameTable[i] = new DynInst(this);
-        commitTable[i] = new DynInst(this);
 
-        renameTable[i]->setCompleted();
-        commitTable[i]->setCompleted();
-    }
-
-#if FULL_SYSTEM
-    for (int i = 0; i < ISA::NumIntRegs; ++i) {
-        palShadowTable[i] = new DynInst(this);
-        palShadowTable[i]->setCompleted();
-    }
-#endif
-
-    // Size of cache block.
-    cacheBlkSize = icacheInterface ? icacheInterface->getBlockSize() : 64;
-
-    // Create mask to get rid of offset bits.
-    cacheBlkMask = (cacheBlkSize - 1);
-
-    // Get the size of an instruction.
-    instSize = sizeof(MachInst);
-
-    // Create space to store a cache line.
-    cacheData = new uint8_t[cacheBlkSize];
-
-    cacheBlkValid = false;
-*/
     for (int i = 0; i < TheISA::TotalNumRegs; ++i) {
         thread.renameTable[i] = new DynInst(this);
         thread.renameTable[i]->setCompleted();
@@ -299,9 +257,11 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
     backEnd->renameTable.copyFrom(thread.renameTable);
 
 #if !FULL_SYSTEM
-    pTable = p->pTable;
+//    pTable = p->pTable;
 #endif
 
+    lockFlag = 0;
+
     DPRINTF(OzoneCPU, "OzoneCPU: Created Ozone cpu object.\n");
 }
 
@@ -392,6 +352,7 @@ OzoneCPU<Impl>::activateContext(int thread_num, int delay)
     scheduleTickEvent(delay);
     _status = Running;
     thread._status = ExecContext::Active;
+    frontEnd->wakeFromQuiesce();
 }
 
 template <class Impl>
@@ -401,8 +362,8 @@ OzoneCPU<Impl>::suspendContext(int thread_num)
     // Eventually change this in SMT.
     assert(thread_num == 0);
 //    assert(xcProxy);
-
-    assert(_status == Running);
+    // @todo: Figure out how to initially set the status properly so this is running.
+//    assert(_status == Running);
     notIdleFraction--;
     unscheduleTickEvent();
     _status = Idle;
@@ -665,6 +626,7 @@ OzoneCPU<Impl>::tick()
 {
     DPRINTF(OzoneCPU, "\n\nOzoneCPU: Ticking cpu.\n");
 
+    _status = Running;
     thread.renameTable[ZeroReg]->setIntResult(0);
     thread.renameTable[ZeroReg+TheISA::FP_Base_DepTag]->
         setDoubleResult(0.0);
@@ -756,7 +718,7 @@ OzoneCPU<Impl>::tick()
     // check for instruction-count-based events
     comInstEventQueue[0]->serviceEvents(numInst);
 
-    if (!tickEvent.scheduled())
+    if (!tickEvent.scheduled() && _status == Running)
         tickEvent.schedule(curTick + 1);
 }
 
@@ -821,6 +783,8 @@ OzoneCPU<Impl>::hwrei()
 
     thread.setNextPC(thread.readMiscReg(AlphaISA::IPR_EXC_ADDR));
 
+    lockFlag = false;
+
     // Not sure how to make a similar check in the Ozone model
 //    if (!misspeculating()) {
         kernelStats->hwrei();
diff --git a/cpu/ozone/dyn_inst_impl.hh b/cpu/ozone/dyn_inst_impl.hh
index 2d86ced62..c83481c9a 100644
--- a/cpu/ozone/dyn_inst_impl.hh
+++ b/cpu/ozone/dyn_inst_impl.hh
@@ -237,6 +237,7 @@ OzoneDynInst<Impl>::hwrei()
     this->cpu->kernelStats->hwrei();
 
     this->cpu->checkInterrupts = true;
+    this->cpu->lockFlag = false;
 
     // FIXME: XXX check for interrupts? XXX
     return NoFault;
diff --git a/cpu/ozone/front_end.hh b/cpu/ozone/front_end.hh
index 251f4200c..2bff2544d 100644
--- a/cpu/ozone/front_end.hh
+++ b/cpu/ozone/front_end.hh
@@ -60,7 +60,7 @@ class FrontEnd
                 const bool is_branch = false, const bool branch_taken = false);
     DynInstPtr getInst();
 
-    void processCacheCompletion();
+    void processCacheCompletion(MemReqPtr &req);
 
     void addFreeRegs(int num_freed);
 
@@ -109,6 +109,7 @@ class FrontEnd
         SerializeBlocked,
         SerializeComplete,
         RenameBlocked,
+        QuiescePending,
         BEBlocked
     };
 
@@ -130,17 +131,16 @@ class FrontEnd
     class ICacheCompletionEvent : public Event
     {
       private:
+        MemReqPtr req;
         FrontEnd *frontEnd;
 
       public:
-        ICacheCompletionEvent(FrontEnd *_fe);
+        ICacheCompletionEvent(MemReqPtr &_req, FrontEnd *_fe);
 
         virtual void process();
         virtual const char *description();
     };
 
-    ICacheCompletionEvent cacheCompletionEvent;
-
     MemInterface *icacheInterface;
 
 #if !FULL_SYSTEM
@@ -174,6 +174,8 @@ class FrontEnd
     void setPC(Addr val) { PC = val; }
     void setNextPC(Addr val) { nextPC = val; }
 
+    void wakeFromQuiesce();
+
     void dumpInsts();
 
   private:
diff --git a/cpu/ozone/front_end_impl.hh b/cpu/ozone/front_end_impl.hh
index af452fe95..7c18386cf 100644
--- a/cpu/ozone/front_end_impl.hh
+++ b/cpu/ozone/front_end_impl.hh
@@ -1,4 +1,5 @@
 
+#include "arch/faults.hh"
 #include "arch/isa_traits.hh"
 #include "base/statistics.hh"
 #include "cpu/exec_context.hh"
@@ -12,7 +13,6 @@ using namespace TheISA;
 template <class Impl>
 FrontEnd<Impl>::FrontEnd(Params *params)
     : branchPred(params),
-      cacheCompletionEvent(this),
       icacheInterface(params->icacheInterface),
       instBufferSize(0),
       maxInstBufferSize(params->maxInstBufferSize),
@@ -26,10 +26,12 @@ FrontEnd<Impl>::FrontEnd(Params *params)
     // Setup branch predictor.
 
     // Setup Memory Request
+/*
     memReq = new MemReq();
     memReq->asid = 0;
     memReq->data = new uint8_t[64];
-
+*/
+    memReq = NULL;
     // Size of cache block.
     cacheBlkSize = icacheInterface ? icacheInterface->getBlockSize() : 64;
 
@@ -46,7 +48,7 @@ FrontEnd<Impl>::FrontEnd(Params *params)
     cacheBlkValid = false;
 
 #if !FULL_SYSTEM
-    pTable = params->pTable;
+//    pTable = params->pTable;
 #endif
     fetchFault = NoFault;
 }
@@ -72,7 +74,7 @@ void
 FrontEnd<Impl>::setXC(ExecContext *xc_ptr)
 {
     xc = xc_ptr;
-    memReq->xc = xc;
+//    memReq->xc = xc;
 }
 
 template <class Impl>
@@ -269,6 +271,9 @@ FrontEnd<Impl>::tick()
         }
         updateStatus();
         return;
+    } else if (status == QuiescePending) {
+        DPRINTF(FE, "Waiting for quiesce to execute or get squashed.\n");
+        return;
     } else if (status != IcacheMissComplete) {
         if (fetchCacheLineNextCycle) {
             Fault fault = fetchCacheLine();
@@ -325,6 +330,14 @@ FrontEnd<Impl>::tick()
         // rename(num_inst);
         // }
 
+#if FULL_SYSTEM
+        if (inst->isQuiesce()) {
+            warn("%lli: Quiesce instruction encountered, halting fetch!", curTick);
+            status = QuiescePending;
+            break;
+        }
+#endif
+
         if (inst->predTaken()) {
             // Start over with tick?
             break;
@@ -364,6 +377,12 @@ FrontEnd<Impl>::fetchCacheLine()
 
     // Setup the memReq to do a read of the first isntruction's address.
     // Set the appropriate read size and flags as well.
+    memReq = new MemReq();
+
+    memReq->asid = 0;
+    memReq->thread_num = 0;
+    memReq->data = new uint8_t[64];
+    memReq->xc = xc;
     memReq->cmd = Read;
     memReq->reset(fetch_PC, cacheBlkSize, flags);
 
@@ -377,16 +396,26 @@ FrontEnd<Impl>::fetchCacheLine()
     // Now do the timing access to see whether or not the instruction
     // exists within the cache.
     if (icacheInterface && fault == NoFault) {
+#if FULL_SYSTEM
+        if (cpu->system->memctrl->badaddr(memReq->paddr)) {
+            DPRINTF(FE, "Fetch: Bad address %#x (hopefully on a "
+                    "misspeculating path!",
+                    memReq->paddr);
+            return TheISA::genMachineCheckFault();
+        }
+#endif
+
         memReq->completionEvent = NULL;
 
         memReq->time = curTick;
+        fault = cpu->mem->read(memReq, cacheData);
 
         MemAccessResult res = icacheInterface->access(memReq);
 
         // If the cache missed then schedule an event to wake
         // up this stage once the cache miss completes.
         if (icacheInterface->doEvents() && res != MA_HIT) {
-            memReq->completionEvent = new ICacheCompletionEvent(this);
+            memReq->completionEvent = new ICacheCompletionEvent(memReq, this);
 
             status = IcacheMissStall;
 
@@ -398,7 +427,7 @@ FrontEnd<Impl>::fetchCacheLine()
 
             cacheBlkValid = true;
 
-            memcpy(cacheData, memReq->data, memReq->size);
+//            memcpy(cacheData, memReq->data, memReq->size);
         }
     }
 
@@ -541,7 +570,8 @@ FrontEnd<Impl>::squash(const InstSeqNum &squash_num, const Addr &next_PC,
     // Clear the icache miss if it's outstanding.
     if (status == IcacheMissStall && icacheInterface) {
         DPRINTF(FE, "Squashing outstanding Icache miss.\n");
-        icacheInterface->squash(0);
+//        icacheInterface->squash(0);
+        memReq = NULL;
     }
 
     if (status == SerializeBlocked) {
@@ -577,12 +607,13 @@ FrontEnd<Impl>::getInst()
 
 template <class Impl>
 void
-FrontEnd<Impl>::processCacheCompletion()
+FrontEnd<Impl>::processCacheCompletion(MemReqPtr &req)
 {
     DPRINTF(FE, "Processing cache completion\n");
 
     // Do something here.
-    if (status != IcacheMissStall) {
+    if (status != IcacheMissStall ||
+        req != memReq) {
         DPRINTF(FE, "Previous fetch was squashed.\n");
         return;
     }
@@ -595,10 +626,11 @@ FrontEnd<Impl>::processCacheCompletion()
         fetchStatus[tid] = IcacheMissComplete;
     }
 */
-    memcpy(cacheData, memReq->data, memReq->size);
+//    memcpy(cacheData, memReq->data, memReq->size);
 
     // Reset the completion event to NULL.
-    memReq->completionEvent = NULL;
+//    memReq->completionEvent = NULL;
+    memReq = NULL;
 }
 
 template <class Impl>
@@ -766,6 +798,15 @@ FrontEnd<Impl>::renameInst(DynInstPtr &inst)
     }
 }
 
+template <class Impl>
+void
+FrontEnd<Impl>::wakeFromQuiesce()
+{
+    // Forces the front end back to Running regardless of its prior state;
+    // NOTE(review): the original author was unsure this is always safe.
+    DPRINTF(FE, "Waking up from quiesce\n");
+    this->status = Running;
+}
+
 template <class Impl>
 void
 FrontEnd<Impl>::dumpInsts()
@@ -786,8 +827,8 @@ FrontEnd<Impl>::dumpInsts()
 }
 
 template <class Impl>
-FrontEnd<Impl>::ICacheCompletionEvent::ICacheCompletionEvent(FrontEnd *fe)
-    : Event(&mainEventQueue, Delayed_Writeback_Pri), frontEnd(fe)
+FrontEnd<Impl>::ICacheCompletionEvent::ICacheCompletionEvent(MemReqPtr &_req, FrontEnd *fe)
+    : Event(&mainEventQueue, Delayed_Writeback_Pri), req(_req), frontEnd(fe)
 {
     this->setFlags(Event::AutoDelete);
 }
@@ -796,7 +837,7 @@ template <class Impl>
 void
 FrontEnd<Impl>::ICacheCompletionEvent::process()
 {
-    frontEnd->processCacheCompletion();
+    frontEnd->processCacheCompletion(req);
 }
 
 template <class Impl>
diff --git a/cpu/ozone/lw_back_end.hh b/cpu/ozone/lw_back_end.hh
index b89957aad..f17c93ff4 100644
--- a/cpu/ozone/lw_back_end.hh
+++ b/cpu/ozone/lw_back_end.hh
@@ -94,8 +94,7 @@ class LWBackEnd
 
     void regStats();
 
-    void setCPU(FullCPU *cpu_ptr)
-    { cpu = cpu_ptr; }
+    void setCPU(FullCPU *cpu_ptr);
 
     void setFrontEnd(FrontEnd *front_end_ptr)
     { frontEnd = front_end_ptr; }
@@ -404,6 +403,9 @@ class LWBackEnd
     Stats::Scalar<> commit_eligible_samples;
     Stats::Vector<> commit_eligible;
 
+    Stats::Vector<> squashedInsts;
+    Stats::Vector<> ROBSquashedInsts;
+
     Stats::Scalar<> ROB_fcount;
     Stats::Formula ROB_full_rate;
 
diff --git a/cpu/ozone/lw_back_end_impl.hh b/cpu/ozone/lw_back_end_impl.hh
index 115821787..d1290239c 100644
--- a/cpu/ozone/lw_back_end_impl.hh
+++ b/cpu/ozone/lw_back_end_impl.hh
@@ -480,6 +480,18 @@ LWBackEnd<Impl>::regStats()
         .desc("number cycles where commit BW limit reached")
         ;
 
+    squashedInsts
+        .init(cpu->number_of_threads)
+        .name(name() + ".COM:squashed_insts")
+        .desc("Number of instructions removed from inst list")
+        ;
+
+    ROBSquashedInsts
+        .init(cpu->number_of_threads)
+        .name(name() + ".COM:rob_squashed_insts")
+        .desc("Number of instructions removed from inst list when they reached the head of the ROB")
+        ;
+
     ROB_fcount
         .name(name() + ".ROB:full_count")
         .desc("number of cycles where ROB was full")
@@ -515,6 +527,14 @@ LWBackEnd<Impl>::regStats()
 //    IQ.regStats();
 }
 
+template <class Impl>
+void
+LWBackEnd<Impl>::setCPU(FullCPU *cpu_ptr)
+{
+    cpu = cpu_ptr;
+    LSQ.setCPU(cpu_ptr);
+}
+
 template <class Impl>
 void
 LWBackEnd<Impl>::setCommBuffer(TimeBuffer<CommStruct> *_comm)
@@ -1044,35 +1064,24 @@ LWBackEnd<Impl>::commitInst(int inst_num)
         }
     }
 
-    // Now check if it's one of the special trap or barrier or
-    // serializing instructions.
-    if (inst->isThreadSync())
-    {
-        // Not handled for now.
-        panic("Thread sync instructions are not handled yet.\n");
-    }
+    // Not handled for now.
+    assert(!inst->isThreadSync());
 
     // Check if the instruction caused a fault.  If so, trap.
     Fault inst_fault = inst->getFault();
 
     if (inst_fault != NoFault) {
-        if (!inst->isNop()) {
-            DPRINTF(BE, "Inst [sn:%lli] PC %#x has a fault\n",
-                    inst->seqNum, inst->readPC());
-            thread->setInst(
-                static_cast<TheISA::MachInst>(inst->staticInst->machInst));
+        DPRINTF(BE, "Inst [sn:%lli] PC %#x has a fault\n",
+                inst->seqNum, inst->readPC());
+        thread->setInst(
+            static_cast<TheISA::MachInst>(inst->staticInst->machInst));
 #if FULL_SYSTEM
-            handleFault(inst_fault);
-            return false;
+        handleFault(inst_fault);
+        return false;
 #else // !FULL_SYSTEM
-            panic("fault (%d) detected @ PC %08p", inst_fault,
-                  inst->PC);
+        panic("fault (%d) detected @ PC %08p", inst_fault,
+              inst->PC);
 #endif // FULL_SYSTEM
-        }
-    }
-
-    if (inst->isControl()) {
-//        ++commitCommittedBranches;
     }
 
     int freed_regs = 0;
@@ -1096,7 +1105,6 @@ LWBackEnd<Impl>::commitInst(int inst_num)
     instList.pop_back();
 
     --numInsts;
-    cpu->numInst++;
     thread->numInsts++;
     ++thread->funcExeInst;
     // Maybe move this to where teh fault is handled; if the fault is handled,
@@ -1134,15 +1142,14 @@ template <class Impl>
 void
 LWBackEnd<Impl>::commitInsts()
 {
-    int commit_width = commitWidth ? commitWidth : width;
-
     // Not sure this should be a loop or not.
     int inst_num = 0;
-    while (!instList.empty() && inst_num < commit_width) {
+    while (!instList.empty() && inst_num < commitWidth) {
         if (instList.back()->isSquashed()) {
             instList.back()->clearDependents();
             instList.pop_back();
             --numInsts;
+            ROBSquashedInsts[instList.back()->threadNumber]++;
             continue;
         }
 
@@ -1150,6 +1157,7 @@ LWBackEnd<Impl>::commitInsts()
             DPRINTF(BE, "Can't commit, Instruction [sn:%lli] PC "
                     "%#x is head of ROB and not ready\n",
                     instList.back()->seqNum, instList.back()->readPC());
+            --inst_num;
             break;
         }
     }
@@ -1217,6 +1225,8 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
 
         (*insts_it)->clearDependents();
 
+        squashedInsts[(*insts_it)->threadNumber]++;
+
         instList.erase(insts_it++);
         --numInsts;
     }
@@ -1350,6 +1360,7 @@ LWBackEnd<Impl>::updateComInstStats(DynInstPtr &inst)
 {
     unsigned thread = inst->threadNumber;
 
+    cpu->numInst++;
     //
     //  Pick off the software prefetches
     //
diff --git a/cpu/ozone/lw_lsq.hh b/cpu/ozone/lw_lsq.hh
index 2b2c25b58..eb9886244 100644
--- a/cpu/ozone/lw_lsq.hh
+++ b/cpu/ozone/lw_lsq.hh
@@ -43,7 +43,7 @@
 //#include "mem/page_table.hh"
 #include "sim/sim_object.hh"
 
-class PageTable;
+//class PageTable;
 
 /**
  * Class that implements the actual LQ and SQ for each specific thread.
@@ -115,7 +115,7 @@ class OzoneLWLSQ {
     { be = be_ptr; }
 
     /** Sets the page table pointer. */
-    void setPageTable(PageTable *pt_ptr);
+//    void setPageTable(PageTable *pt_ptr);
 
     /** Ticks the LSQ unit, which in this case only resets the number of
      * used cache ports.
@@ -243,7 +243,7 @@ class OzoneLWLSQ {
     MemInterface *dcacheInterface;
 
     /** Pointer to the page table. */
-    PageTable *pTable;
+//    PageTable *pTable;
 
   public:
     struct SQEntry {
@@ -562,6 +562,19 @@ OzoneLWLSQ<Impl>::read(MemReqPtr &req, T &data, int load_idx)
 
 
     // If there's no forwarding case, then go access memory
+    DPRINTF(OzoneLSQ, "Doing functional access for inst PC %#x\n",
+            inst->readPC());
+
+
+    // Setup MemReq pointer
+    req->cmd = Read;
+    req->completionEvent = NULL;
+    req->time = curTick;
+    assert(!req->data);
+    req->data = new uint8_t[64];
+    Fault fault = cpu->read(req, data);
+    memcpy(req->data, &data, sizeof(T));
+
     ++usedPorts;
 
     // if we have a cache, do cache access too
@@ -582,12 +595,6 @@ OzoneLWLSQ<Impl>::read(MemReqPtr &req, T &data, int load_idx)
                 "vaddr:%#x flags:%i\n",
                 inst->readPC(), req->paddr, req->vaddr, req->flags);
 
-        // Setup MemReq pointer
-        req->cmd = Read;
-        req->completionEvent = NULL;
-        req->time = curTick;
-        assert(!req->data);
-        req->data = new uint8_t[64];
 
         assert(!req->completionEvent);
         req->completionEvent =
diff --git a/cpu/ozone/lw_lsq_impl.hh b/cpu/ozone/lw_lsq_impl.hh
index 54d7ead6c..7b22d2564 100644
--- a/cpu/ozone/lw_lsq_impl.hh
+++ b/cpu/ozone/lw_lsq_impl.hh
@@ -131,7 +131,7 @@ OzoneLWLSQ<Impl>::clearSQ()
 {
     storeQueue.clear();
 }
-
+/*
 template<class Impl>
 void
 OzoneLWLSQ<Impl>::setPageTable(PageTable *pt_ptr)
@@ -139,7 +139,7 @@ OzoneLWLSQ<Impl>::setPageTable(PageTable *pt_ptr)
     DPRINTF(OzoneLSQ, "Setting the page table pointer.\n");
     pTable = pt_ptr;
 }
-
+*/
 template<class Impl>
 void
 OzoneLWLSQ<Impl>::resizeLQ(unsigned size)
@@ -519,6 +519,23 @@ OzoneLWLSQ<Impl>::writebackStores()
                 req->paddr, *(req->data),
                 inst->seqNum);
 
+        switch((*sq_it).size) {
+          case 1:
+            cpu->write(req, (uint8_t &)(*sq_it).data);
+            break;
+          case 2:
+            cpu->write(req, (uint16_t &)(*sq_it).data);
+            break;
+          case 4:
+            cpu->write(req, (uint32_t &)(*sq_it).data);
+            break;
+          case 8:
+            cpu->write(req, (uint64_t &)(*sq_it).data);
+            break;
+          default:
+            panic("Unexpected store size!\n");
+        }
+
         if (dcacheInterface) {
             MemAccessResult result = dcacheInterface->access(req);
 
@@ -538,7 +555,7 @@ OzoneLWLSQ<Impl>::writebackStores()
                 typename BackEnd::LdWritebackEvent *wb = NULL;
                 if (req->flags & LOCKED) {
                     // Stx_C does not generate a system port transaction.
-                    req->result=1;
+//                    req->result=1;
                     wb = new typename BackEnd::LdWritebackEvent(inst,
                                                             be);
                 }
@@ -571,12 +588,12 @@ OzoneLWLSQ<Impl>::writebackStores()
 
                 if (req->flags & LOCKED) {
                     // Stx_C does not generate a system port transaction.
-                    if (req->flags & UNCACHEABLE) {
+/*                    if (req->flags & UNCACHEABLE) {
                         req->result = 2;
                     } else {
                         req->result = 1;
                     }
-
+*/
                     typename BackEnd::LdWritebackEvent *wb =
                         new typename BackEnd::LdWritebackEvent(inst,
                                                                be);
@@ -642,6 +659,11 @@ OzoneLWLSQ<Impl>::squash(const InstSeqNum &squashed_num)
 
     while (stores != 0 && (*sq_it).inst->seqNum > squashed_num) {
         assert(!storeQueue.empty());
+
+        if ((*sq_it).canWB) {
+            break;
+        }
+
         // Clear the smart pointer to make sure it is decremented.
         DPRINTF(OzoneLSQ,"Store Instruction PC %#x idx:%i squashed [sn:%lli]\n",
                 (*sq_it).inst->readPC(), (*sq_it).inst->sqIdx,
-- 
cgit v1.2.3


From 31e09892d750d0e6dc7de3d455e34808c159a420 Mon Sep 17 00:00:00 2001
From: Kevin Lim <ktlim@umich.edu>
Date: Mon, 24 Apr 2006 17:11:31 -0400
Subject: Include option for disabling PC symbols.

cpu/inst_seq.hh:
cpu/o3/cpu.cc:
cpu/ozone/cpu_builder.cc:
cpu/ozone/thread_state.hh:
    SE build fixes.

--HG--
extra : convert_revision : a4df6128533105f849b5469f62d83dffe299b7df
---
 cpu/ozone/cpu_builder.cc  | 12 ++++++------
 cpu/ozone/thread_state.hh |  5 +++--
 2 files changed, 9 insertions(+), 8 deletions(-)

(limited to 'cpu/ozone')

diff --git a/cpu/ozone/cpu_builder.cc b/cpu/ozone/cpu_builder.cc
index 8ac6858b0..0146dd1bd 100644
--- a/cpu/ozone/cpu_builder.cc
+++ b/cpu/ozone/cpu_builder.cc
@@ -45,7 +45,7 @@ SimObjectParam<AlphaITB *> itb;
 SimObjectParam<AlphaDTB *> dtb;
 #else
 SimObjectVectorParam<Process *> workload;
-SimObjectParam<PageTable *> page_table;
+//SimObjectParam<PageTable *> page_table;
 #endif // FULL_SYSTEM
 
 SimObjectParam<FunctionalMemory *> mem;
@@ -159,7 +159,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU)
     INIT_PARAM(dtb, "Data translation buffer"),
 #else
     INIT_PARAM(workload, "Processes to run"),
-    INIT_PARAM(page_table, "Page table"),
+//    INIT_PARAM(page_table, "Page table"),
 #endif // FULL_SYSTEM
 
     INIT_PARAM_DFLT(mem, "Memory", NULL),
@@ -310,7 +310,7 @@ CREATE_SIM_OBJECT(DerivOzoneCPU)
     params->dtb = dtb;
 #else
     params->workload = workload;
-    params->pTable = page_table;
+//    params->pTable = page_table;
 #endif // FULL_SYSTEM
 
     params->mem = mem;
@@ -440,7 +440,7 @@ SimObjectParam<AlphaITB *> itb;
 SimObjectParam<AlphaDTB *> dtb;
 #else
 SimObjectVectorParam<Process *> workload;
-SimObjectParam<PageTable *> page_table;
+//SimObjectParam<PageTable *> page_table;
 #endif // FULL_SYSTEM
 
 SimObjectParam<FunctionalMemory *> mem;
@@ -554,7 +554,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(SimpleOzoneCPU)
     INIT_PARAM(dtb, "Data translation buffer"),
 #else
     INIT_PARAM(workload, "Processes to run"),
-    INIT_PARAM(page_table, "Page table"),
+//    INIT_PARAM(page_table, "Page table"),
 #endif // FULL_SYSTEM
 
     INIT_PARAM_DFLT(mem, "Memory", NULL),
@@ -705,7 +705,7 @@ CREATE_SIM_OBJECT(SimpleOzoneCPU)
     params->dtb = dtb;
 #else
     params->workload = workload;
-    params->pTable = page_table;
+//    params->pTable = page_table;
 #endif // FULL_SYSTEM
 
     params->mem = mem;
diff --git a/cpu/ozone/thread_state.hh b/cpu/ozone/thread_state.hh
index c6d23a63b..269fc6459 100644
--- a/cpu/ozone/thread_state.hh
+++ b/cpu/ozone/thread_state.hh
@@ -6,9 +6,10 @@
 #include "arch/isa_traits.hh"
 #include "cpu/exec_context.hh"
 #include "cpu/thread_state.hh"
+#include "sim/process.hh"
 
 class Event;
-class Process;
+//class Process;
 
 #if FULL_SYSTEM
 class EndQuiesceEvent;
@@ -40,7 +41,7 @@ struct OzoneThreadState : public ThreadState {
     }
 #else
     OzoneThreadState(FullCPU *_cpu, int _thread_num, Process *_process, int _asid)
-        : ThreadState(-1, _thread_num, NULL, _process, _asid),
+        : ThreadState(-1, _thread_num, _process->getMemory(), _process, _asid),
           cpu(_cpu), inSyscall(0), trapPending(0)
     {
         memset(&regs, 0, sizeof(TheISA::RegFile));
-- 
cgit v1.2.3


From d363d5aad72b34769c753752a779a13e11532fd8 Mon Sep 17 00:00:00 2001
From: Kevin Lim <ktlim@umich.edu>
Date: Mon, 24 Apr 2006 17:40:00 -0400
Subject: Quiesce stuff.

cpu/ozone/cpu.hh:
    Add quiesce stat (not clear how it should be used yet).
cpu/ozone/cpu_impl.hh:
    Fix for quiesce.

--HG--
extra : convert_revision : a1998818e241374ae3f4c3cabbef885dda55c884
---
 cpu/ozone/cpu.hh      | 2 ++
 cpu/ozone/cpu_impl.hh | 8 +++++++-
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'cpu/ozone')

diff --git a/cpu/ozone/cpu.hh b/cpu/ozone/cpu.hh
index d37d3360c..56b6571a2 100644
--- a/cpu/ozone/cpu.hh
+++ b/cpu/ozone/cpu.hh
@@ -613,6 +613,8 @@ class OzoneCPU : public BaseCPU
     TimeBuffer<CommStruct> comm;
 
     bool lockFlag;
+
+    Stats::Scalar<> quiesceCycles;
 };
 
 #endif // __CPU_OZONE_CPU_HH__
diff --git a/cpu/ozone/cpu_impl.hh b/cpu/ozone/cpu_impl.hh
index a7bc61603..17d944e7c 100644
--- a/cpu/ozone/cpu_impl.hh
+++ b/cpu/ozone/cpu_impl.hh
@@ -413,6 +413,11 @@ OzoneCPU<Impl>::regStats()
         .desc("Percentage of idle cycles")
         ;
 
+    quiesceCycles
+        .name(name() + ".quiesce_cycles")
+        .desc("Number of cycles spent in quiesce")
+        ;
+
     idleFraction = constant(1.0) - notIdleFraction;
 
     frontEnd->regStats();
@@ -609,7 +614,8 @@ OzoneCPU<Impl>::post_interrupt(int int_num, int index)
 {
     BaseCPU::post_interrupt(int_num, index);
 
-    if (thread._status == ExecContext::Suspended) {
+//    if (thread._status == ExecContext::Suspended) {
+    if (_status == Idle) {
         DPRINTF(IPI,"Suspended Processor awoke\n");
 //	thread.activate();
         // Hack for now.  Otherwise might have to go through the xcProxy, or
-- 
cgit v1.2.3


From 21df09cf7aa6bdec5de11904751d355e773a3168 Mon Sep 17 00:00:00 2001
From: Kevin Lim <ktlim@umich.edu>
Date: Thu, 11 May 2006 19:18:36 -0400
Subject: Fixes for ozone CPU to successfully boot and run linux.

cpu/base_dyn_inst.hh:
    Remove snoop function (did not mean to commit it).
cpu/ozone/back_end_impl.hh:
    Set instruction as having its result ready, not completed.
cpu/ozone/cpu.hh:
    Fixes for store conditionals.  Use an additional lock address list to make sure that the access is valid.  I don't know if this is fully necessary, but it gives me peace of mind (at some performance cost).
    Make sure to schedule for cycles(1) and not just 1 tick in the future, as a tick = 1ps (not one CPU cycle).
    Also support the new Checker.
cpu/ozone/cpu_builder.cc:
    Add parameter for maxOutstandingMemOps so it can be set through the config.
    Also add in the checker.  Right now it's a BaseCPU simobject, but that may change in the future.
cpu/ozone/cpu_impl.hh:
    Add support for the checker.  For now there's a dynamic cast to convert the simobject passed back from the builder to the proper Checker type.  It's ugly, but only happens at startup, and is probably a justified use of dynamic cast.

    Support switching out/taking over from other CPUs.

    Correct indexing problem for float registers.
cpu/ozone/dyn_inst.hh:
    Add ability for instructions to wait on memory instructions in addition to source register instructions.  This is needed for memory dependence predictors and memory barriers.
cpu/ozone/dyn_inst_impl.hh:
    Support waiting on memory operations.
    Use "resultReady" to differentiate an instruction having its registers produced vs being totally completed.
cpu/ozone/front_end.hh:
    Support switching out.
    Also record if an interrupt is pending.
cpu/ozone/front_end_impl.hh:
    Support switching out.  Also support stalling the front end if an interrupt is pending.
cpu/ozone/lw_back_end.hh:
    Add checker in.
    Support switching out.
    Support memory barriers.
cpu/ozone/lw_back_end_impl.hh:
    Lots of changes to get things to work right.
    Faults, traps, interrupts all wait until all stores have written back (important).
    Memory barriers are supported, as is the general ability for instructions to be dependent on other memory instructions.
cpu/ozone/lw_lsq.hh:
    Support switching out.
    Also use store writeback events in all cases, not just dcache misses.
cpu/ozone/lw_lsq_impl.hh:
    Support switching out.
    Also use store writeback events in all cases, not just dcache misses.
    Support the checker CPU.  Marks instructions as completed once the functional access is done (which has to be done for the checker to be able to verify results).
cpu/ozone/simple_params.hh:
    Add max outstanding mem ops parameter.
python/m5/objects/OzoneCPU.py:
    Add max outstanding mem ops, checker.

--HG--
extra : convert_revision : f4d408e1bb1f25836a097b6abe3856111e950c59
---
 cpu/ozone/back_end_impl.hh    |   2 +-
 cpu/ozone/cpu.hh              |  28 ++++-
 cpu/ozone/cpu_builder.cc      |  16 ++-
 cpu/ozone/cpu_impl.hh         | 118 ++++++++++++++-----
 cpu/ozone/dyn_inst.hh         |  40 +++++--
 cpu/ozone/dyn_inst_impl.hh    |  43 ++++++-
 cpu/ozone/front_end.hh        |  13 +++
 cpu/ozone/front_end_impl.hh   |  58 +++++++++-
 cpu/ozone/lw_back_end.hh      |  20 +++-
 cpu/ozone/lw_back_end_impl.hh | 256 +++++++++++++++++++++++++++++++++---------
 cpu/ozone/lw_lsq.hh           |  32 +++++-
 cpu/ozone/lw_lsq_impl.hh      | 189 ++++++++++++++++++++++++-------
 cpu/ozone/simple_params.hh    |   1 +
 13 files changed, 657 insertions(+), 159 deletions(-)

(limited to 'cpu/ozone')

diff --git a/cpu/ozone/back_end_impl.hh b/cpu/ozone/back_end_impl.hh
index 0b0f04f59..36770d65c 100644
--- a/cpu/ozone/back_end_impl.hh
+++ b/cpu/ozone/back_end_impl.hh
@@ -1385,7 +1385,7 @@ BackEnd<Impl>::writebackInsts()
                     inst->seqNum, inst->readPC());
 
             inst->setCanCommit();
-            inst->setCompleted();
+            inst->setResultReady();
 
             if (inst->isExecuted()) {
                 int dependents = IQ.wakeDependents(inst);
diff --git a/cpu/ozone/cpu.hh b/cpu/ozone/cpu.hh
index 56b6571a2..eec8902d8 100644
--- a/cpu/ozone/cpu.hh
+++ b/cpu/ozone/cpu.hh
@@ -53,6 +53,7 @@ class AlphaDTB;
 class PhysicalMemory;
 class MemoryController;
 
+class Sampler;
 class RemoteGDB;
 class GDBListener;
 
@@ -69,6 +70,9 @@ namespace Trace {
     class InstRecord;
 }
 
+template <class>
+class Checker;
+
 /**
  * Declaration of Out-of-Order CPU class.  Basically it is a SimpleCPU with
  * simple out-of-order capabilities added to it.  It is still a 1 CPI machine
@@ -226,7 +230,9 @@ class OzoneCPU : public BaseCPU
     };
 
     // execution context proxy
-    OzoneXC xcProxy;
+    OzoneXC ozoneXC;
+    ExecContext *xcProxy;
+    ExecContext *checkerXC;
 
     typedef OzoneThreadState<Impl> ImplState;
 
@@ -245,6 +251,7 @@ class OzoneCPU : public BaseCPU
     void tick();
 
     std::set<InstSeqNum> snList;
+    std::set<Addr> lockAddrList;
   private:
     struct TickEvent : public Event
     {
@@ -262,9 +269,9 @@ class OzoneCPU : public BaseCPU
     void scheduleTickEvent(int delay)
     {
         if (tickEvent.squashed())
-            tickEvent.reschedule(curTick + delay);
+            tickEvent.reschedule(curTick + cycles(delay));
         else if (!tickEvent.scheduled())
-            tickEvent.schedule(curTick + delay);
+            tickEvent.schedule(curTick + cycles(delay));
     }
 
     /// Unschedule tick event, regardless of its current state.
@@ -322,7 +329,7 @@ class OzoneCPU : public BaseCPU
 
     int cpuId;
 
-    void switchOut();
+    void switchOut(Sampler *sampler);
     void takeOverFrom(BaseCPU *oldCPU);
 
 #if FULL_SYSTEM
@@ -472,6 +479,7 @@ class OzoneCPU : public BaseCPU
         Fault error;
         if (req->flags & LOCKED) {
 //            lockAddr = req->paddr;
+            lockAddrList.insert(req->paddr);
             lockFlag = true;
         }
 
@@ -546,7 +554,13 @@ class OzoneCPU : public BaseCPU
                 req->result = 2;
             } else {
                 if (this->lockFlag/* && this->lockAddr == req->paddr*/) {
-                    req->result = 1;
+                    if (lockAddrList.find(req->paddr) !=
+                        lockAddrList.end()) {
+                        req->result = 1;
+                    } else {
+                        req->result = 0;
+                        return NoFault;
+                    }
                 } else {
                     req->result = 0;
                     return NoFault;
@@ -599,7 +613,7 @@ class OzoneCPU : public BaseCPU
     void setSyscallReturn(SyscallReturn return_value, int tid);
 #endif
 
-    ExecContext *xcBase() { return &xcProxy; }
+    ExecContext *xcBase() { return xcProxy; }
 
     bool decoupledFrontEnd;
     struct CommStruct {
@@ -615,6 +629,8 @@ class OzoneCPU : public BaseCPU
     bool lockFlag;
 
     Stats::Scalar<> quiesceCycles;
+
+    Checker<DynInstPtr> *checker;
 };
 
 #endif // __CPU_OZONE_CPU_HH__
diff --git a/cpu/ozone/cpu_builder.cc b/cpu/ozone/cpu_builder.cc
index 0146dd1bd..64aa49c71 100644
--- a/cpu/ozone/cpu_builder.cc
+++ b/cpu/ozone/cpu_builder.cc
@@ -1,6 +1,7 @@
 
 #include <string>
 
+#include "cpu/checker/cpu.hh"
 #include "cpu/inst_seq.hh"
 #include "cpu/ozone/cpu.hh"
 #include "cpu/ozone/ozone_impl.hh"
@@ -50,6 +51,8 @@ SimObjectVectorParam<Process *> workload;
 
 SimObjectParam<FunctionalMemory *> mem;
 
+SimObjectParam<BaseCPU *> checker;
+
 Param<Counter> max_insts_any_thread;
 Param<Counter> max_insts_all_threads;
 Param<Counter> max_loads_any_thread;
@@ -66,6 +69,7 @@ Param<unsigned> backEndSquashLatency;
 Param<unsigned> backEndLatency;
 Param<unsigned> maxInstBufferSize;
 Param<unsigned> numPhysicalRegs;
+Param<unsigned> maxOutstandingMemOps;
 
 Param<unsigned> decodeToFetchDelay;
 Param<unsigned> renameToFetchDelay;
@@ -164,6 +168,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU)
 
     INIT_PARAM_DFLT(mem, "Memory", NULL),
 
+    INIT_PARAM_DFLT(checker, "Checker CPU", NULL),
+
     INIT_PARAM_DFLT(max_insts_any_thread,
                     "Terminate when any thread reaches this inst count",
                     0),
@@ -190,6 +196,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU)
     INIT_PARAM_DFLT(backEndLatency, "Back end latency", 1),
     INIT_PARAM_DFLT(maxInstBufferSize, "Maximum instruction buffer size", 16),
     INIT_PARAM(numPhysicalRegs, "Number of physical registers"),
+    INIT_PARAM_DFLT(maxOutstandingMemOps, "Maximum outstanding memory operations", 4),
 
     INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"),
     INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"),
@@ -314,7 +321,7 @@ CREATE_SIM_OBJECT(DerivOzoneCPU)
 #endif // FULL_SYSTEM
 
     params->mem = mem;
-
+    params->checker = checker;
     params->max_insts_any_thread = max_insts_any_thread;
     params->max_insts_all_threads = max_insts_all_threads;
     params->max_loads_any_thread = max_loads_any_thread;
@@ -334,6 +341,7 @@ CREATE_SIM_OBJECT(DerivOzoneCPU)
     params->backEndLatency = backEndLatency;
     params->maxInstBufferSize = maxInstBufferSize;
     params->numPhysicalRegs = numPhysIntRegs + numPhysFloatRegs;
+    params->maxOutstandingMemOps = maxOutstandingMemOps;
 
     params->decodeToFetchDelay = decodeToFetchDelay;
     params->renameToFetchDelay = renameToFetchDelay;
@@ -445,6 +453,8 @@ SimObjectVectorParam<Process *> workload;
 
 SimObjectParam<FunctionalMemory *> mem;
 
+SimObjectParam<BaseCPU *> checker;
+
 Param<Counter> max_insts_any_thread;
 Param<Counter> max_insts_all_threads;
 Param<Counter> max_loads_any_thread;
@@ -559,6 +569,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(SimpleOzoneCPU)
 
     INIT_PARAM_DFLT(mem, "Memory", NULL),
 
+    INIT_PARAM_DFLT(checker, "Checker CPU", NULL),
+
     INIT_PARAM_DFLT(max_insts_any_thread,
                     "Terminate when any thread reaches this inst count",
                     0),
@@ -709,7 +721,7 @@ CREATE_SIM_OBJECT(SimpleOzoneCPU)
 #endif // FULL_SYSTEM
 
     params->mem = mem;
-
+    params->checker = checker;
     params->max_insts_any_thread = max_insts_any_thread;
     params->max_insts_all_threads = max_insts_all_threads;
     params->max_loads_any_thread = max_loads_any_thread;
diff --git a/cpu/ozone/cpu_impl.hh b/cpu/ozone/cpu_impl.hh
index 17d944e7c..4f3fdf521 100644
--- a/cpu/ozone/cpu_impl.hh
+++ b/cpu/ozone/cpu_impl.hh
@@ -33,6 +33,7 @@
 #include "base/trace.hh"
 #include "config/full_system.hh"
 #include "cpu/base.hh"
+#include "cpu/checker/exec_context.hh"
 #include "cpu/exec_context.hh"
 #include "cpu/exetrace.hh"
 #include "cpu/ozone/cpu.hh"
@@ -156,17 +157,33 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
 #endif
       comm(5, 5)
 {
-
+    if (p->checker) {
+        BaseCPU *temp_checker = p->checker;
+        checker = dynamic_cast<Checker<DynInstPtr> *>(temp_checker);
+    } else {
+        checker = NULL;
+    }
     frontEnd = new FrontEnd(p);
     backEnd = new BackEnd(p);
 
     _status = Idle;
-    thread.xcProxy = &xcProxy;
+    if (checker) {
+        checker->setMemory(mem);
+#if FULL_SYSTEM
+        checker->setSystem(p->system);
+#endif
+        checkerXC = new CheckerExecContext<OzoneXC>(&ozoneXC, checker);
+        thread.xcProxy = checkerXC;
+        xcProxy = checkerXC;
+    } else {
+        thread.xcProxy = &ozoneXC;
+        xcProxy = &ozoneXC;
+    }
 
     thread.inSyscall = false;
 
-    xcProxy.cpu = this;
-    xcProxy.thread = &thread;
+    ozoneXC.cpu = this;
+    ozoneXC.thread = &thread;
 
     thread.setStatus(ExecContext::Suspended);
 #if FULL_SYSTEM
@@ -177,7 +194,7 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
     thread.tid = 0;
     thread.mem = p->mem;
 
-    thread.quiesceEvent = new EndQuiesceEvent(&xcProxy);
+    thread.quiesceEvent = new EndQuiesceEvent(xcProxy);
 
     system = p->system;
     itb = p->itb;
@@ -187,9 +204,10 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
 
     if (p->profile) {
         thread.profile = new FunctionProfile(p->system->kernelSymtab);
+        // @todo: This might be better as an ExecContext instead of OzoneXC
         Callback *cb =
             new MakeCallback<OzoneXC,
-            &OzoneXC::dumpFuncProfile>(&xcProxy);
+            &OzoneXC::dumpFuncProfile>(&ozoneXC);
         registerExitCallback(cb);
     }
 
@@ -198,7 +216,6 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
     static ProfileNode dummyNode;
     thread.profileNode = &dummyNode;
     thread.profilePC = 3;
-
 #else
 //    xc = new ExecContext(this, /* thread_num */ 0, p->workload[0], /* asid */ 0);
     thread.cpu = this;
@@ -225,13 +242,13 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
 
     issueWidth = p->issueWidth;
 */
-    execContexts.push_back(&xcProxy);
+    execContexts.push_back(xcProxy);
 
     frontEnd->setCPU(this);
     backEnd->setCPU(this);
 
-    frontEnd->setXC(&xcProxy);
-    backEnd->setXC(&xcProxy);
+    frontEnd->setXC(xcProxy);
+    backEnd->setXC(xcProxy);
 
     frontEnd->setThreadState(&thread);
     backEnd->setThreadState(&thread);
@@ -250,7 +267,7 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
 
     for (int i = 0; i < TheISA::TotalNumRegs; ++i) {
         thread.renameTable[i] = new DynInst(this);
-        thread.renameTable[i]->setCompleted();
+        thread.renameTable[i]->setResultReady();
     }
 
     frontEnd->renameTable.copyFrom(thread.renameTable);
@@ -312,11 +329,15 @@ OzoneCPU<Impl>::copyToXC()
 */
 template <class Impl>
 void
-OzoneCPU<Impl>::switchOut()
+OzoneCPU<Impl>::switchOut(Sampler *sampler)
 {
+    // Front end needs state from back end, so switch out the back end first.
+    backEnd->switchOut();
+    frontEnd->switchOut();
     _status = SwitchedOut;
     if (tickEvent.scheduled())
         tickEvent.squash();
+    sampler->signalSwitched();
 }
 
 template <class Impl>
@@ -325,8 +346,16 @@ OzoneCPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
 {
     BaseCPU::takeOverFrom(oldCPU);
 
+    backEnd->takeOverFrom();
+    frontEnd->takeOverFrom();
     assert(!tickEvent.scheduled());
 
+    // @todo: Fix hardcoded number
+    // Clear out any old information in time buffer.
+    for (int i = 0; i < 6; ++i) {
+        comm.advance();
+    }
+
     // if any of this CPU's ExecContexts are active, mark the CPU as
     // running and schedule its tick event.
     for (int i = 0; i < execContexts.size(); ++i) {
@@ -470,7 +499,7 @@ OzoneCPU<Impl>::serialize(std::ostream &os)
     BaseCPU::serialize(os);
     SERIALIZE_ENUM(_status);
     nameOut(os, csprintf("%s.xc", name()));
-    xcProxy.serialize(os);
+    ozoneXC.serialize(os);
     nameOut(os, csprintf("%s.tickEvent", name()));
     tickEvent.serialize(os);
 }
@@ -481,7 +510,7 @@ OzoneCPU<Impl>::unserialize(Checkpoint *cp, const std::string &section)
 {
     BaseCPU::unserialize(cp, section);
     UNSERIALIZE_ENUM(_status);
-    xcProxy.unserialize(cp, csprintf("%s.xc", section));
+    ozoneXC.unserialize(cp, csprintf("%s.xc", section));
     tickEvent.unserialize(cp, csprintf("%s.tickEvent", section));
 }
 
@@ -579,7 +608,7 @@ template <class Impl>
 Addr
 OzoneCPU<Impl>::dbg_vtophys(Addr addr)
 {
-    return vtophys(&xcProxy, addr);
+    return vtophys(xcProxy, addr);
 }
 #endif // FULL_SYSTEM
 /*
@@ -725,7 +754,7 @@ OzoneCPU<Impl>::tick()
     comInstEventQueue[0]->serviceEvents(numInst);
 
     if (!tickEvent.scheduled() && _status == Running)
-        tickEvent.schedule(curTick + 1);
+        tickEvent.schedule(curTick + cycles(1));
 }
 
 template <class Impl>
@@ -750,7 +779,7 @@ OzoneCPU<Impl>::syscall()
 
     DPRINTF(OzoneCPU, "FuncExeInst: %i\n", thread.funcExeInst);
 
-    thread.process->syscall(&xcProxy);
+    thread.process->syscall(xcProxy);
 
     thread.funcExeInst--;
 
@@ -784,19 +813,17 @@ OzoneCPU<Impl>::hwrei()
 {
     // Need to move this to ISA code
     // May also need to make this per thread
+/*
     if (!inPalMode())
         return new UnimplementedOpcodeFault;
 
     thread.setNextPC(thread.readMiscReg(AlphaISA::IPR_EXC_ADDR));
-
+*/
     lockFlag = false;
+    lockAddrList.clear();
+    kernelStats->hwrei();
 
-    // Not sure how to make a similar check in the Ozone model
-//    if (!misspeculating()) {
-        kernelStats->hwrei();
-
-        checkInterrupts = true;
-//    }
+    checkInterrupts = true;
 
     // FIXME: XXX check for interrupts? XXX
     return NoFault;
@@ -847,6 +874,11 @@ OzoneCPU<Impl>::processInterrupts()
     if (ipl && ipl > thread.readMiscReg(IPR_IPLR)) {
         thread.setMiscReg(IPR_ISR, summary);
         thread.setMiscReg(IPR_INTID, ipl);
+        // @todo: Make this more transparent
+        if (checker) {
+            checkerXC->setMiscReg(IPR_ISR, summary);
+            checkerXC->setMiscReg(IPR_INTID, ipl);
+        }
         Fault fault = new InterruptFault;
         fault->invoke(thread.getXCProxy());
         DPRINTF(Flow, "Interrupt! IPLR=%d ipl=%d summary=%x\n",
@@ -860,7 +892,7 @@ OzoneCPU<Impl>::simPalCheck(int palFunc)
 {
     // Need to move this to ISA code
     // May also need to make this per thread
-    this->kernelStats->callpal(palFunc, &xcProxy);
+    this->kernelStats->callpal(palFunc, xcProxy);
 
     switch (palFunc) {
       case PAL::halt:
@@ -944,7 +976,28 @@ OzoneCPU<Impl>::OzoneXC::dumpFuncProfile()
 template <class Impl>
 void
 OzoneCPU<Impl>::OzoneXC::takeOverFrom(ExecContext *old_context)
-{ }
+{
+    // some things should already be set up
+    assert(getMemPtr() == old_context->getMemPtr());
+#if FULL_SYSTEM
+    assert(getSystemPtr() == old_context->getSystemPtr());
+#else
+    assert(getProcessPtr() == old_context->getProcessPtr());
+#endif
+
+    // copy over functional state
+    setStatus(old_context->status());
+    copyArchRegs(old_context);
+    setCpuId(old_context->readCpuId());
+#if !FULL_SYSTEM
+    setFuncExeInst(old_context->readFuncExeInst());
+#endif
+
+//    storeCondFailures = 0;
+    cpu->lockFlag = false;
+
+    old_context->setStatus(ExecContext::Unallocated);
+}
 
 template <class Impl>
 void
@@ -1062,21 +1115,24 @@ template <class Impl>
 float
 OzoneCPU<Impl>::OzoneXC::readFloatRegSingle(int reg_idx)
 {
-    return thread->renameTable[reg_idx]->readFloatResult();
+    int idx = reg_idx + TheISA::FP_Base_DepTag;
+    return thread->renameTable[idx]->readFloatResult();
 }
 
 template <class Impl>
 double
 OzoneCPU<Impl>::OzoneXC::readFloatRegDouble(int reg_idx)
 {
-    return thread->renameTable[reg_idx]->readDoubleResult();
+    int idx = reg_idx + TheISA::FP_Base_DepTag;
+    return thread->renameTable[idx]->readDoubleResult();
 }
 
 template <class Impl>
 uint64_t
 OzoneCPU<Impl>::OzoneXC::readFloatRegInt(int reg_idx)
 {
-    return thread->renameTable[reg_idx]->readIntResult();
+    int idx = reg_idx + TheISA::FP_Base_DepTag;
+    return thread->renameTable[idx]->readIntResult();
 }
 
 template <class Impl>
@@ -1101,7 +1157,9 @@ template <class Impl>
 void
 OzoneCPU<Impl>::OzoneXC::setFloatRegDouble(int reg_idx, double val)
 {
-    thread->renameTable[reg_idx]->setDoubleResult(val);
+    int idx = reg_idx + TheISA::FP_Base_DepTag;
+
+    thread->renameTable[idx]->setDoubleResult(val);
 
     if (!thread->inSyscall) {
         cpu->squashFromXC();
diff --git a/cpu/ozone/dyn_inst.hh b/cpu/ozone/dyn_inst.hh
index 4382af0fd..f251c28ea 100644
--- a/cpu/ozone/dyn_inst.hh
+++ b/cpu/ozone/dyn_inst.hh
@@ -59,9 +59,9 @@ class OzoneDynInst : public BaseDynInst<Impl>
     typedef TheISA::MiscReg MiscReg;
     typedef typename std::list<DynInstPtr>::iterator ListIt;
 
-    // Note that this is duplicated from the BaseDynInst class; I'm simply not
-    // sure the enum would carry through so I could use it in array
-    // declarations in this class.
+    // Note that this is duplicated from the BaseDynInst class; I'm
+    // simply not sure the enum would carry through so I could use it
+    // in array declarations in this class.
     enum {
         MaxInstSrcRegs = TheISA::MaxInstSrcRegs,
         MaxInstDestRegs = TheISA::MaxInstDestRegs
@@ -90,9 +90,23 @@ class OzoneDynInst : public BaseDynInst<Impl>
     void addDependent(DynInstPtr &dependent_inst);
 
     std::vector<DynInstPtr> &getDependents() { return dependents; }
+    std::vector<DynInstPtr> &getMemDeps() { return memDependents; }
+    std::list<DynInstPtr> &getMemSrcs() { return srcMemInsts; }
 
     void wakeDependents();
 
+    void wakeMemDependents();
+
+    void addMemDependent(DynInstPtr &inst) { memDependents.push_back(inst); }
+
+    void addSrcMemInst(DynInstPtr &inst) { srcMemInsts.push_back(inst); }
+
+    void markMemInstReady(OzoneDynInst<Impl> *inst);
+
+    // For now I will remove instructions from the list when they wake
+    // up.  In the future, you only really need a counter.
+    bool memDepReady() { return srcMemInsts.empty(); }
+
 //    void setBPredInfo(const BPredInfo &bp_info) { bpInfo = bp_info; }
 
 //    BPredInfo &getBPredInfo() { return bpInfo; }
@@ -104,9 +118,13 @@ class OzoneDynInst : public BaseDynInst<Impl>
 
     std::vector<DynInstPtr> dependents;
 
-    /** The instruction that produces the value of the source registers.  These
-     *  may be NULL if the value has already been read from the source
-     *  instruction.
+    std::vector<DynInstPtr> memDependents;
+
+    std::list<DynInstPtr> srcMemInsts;
+
+    /** The instruction that produces the value of the source
+     *  registers.  These may be NULL if the value has already been
+     *  read from the source instruction.
      */
     DynInstPtr srcInsts[MaxInstSrcRegs];
 
@@ -165,22 +183,22 @@ class OzoneDynInst : public BaseDynInst<Impl>
      */
     void setIntReg(const StaticInst *si, int idx, uint64_t val)
     {
-        this->instResult.integer = val;
+        BaseDynInst<Impl>::setIntReg(si, idx, val);
     }
 
     void setFloatRegSingle(const StaticInst *si, int idx, float val)
     {
-        this->instResult.fp = val;
+        BaseDynInst<Impl>::setFloatRegSingle(si, idx, val);
     }
 
     void setFloatRegDouble(const StaticInst *si, int idx, double val)
     {
-        this->instResult.dbl = val;
+        BaseDynInst<Impl>::setFloatRegDouble(si, idx, val);
     }
 
     void setFloatRegInt(const StaticInst *si, int idx, uint64_t val)
     {
-        this->instResult.integer = val;
+        BaseDynInst<Impl>::setFloatRegInt(si, idx, val);
     }
 
     void setIntResult(uint64_t result) { this->instResult.integer = result; }
@@ -199,6 +217,8 @@ class OzoneDynInst : public BaseDynInst<Impl>
 
     void clearDependents();
 
+    void clearMemDependents();
+
   public:
     // ISA stuff
     MiscReg readMiscReg(int misc_reg);
diff --git a/cpu/ozone/dyn_inst_impl.hh b/cpu/ozone/dyn_inst_impl.hh
index c83481c9a..a7e4460a1 100644
--- a/cpu/ozone/dyn_inst_impl.hh
+++ b/cpu/ozone/dyn_inst_impl.hh
@@ -38,7 +38,7 @@ template <class Impl>
 OzoneDynInst<Impl>::OzoneDynInst(FullCPU *cpu)
     : BaseDynInst<Impl>(0, 0, 0, 0, cpu)
 {
-    this->setCompleted();
+    this->setResultReady();
 
     initInstPtrs();
 }
@@ -130,7 +130,7 @@ template <class Impl>
 bool
 OzoneDynInst<Impl>::srcInstReady(int regIdx)
 {
-    return srcInsts[regIdx]->isCompleted();
+    return srcInsts[regIdx]->isResultReady();
 }
 
 template <class Impl>
@@ -149,6 +149,28 @@ OzoneDynInst<Impl>::wakeDependents()
     }
 }
 
+template <class Impl>
+void
+OzoneDynInst<Impl>::wakeMemDependents()
+{
+    // Remove this instruction from each memory dependent's source list.
+    for (unsigned dep = 0; dep < memDependents.size(); ++dep)
+        memDependents[dep]->markMemInstReady(this);
+}
+
+template <class Impl>
+void
+OzoneDynInst<Impl>::markMemInstReady(OzoneDynInst<Impl> *inst)
+{
+    // Drop inst from the memory sources this instruction still waits on.
+    // The end() test must come first: dereferencing end() is undefined.
+    ListIt mem_it = srcMemInsts.begin();
+    while (mem_it != srcMemInsts.end() && (*mem_it) != inst)
+        ++mem_it;
+    assert(mem_it != srcMemInsts.end());
+    srcMemInsts.erase(mem_it);
+}
+
 template <class Impl>
 void
 OzoneDynInst<Impl>::initInstPtrs()
@@ -164,7 +186,7 @@ bool
 OzoneDynInst<Impl>::srcsReady()
 {
     for (int i = 0; i < this->numSrcRegs(); ++i) {
-        if (!srcInsts[i]->isCompleted())
+        if (!srcInsts[i]->isResultReady())
             return false;
     }
 
@@ -176,7 +198,7 @@ bool
 OzoneDynInst<Impl>::eaSrcsReady()
 {
     for (int i = 1; i < this->numSrcRegs(); ++i) {
-        if (!srcInsts[i]->isCompleted())
+        if (!srcInsts[i]->isResultReady())
             return false;
     }
 
@@ -195,6 +217,14 @@ OzoneDynInst<Impl>::clearDependents()
         prevDestInst[i] = NULL;
     }
 }
+
+template <class Impl>
+void
+OzoneDynInst<Impl>::clearMemDependents()
+{
+    memDependents.clear();  // Forget every recorded memory dependent.
+}
+
 template <class Impl>
 MiscReg
 OzoneDynInst<Impl>::readMiscReg(int misc_reg)
@@ -213,6 +243,7 @@ template <class Impl>
 Fault
 OzoneDynInst<Impl>::setMiscReg(int misc_reg, const MiscReg &val)
 {
+    this->setIntResult(val);
     return this->thread->setMiscReg(misc_reg, val);
 }
 
@@ -234,11 +265,13 @@ OzoneDynInst<Impl>::hwrei()
 
     this->setNextPC(this->thread->readMiscReg(AlphaISA::IPR_EXC_ADDR));
 
+    this->cpu->hwrei();
+/*
     this->cpu->kernelStats->hwrei();
 
     this->cpu->checkInterrupts = true;
     this->cpu->lockFlag = false;
-
+*/
     // FIXME: XXX check for interrupts? XXX
     return NoFault;
 }
diff --git a/cpu/ozone/front_end.hh b/cpu/ozone/front_end.hh
index 2bff2544d..188925ae5 100644
--- a/cpu/ozone/front_end.hh
+++ b/cpu/ozone/front_end.hh
@@ -66,6 +66,14 @@ class FrontEnd
 
     bool isEmpty() { return instBuffer.empty(); }
 
+    void switchOut();
+
+    void takeOverFrom(ExecContext *old_xc = NULL);
+
+    bool isSwitchedOut() { return switchedOut; }
+
+    bool switchedOut;
+
   private:
     bool updateStatus();
 
@@ -198,6 +206,9 @@ class FrontEnd
 
     DynInstPtr barrierInst;
 
+  public:
+    bool interruptPending;
+  private:
     // number of idle cycles
 /*
     Stats::Average<> notIdleFraction;
@@ -223,6 +234,8 @@ class FrontEnd
     Stats::Scalar<> fetchBlockedCycles;
     /** Stat for total number of fetched cache lines. */
     Stats::Scalar<> fetchedCacheLines;
+
+    Stats::Scalar<> fetchIcacheSquashes;
     /** Distribution of number of instructions fetched each cycle. */
     Stats::Distribution<> fetchNisnDist;
 //    Stats::Vector<> qfull_iq_occupancy;
diff --git a/cpu/ozone/front_end_impl.hh b/cpu/ozone/front_end_impl.hh
index 7c18386cf..a3eb809d0 100644
--- a/cpu/ozone/front_end_impl.hh
+++ b/cpu/ozone/front_end_impl.hh
@@ -19,8 +19,11 @@ FrontEnd<Impl>::FrontEnd(Params *params)
       width(params->frontEndWidth),
       freeRegs(params->numPhysicalRegs),
       numPhysRegs(params->numPhysicalRegs),
-      serializeNext(false)
+      serializeNext(false),
+      interruptPending(false)
 {
+    switchedOut = false;
+
     status = Idle;
 
     // Setup branch predictor.
@@ -127,6 +130,11 @@ FrontEnd<Impl>::regStats()
         .desc("Number of cache lines fetched")
         .prereq(fetchedCacheLines);
 
+    fetchIcacheSquashes
+        .name(name() + ".fetchIcacheSquashes")
+        .desc("Number of outstanding Icache misses that were squashed")
+        .prereq(fetchIcacheSquashes);
+
     fetchNisnDist
         .init(/* base value */ 0,
               /* last value */ width,
@@ -370,6 +378,10 @@ FrontEnd<Impl>::fetchCacheLine()
 #endif // FULL_SYSTEM
     Fault fault = NoFault;
 
+    if (interruptPending && flags == 0) {
+        return fault;
+    }
+
     // Align the fetch PC so it's at the start of a cache block.
     Addr fetch_PC = icacheBlockAlignPC(PC);
 
@@ -397,7 +409,8 @@ FrontEnd<Impl>::fetchCacheLine()
     // exists within the cache.
     if (icacheInterface && fault == NoFault) {
 #if FULL_SYSTEM
-        if (cpu->system->memctrl->badaddr(memReq->paddr)) {
+        if (cpu->system->memctrl->badaddr(memReq->paddr) ||
+            memReq->flags & UNCACHEABLE) {
             DPRINTF(FE, "Fetch: Bad address %#x (hopefully on a "
                     "misspeculating path!",
                     memReq->paddr);
@@ -497,7 +510,7 @@ FrontEnd<Impl>::processBarriers(DynInstPtr &inst)
             dispatchedTempSerializing++;
         }
 
-        // Change status over to BarrierStall so that other stages know
+        // Change status over to SerializeBlocked so that other stages know
         // what this is blocked on.
         status = SerializeBlocked;
 
@@ -613,8 +626,10 @@ FrontEnd<Impl>::processCacheCompletion(MemReqPtr &req)
 
     // Do something here.
     if (status != IcacheMissStall ||
-        req != memReq) {
+        req != memReq ||
+        switchedOut) {
         DPRINTF(FE, "Previous fetch was squashed.\n");
+        fetchIcacheSquashes++;
         return;
     }
 
@@ -702,6 +717,7 @@ FrontEnd<Impl>::getInstFromCacheline()
         DynInstPtr inst = barrierInst;
         status = Running;
         barrierInst = NULL;
+        inst->clearSerializeBefore();
         return inst;
     }
 
@@ -773,7 +789,7 @@ FrontEnd<Impl>::renameInst(DynInstPtr &inst)
             DPRINTF(FE, "[sn:%lli]: Src reg %i is inst [sn:%lli]\n",
                     inst->seqNum, (int)inst->srcRegIdx(i), src_inst->seqNum);
 
-            if (src_inst->isCompleted()) {
+            if (src_inst->isResultReady()) {
                 DPRINTF(FE, "Reg ready.\n");
                 inst->markSrcRegReady(i);
             } else {
@@ -807,6 +823,38 @@ FrontEnd<Impl>::wakeFromQuiesce()
     status = Running;
 }
 
+template <class Impl>
+void
+FrontEnd<Impl>::switchOut()
+{
+    switchedOut = true;  // processCacheCompletion() drops fills while set.
+    memReq = NULL;  // Release the outstanding fetch request.
+    squash(0, 0);
+    instBuffer.clear();
+    instBufferSize = 0;
+    status = Idle;
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::takeOverFrom(ExecContext *old_xc)
+{  // NOTE: old_xc is not read anywhere in this function.
+    assert(freeRegs == numPhysRegs);  // Expects no registers to be in use.
+    fetchCacheLineNextCycle = true;
+
+    cacheBlkValid = false;
+
+#if !FULL_SYSTEM
+//    pTable = params->pTable;
+#endif
+    fetchFault = NoFault;
+    serializeNext = false;
+    barrierInst = NULL;
+    status = Running;
+    switchedOut = false;  // Undo the flag set by switchOut().
+    interruptPending = false;
+}
+
 template <class Impl>
 void
 FrontEnd<Impl>::dumpInsts()
diff --git a/cpu/ozone/lw_back_end.hh b/cpu/ozone/lw_back_end.hh
index f17c93ff4..028fdaf8c 100644
--- a/cpu/ozone/lw_back_end.hh
+++ b/cpu/ozone/lw_back_end.hh
@@ -17,6 +17,8 @@
 #include "mem/mem_req.hh"
 #include "sim/eventq.hh"
 
+template <class>
+class Checker;
 class ExecContext;
 
 template <class Impl>
@@ -126,6 +128,8 @@ class LWBackEnd
 
     Addr commitPC;
 
+    Tick lastCommitCycle;
+
     bool robEmpty() { return instList.empty(); }
 
     bool isFull() { return numInsts >= numROBEntries; }
@@ -133,7 +137,7 @@ class LWBackEnd
 
     void fetchFault(Fault &fault);
 
-    int wakeDependents(DynInstPtr &inst);
+    int wakeDependents(DynInstPtr &inst, bool memory_deps = false);
 
     /** Tells memory dependence unit that a memory instruction needs to be
      * rescheduled. It will re-execute once replayMemInst() is called.
@@ -182,6 +186,12 @@ class LWBackEnd
 
     void instToCommit(DynInstPtr &inst);
 
+    void switchOut();
+
+    void takeOverFrom(ExecContext *old_xc = NULL);
+
+    bool isSwitchedOut() { return switchedOut; }
+
   private:
     void generateTrapEvent(Tick latency = 0);
     void handleFault(Fault &fault, Tick latency = 0);
@@ -303,6 +313,10 @@ class LWBackEnd
     Fault faultFromFetch;
     bool fetchHasFault;
 
+    bool switchedOut;
+
+    DynInstPtr memBarrier;
+
   private:
     struct pqCompare {
         bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
@@ -327,7 +341,7 @@ class LWBackEnd
 
     bool exactFullStall;
 
-    bool fetchRedirect[Impl::MaxThreads];
+//    bool fetchRedirect[Impl::MaxThreads];
 
     // number of cycles stalled for D-cache misses
 /*    Stats::Scalar<> dcacheStallCycles;
@@ -414,6 +428,8 @@ class LWBackEnd
     Stats::VectorDistribution<> ROB_occ_dist;
   public:
     void dumpInsts();
+
+    Checker<DynInstPtr> *checker;
 };
 
 template <class Impl>
diff --git a/cpu/ozone/lw_back_end_impl.hh b/cpu/ozone/lw_back_end_impl.hh
index d1290239c..d4829629d 100644
--- a/cpu/ozone/lw_back_end_impl.hh
+++ b/cpu/ozone/lw_back_end_impl.hh
@@ -1,5 +1,6 @@
 
 #include "encumbered/cpu/full/op_class.hh"
+#include "cpu/checker/cpu.hh"
 #include "cpu/ozone/lw_back_end.hh"
 
 template <class Impl>
@@ -10,28 +11,36 @@ LWBackEnd<Impl>::generateTrapEvent(Tick latency)
 
     TrapEvent *trap = new TrapEvent(this);
 
-    trap->schedule(curTick + latency);
+    trap->schedule(curTick + cpu->cycles(latency));
 
     thread->trapPending = true;
 }
 
 template <class Impl>
 int
-LWBackEnd<Impl>::wakeDependents(DynInstPtr &inst)
+LWBackEnd<Impl>::wakeDependents(DynInstPtr &inst, bool memory_deps)
 {
     assert(!inst->isSquashed());
-    std::vector<DynInstPtr> &dependents = inst->getDependents();
+    std::vector<DynInstPtr> &dependents = memory_deps ? inst->getMemDeps() :
+        inst->getDependents();
     int num_outputs = dependents.size();
 
     DPRINTF(BE, "Waking instruction [sn:%lli] dependents in IQ\n", inst->seqNum);
 
     for (int i = 0; i < num_outputs; i++) {
         DynInstPtr dep_inst = dependents[i];
-        dep_inst->markSrcRegReady();
+        if (!memory_deps) {
+            dep_inst->markSrcRegReady();
+        } else {
+            if (!dep_inst->isSquashed())
+                dep_inst->markMemInstReady(inst.get());
+        }
+
         DPRINTF(BE, "Marking source reg ready [sn:%lli] in IQ\n", dep_inst->seqNum);
 
         if (dep_inst->readyToIssue() && dep_inst->isInROB() &&
-            !dep_inst->isNonSpeculative()) {
+            !dep_inst->isNonSpeculative() &&
+            dep_inst->memDepReady() && !dep_inst->isMemBarrier() && !dep_inst->isWriteBarrier()) {
             DPRINTF(BE, "Adding instruction to exeList [sn:%lli]\n",
                     dep_inst->seqNum);
             exeList.push(dep_inst);
@@ -114,6 +123,9 @@ LWBackEnd<Impl>::LdWritebackEvent::process()
 
 //    iewStage->wakeCPU();
 
+    if (be->isSwitchedOut())
+        return;
+
     if (dcacheMiss) {
         be->removeDcacheMiss(inst);
     }
@@ -169,16 +181,18 @@ LWBackEnd<Impl>::DCacheCompletionEvent::description()
 template <class Impl>
 LWBackEnd<Impl>::LWBackEnd(Params *params)
     : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(5, 5),
-      xcSquash(false), cacheCompletionEvent(this),
+      trapSquash(false), xcSquash(false), cacheCompletionEvent(this),
       dcacheInterface(params->dcacheInterface), width(params->backEndWidth),
       exactFullStall(true)
 {
     numROBEntries = params->numROBEntries;
     numInsts = 0;
     numDispatchEntries = 32;
-    maxOutstandingMemOps = 4;
+    maxOutstandingMemOps = params->maxOutstandingMemOps;
     numWaitingMemOps = 0;
     waitingInsts = 0;
+    switchedOut = false;
+
 //    IQ.setBE(this);
     LSQ.setBE(this);
 
@@ -533,6 +547,7 @@ LWBackEnd<Impl>::setCPU(FullCPU *cpu_ptr)
 {
     cpu = cpu_ptr;
     LSQ.setCPU(cpu_ptr);
+    checker = cpu->checker;
 }
 
 template <class Impl>
@@ -554,30 +569,35 @@ LWBackEnd<Impl>::checkInterrupts()
         !cpu->inPalMode(thread->readPC()) &&
         !trapSquash &&
         !xcSquash) {
-        // Will need to squash all instructions currently in flight and have
-        // the interrupt handler restart at the last non-committed inst.
-        // Most of that can be handled through the trap() function.  The
-        // processInterrupts() function really just checks for interrupts
-        // and then calls trap() if there is an interrupt present.
+        frontEnd->interruptPending = true;
+        if (robEmpty() && !LSQ.hasStoresToWB()) {
+            // Will need to squash all instructions currently in flight and have
+            // the interrupt handler restart at the last non-committed inst.
+            // Most of that can be handled through the trap() function.  The
+            // processInterrupts() function really just checks for interrupts
+            // and then calls trap() if there is an interrupt present.
 
-        // Not sure which thread should be the one to interrupt.  For now
-        // always do thread 0.
-        assert(!thread->inSyscall);
-        thread->inSyscall = true;
+            // Not sure which thread should be the one to interrupt.  For now
+            // always do thread 0.
+            assert(!thread->inSyscall);
+            thread->inSyscall = true;
 
-        // CPU will handle implementation of the interrupt.
-        cpu->processInterrupts();
+            // CPU will handle implementation of the interrupt.
+            cpu->processInterrupts();
 
-        // Now squash or record that I need to squash this cycle.
-        commitStatus = TrapPending;
+            // Now squash or record that I need to squash this cycle.
+            commitStatus = TrapPending;
 
-        // Exit state update mode to avoid accidental updating.
-        thread->inSyscall = false;
+            // Exit state update mode to avoid accidental updating.
+            thread->inSyscall = false;
 
-        // Generate trap squash event.
-        generateTrapEvent();
+            // Generate trap squash event.
+            generateTrapEvent();
 
-        DPRINTF(BE, "Interrupt detected.\n");
+            DPRINTF(BE, "Interrupt detected.\n");
+        } else {
+            DPRINTF(BE, "Interrupt must wait for ROB to drain.\n");
+        }
     }
 }
 
@@ -585,7 +605,7 @@ template <class Impl>
 void
 LWBackEnd<Impl>::handleFault(Fault &fault, Tick latency)
 {
-    DPRINTF(BE, "Handling fault!");
+    DPRINTF(BE, "Handling fault!\n");
 
     assert(!thread->inSyscall);
 
@@ -615,6 +635,9 @@ LWBackEnd<Impl>::tick()
 
     wbCycle = 0;
 
+    // Read in any done instruction information and update the IQ or LSQ.
+    updateStructures();
+
 #if FULL_SYSTEM
     checkInterrupts();
 
@@ -623,7 +646,7 @@ LWBackEnd<Impl>::tick()
         squashFromTrap();
     } else if (xcSquash) {
         squashFromXC();
-    } else if (fetchHasFault && robEmpty() && frontEnd->isEmpty()) {
+    } else if (fetchHasFault && robEmpty() && frontEnd->isEmpty() && !LSQ.hasStoresToWB()) {
         DPRINTF(BE, "ROB and front end empty, handling fetch fault\n");
         Fault fetch_fault = frontEnd->getFault();
         if (fetch_fault == NoFault) {
@@ -636,9 +659,6 @@ LWBackEnd<Impl>::tick()
     }
 #endif
 
-    // Read in any done instruction information and update the IQ or LSQ.
-    updateStructures();
-
     if (dispatchStatus != Blocked) {
         dispatchInsts();
     } else {
@@ -719,12 +739,41 @@ LWBackEnd<Impl>::dispatchInsts()
         for (int i = 0; i < inst->numDestRegs(); ++i)
             renameTable[inst->destRegIdx(i)] = inst;
 
-        if (inst->readyToIssue() && !inst->isNonSpeculative()) {
-            DPRINTF(BE, "Instruction [sn:%lli] ready, addding to exeList.\n",
-                    inst->seqNum);
-            exeList.push(inst);
+        if (inst->isMemBarrier() || inst->isWriteBarrier()) {
+            if (memBarrier) {
+                DPRINTF(BE, "Instruction [sn:%lli] is waiting on "
+                        "barrier [sn:%lli].\n",
+                        inst->seqNum, memBarrier->seqNum);
+                memBarrier->addMemDependent(inst);
+                inst->addSrcMemInst(memBarrier);
+            }
+            memBarrier = inst;
+            inst->setCanCommit();
+        } else if (inst->readyToIssue() && !inst->isNonSpeculative()) {
             if (inst->isMemRef()) {
+
                 LSQ.insert(inst);
+                if (memBarrier) {
+                    DPRINTF(BE, "Instruction [sn:%lli] is waiting on "
+                            "barrier [sn:%lli].\n",
+                            inst->seqNum, memBarrier->seqNum);
+                    memBarrier->addMemDependent(inst);
+                    inst->addSrcMemInst(memBarrier);
+                    addWaitingMemOp(inst);
+
+                    waitingList.push_front(inst);
+                    inst->iqIt = waitingList.begin();
+                    inst->iqItValid = true;
+                    waitingInsts++;
+                } else {
+                    DPRINTF(BE, "Instruction [sn:%lli] ready, addding to exeList.\n",
+                            inst->seqNum);
+                    exeList.push(inst);
+                }
+            } else {
+                DPRINTF(BE, "Instruction [sn:%lli] ready, addding to exeList.\n",
+                        inst->seqNum);
+                exeList.push(inst);
             }
         } else {
             if (inst->isNonSpeculative()) {
@@ -735,6 +784,14 @@ LWBackEnd<Impl>::dispatchInsts()
             if (inst->isMemRef()) {
                 addWaitingMemOp(inst);
                 LSQ.insert(inst);
+                if (memBarrier) {
+                    memBarrier->addMemDependent(inst);
+                    inst->addSrcMemInst(memBarrier);
+
+                    DPRINTF(BE, "Instruction [sn:%lli] is waiting on "
+                            "barrier [sn:%lli].\n",
+                            inst->seqNum, memBarrier->seqNum);
+                }
             }
 
             DPRINTF(BE, "Instruction [sn:%lli] not ready, addding to "
@@ -872,9 +929,6 @@ LWBackEnd<Impl>::executeInsts()
 
         ++funcExeInst;
         ++num_executed;
-        // keep an instruction count
-        thread->numInst++;
-        thread->numInsts++;
 
         exeList.pop();
 
@@ -915,7 +969,7 @@ LWBackEnd<Impl>::instToCommit(DynInstPtr &inst)
         inst->setCanCommit();
 
         if (inst->isExecuted()) {
-            inst->setCompleted();
+            inst->setResultReady();
             int dependents = wakeDependents(inst);
             if (dependents) {
                 producer_inst[0]++;
@@ -956,7 +1010,7 @@ LWBackEnd<Impl>::writebackInsts()
                     inst->seqNum, inst->readPC());
 
             inst->setCanCommit();
-            inst->setCompleted();
+            inst->setResultReady();
 
             if (inst->isExecuted()) {
                 int dependents = wakeDependents(inst);
@@ -997,7 +1051,9 @@ LWBackEnd<Impl>::commitInst(int inst_num)
     // If the instruction is not executed yet, then it is a non-speculative
     // or store inst.  Signal backwards that it should be executed.
     if (!inst->isExecuted()) {
-        if (inst->isNonSpeculative()) {
+        if (inst->isNonSpeculative() ||
+            inst->isMemBarrier() ||
+            inst->isWriteBarrier()) {
 #if !FULL_SYSTEM
             // Hack to make sure syscalls aren't executed until all stores
             // write back their data.  This direct communication shouldn't
@@ -1017,6 +1073,16 @@ LWBackEnd<Impl>::commitInst(int inst_num)
                     "instruction at the head of the ROB, PC %#x.\n",
                     inst->readPC());
 
+            if (inst->isMemBarrier() || inst->isWriteBarrier()) {
+                DPRINTF(BE, "Waking dependents on barrier [sn:%lli]\n",
+                        inst->seqNum);
+                assert(memBarrier);
+                wakeDependents(inst, true);
+                if (memBarrier == inst)
+                    memBarrier = NULL;
+                inst->clearMemDependents();
+            }
+
             // Send back the non-speculative instruction's sequence number.
             if (inst->iqItValid) {
                 DPRINTF(BE, "Removing instruction from waiting list\n");
@@ -1066,13 +1132,45 @@ LWBackEnd<Impl>::commitInst(int inst_num)
 
     // Not handled for now.
     assert(!inst->isThreadSync());
-
+    assert(inst->memDepReady());
+    // Stores will mark themselves as totally completed as they need
+    // to wait to writeback to memory.  @todo: Hack...attempt to fix
+    // having the checker be forced to wait until a store completes in
+    // order to check all of the instructions.  If the store at the
+    // head of the check list misses, but a later store hits, then
+    // loads in the checker may see the younger store values instead
+    // of the store they should see.  Either the checker needs its own
+    // memory (annoying to update), its own store buffer (how to tell
+    // which value is correct?), or something else...
+    if (!inst->isStore()) {
+        inst->setCompleted();
+    }
     // Check if the instruction caused a fault.  If so, trap.
     Fault inst_fault = inst->getFault();
 
+    // Use checker prior to updating anything due to traps or PC
+    // based events.
+    if (checker) {
+        checker->tick(inst);
+    }
+
     if (inst_fault != NoFault) {
         DPRINTF(BE, "Inst [sn:%lli] PC %#x has a fault\n",
                 inst->seqNum, inst->readPC());
+
+        // Instruction is completed as it has a fault.
+        inst->setCompleted();
+
+        if (LSQ.hasStoresToWB()) {
+            DPRINTF(BE, "Stores still in flight, will wait until drained.\n");
+            return false;
+        } else if (inst_num != 0) {
+            DPRINTF(BE, "Will wait until instruction is head of commit group.\n");
+            return false;
+        } else if (checker && inst->isStore()) {
+            checker->tick(inst);
+        }
+
         thread->setInst(
             static_cast<TheISA::MachInst>(inst->staticInst->machInst));
 #if FULL_SYSTEM
@@ -1094,6 +1192,8 @@ LWBackEnd<Impl>::commitInst(int inst_num)
     }
 
     if (inst->traceData) {
+        inst->traceData->setFetchSeq(inst->seqNum);
+        inst->traceData->setCPSeq(thread->numInst);
         inst->traceData->finalize();
         inst->traceData = NULL;
     }
@@ -1105,18 +1205,18 @@ LWBackEnd<Impl>::commitInst(int inst_num)
     instList.pop_back();
 
     --numInsts;
-    thread->numInsts++;
     ++thread->funcExeInst;
-    // Maybe move this to where teh fault is handled; if the fault is handled,
+    // Maybe move this to where the fault is handled; if the fault is handled,
     // don't try to set this myself as the fault will set it.  If not, then
     // I set thread->PC = thread->nextPC and thread->nextPC = thread->nextPC + 4.
     thread->setPC(thread->readNextPC());
+    thread->setNextPC(thread->readNextPC() + sizeof(TheISA::MachInst));
     updateComInstStats(inst);
 
     // Write the done sequence number here.
 //    LSQ.commitLoads(inst->seqNum);
-//    LSQ.commitStores(inst->seqNum);
     toIEW->doneSeqNum = inst->seqNum;
+    lastCommitCycle = curTick;
 
 #if FULL_SYSTEM
     int count = 0;
@@ -1243,6 +1343,22 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
         waitingInsts--;
     }
 
+    while (memBarrier && memBarrier->seqNum > sn) {
+        DPRINTF(BE, "[sn:%lli] Memory barrier squashed (or previously squashed)\n", memBarrier->seqNum);
+        memBarrier->clearMemDependents();
+        if (memBarrier->memDepReady()) {
+            DPRINTF(BE, "No previous barrier\n");
+            memBarrier = NULL;
+        } else {
+            std::list<DynInstPtr> &srcs = memBarrier->getMemSrcs();
+            memBarrier = srcs.front();
+            srcs.pop_front();
+            assert(srcs.empty());
+            DPRINTF(BE, "Previous barrier: [sn:%lli]\n",
+                    memBarrier->seqNum);
+        }
+    }
+
     frontEnd->addFreeRegs(freed_regs);
 }
 
@@ -1254,6 +1370,7 @@ LWBackEnd<Impl>::squashFromXC()
     squash(squashed_inst);
     frontEnd->squash(squashed_inst, thread->readPC(),
                      false, false);
+    frontEnd->interruptPending = false;
 
     thread->trapPending = false;
     thread->inSyscall = false;
@@ -1269,6 +1386,7 @@ LWBackEnd<Impl>::squashFromTrap()
     squash(squashed_inst);
     frontEnd->squash(squashed_inst, thread->readPC(),
                      false, false);
+    frontEnd->interruptPending = false;
 
     thread->trapPending = false;
     thread->inSyscall = false;
@@ -1319,6 +1437,36 @@ LWBackEnd<Impl>::fetchFault(Fault &fault)
     fetchHasFault = true;
 }
 
+template <class Impl>
+void
+LWBackEnd<Impl>::switchOut()
+{
+    switchedOut = true;  // LdWritebackEvent::process() bails out while set.
+    // Need to get rid of all committed, non-speculative state and write it
+    // to memory/XC.  In this case this is stores that have committed and not
+    // yet written back.
+    LSQ.switchOut();
+    squash(0);  // NOTE(review): sn 0 presumably squashes everything in flight.
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::takeOverFrom(ExecContext *old_xc)
+{
+    // Leave the switched-out state and cancel any pending squash request.
+    switchedOut = false;
+    xcSquash = false;
+    trapSquash = false;
+    // Reset instruction and waiting-memory-op bookkeeping.
+    numInsts = 0;
+    numWaitingMemOps = 0;
+    waitingMemOps.clear();
+    waitingInsts = 0;
+    dispatchStatus = Running;
+    commitStatus = Running;
+    LSQ.takeOverFrom(old_xc);  // Forward the takeover to the LSQ.
+}
+
 template <class Impl>
 void
 LWBackEnd<Impl>::updateExeInstStats(DynInstPtr &inst)
@@ -1358,7 +1506,11 @@ template <class Impl>
 void
 LWBackEnd<Impl>::updateComInstStats(DynInstPtr &inst)
 {
-    unsigned thread = inst->threadNumber;
+    unsigned tid = inst->threadNumber;
+
+    // keep an instruction count
+    thread->numInst++;
+    thread->numInsts++;
 
     cpu->numInst++;
     //
@@ -1366,33 +1518,33 @@ LWBackEnd<Impl>::updateComInstStats(DynInstPtr &inst)
     //
 #ifdef TARGET_ALPHA
     if (inst->isDataPrefetch()) {
-        stat_com_swp[thread]++;
+        stat_com_swp[tid]++;
     } else {
-        stat_com_inst[thread]++;
+        stat_com_inst[tid]++;
     }
 #else
-    stat_com_inst[thread]++;
+    stat_com_inst[tid]++;
 #endif
 
     //
     //  Control Instructions
     //
     if (inst->isControl())
-        stat_com_branches[thread]++;
+        stat_com_branches[tid]++;
 
     //
     //  Memory references
     //
     if (inst->isMemRef()) {
-        stat_com_refs[thread]++;
+        stat_com_refs[tid]++;
 
         if (inst->isLoad()) {
-            stat_com_loads[thread]++;
+            stat_com_loads[tid]++;
         }
     }
 
     if (inst->isMemBarrier()) {
-        stat_com_membars[thread]++;
+        stat_com_membars[tid]++;
     }
 }
 
diff --git a/cpu/ozone/lw_lsq.hh b/cpu/ozone/lw_lsq.hh
index eb9886244..042610324 100644
--- a/cpu/ozone/lw_lsq.hh
+++ b/cpu/ozone/lw_lsq.hh
@@ -41,6 +41,7 @@
 #include "cpu/inst_seq.hh"
 #include "mem/mem_interface.hh"
 //#include "mem/page_table.hh"
+#include "sim/debug.hh"
 #include "sim/sim_object.hh"
 
 //class PageTable;
@@ -90,7 +91,10 @@ class OzoneLWLSQ {
         /** The writeback event for the store.  Needed for store
          * conditionals.
          */
+      public:
         Event *wbEvent;
+        bool miss;
+      private:
         /** The pointer to the LSQ unit that issued the store. */
         OzoneLWLSQ<Impl> *lsqPtr;
     };
@@ -228,6 +232,14 @@ class OzoneLWLSQ {
                         !storeQueue.back().completed &&
                         !dcacheInterface->isBlocked(); }
 
+    void switchOut();
+
+    void takeOverFrom(ExecContext *old_xc = NULL);
+
+    bool isSwitchedOut() { return switchedOut; }
+
+    bool switchedOut;
+
   private:
     /** Completes the store at the specified index. */
     void completeStore(int store_idx);
@@ -560,12 +572,10 @@ OzoneLWLSQ<Impl>::read(MemReqPtr &req, T &data, int load_idx)
         sq_it++;
     }
 
-
     // If there's no forwarding case, then go access memory
     DPRINTF(OzoneLSQ, "Doing functional access for inst PC %#x\n",
             inst->readPC());
 
-
     // Setup MemReq pointer
     req->cmd = Read;
     req->completionEvent = NULL;
@@ -594,8 +604,12 @@ OzoneLWLSQ<Impl>::read(MemReqPtr &req, T &data, int load_idx)
         DPRINTF(OzoneLSQ, "D-cache: PC:%#x reading from paddr:%#x "
                 "vaddr:%#x flags:%i\n",
                 inst->readPC(), req->paddr, req->vaddr, req->flags);
-
-
+/*
+        Addr debug_addr = ULL(0xfffffc0000be81a8);
+        if (req->vaddr == debug_addr) {
+            debug_break();
+        }
+*/
         assert(!req->completionEvent);
         req->completionEvent =
             new typename BackEnd::LdWritebackEvent(inst, be);
@@ -647,7 +661,15 @@ OzoneLWLSQ<Impl>::write(MemReqPtr &req, T &data, int store_idx)
     (*sq_it).req = req;
     (*sq_it).size = sizeof(T);
     (*sq_it).data = data;
-
+    assert(!req->data);
+    req->data = new uint8_t[64];
+    memcpy(req->data, (uint8_t *)&(*sq_it).data, req->size);
+/*
+    Addr debug_addr = ULL(0xfffffc0000be81a8);
+    if (req->vaddr == debug_addr) {
+        debug_break();
+    }
+*/
     // This function only writes the data to the store queue, so no fault
     // can happen here.
     return NoFault;
diff --git a/cpu/ozone/lw_lsq_impl.hh b/cpu/ozone/lw_lsq_impl.hh
index 7b22d2564..9b7e48f96 100644
--- a/cpu/ozone/lw_lsq_impl.hh
+++ b/cpu/ozone/lw_lsq_impl.hh
@@ -29,6 +29,7 @@
 #include "arch/isa_traits.hh"
 #include "base/str.hh"
 #include "cpu/ozone/lw_lsq.hh"
+#include "cpu/checker/cpu.hh"
 
 template <class Impl>
 OzoneLWLSQ<Impl>::StoreCompletionEvent::StoreCompletionEvent(DynInstPtr &_inst,
@@ -39,6 +40,7 @@ OzoneLWLSQ<Impl>::StoreCompletionEvent::StoreCompletionEvent(DynInstPtr &_inst,
       inst(_inst),
       be(_be),
       wbEvent(wb_event),
+      miss(false),
       lsqPtr(lsq_ptr)
 {
     this->setFlags(Event::AutoDelete);
@@ -54,13 +56,21 @@ OzoneLWLSQ<Impl>::StoreCompletionEvent::process()
     //lsqPtr->removeMSHR(lsqPtr->storeQueue[storeIdx].inst->seqNum);
 
 //    lsqPtr->cpu->wakeCPU();
+    if (lsqPtr->isSwitchedOut()) {
+        if (wbEvent)
+            delete wbEvent;
+
+        return;
+    }
+
     if (wbEvent) {
         wbEvent->process();
         delete wbEvent;
     }
 
     lsqPtr->completeStore(inst->sqIdx);
-    be->removeDcacheMiss(inst);
+    if (miss)
+        be->removeDcacheMiss(inst);
 }
 
 template <class Impl>
@@ -80,8 +90,7 @@ OzoneLWLSQ<Impl>::OzoneLWLSQ()
 template<class Impl>
 void
 OzoneLWLSQ<Impl>::init(Params *params, unsigned maxLQEntries,
-                     unsigned maxSQEntries, unsigned id)
-
+                       unsigned maxSQEntries, unsigned id)
 {
     DPRINTF(OzoneLSQ, "Creating OzoneLWLSQ%i object.\n",id);
 
@@ -90,7 +99,7 @@ OzoneLWLSQ<Impl>::init(Params *params, unsigned maxLQEntries,
     LQEntries = maxLQEntries;
     SQEntries = maxSQEntries;
 
-    for (int i = 0; i < LQEntries * 10; i++) {
+    for (int i = 0; i < LQEntries * 2; i++) {
         LQIndices.push(i);
         SQIndices.push(i);
     }
@@ -196,6 +205,7 @@ template <class Impl>
 void
 OzoneLWLSQ<Impl>::insertLoad(DynInstPtr &load_inst)
 {
+    assert(loads < LQEntries * 2);
     assert(!LQIndices.empty());
     int load_index = LQIndices.front();
     LQIndices.pop();
@@ -503,21 +513,13 @@ OzoneLWLSQ<Impl>::writebackStores()
         assert((*sq_it).req);
         assert(!(*sq_it).committed);
 
-        MemReqPtr req = (*sq_it).req;
         (*sq_it).committed = true;
 
+        MemReqPtr req = (*sq_it).req;
+
         req->cmd = Write;
         req->completionEvent = NULL;
         req->time = curTick;
-        assert(!req->data);
-        req->data = new uint8_t[64];
-        memcpy(req->data, (uint8_t *)&(*sq_it).data, req->size);
-
-        DPRINTF(OzoneLSQ, "D-Cache: Writing back store idx:%i PC:%#x "
-                "to Addr:%#x, data:%#x [sn:%lli]\n",
-                inst->sqIdx,inst->readPC(),
-                req->paddr, *(req->data),
-                inst->seqNum);
 
         switch((*sq_it).size) {
           case 1:
@@ -535,8 +537,25 @@ OzoneLWLSQ<Impl>::writebackStores()
           default:
             panic("Unexpected store size!\n");
         }
+        if (!(req->flags & LOCKED)) {
+            (*sq_it).inst->setCompleted();
+            if (cpu->checker) {
+                cpu->checker->tick((*sq_it).inst);
+            }
+        }
+
+        DPRINTF(OzoneLSQ, "D-Cache: Writing back store idx:%i PC:%#x "
+                "to Addr:%#x, data:%#x [sn:%lli]\n",
+                inst->sqIdx,inst->readPC(),
+                req->paddr, *(req->data),
+                inst->seqNum);
 
         if (dcacheInterface) {
+            assert(!req->completionEvent);
+            StoreCompletionEvent *store_event = new
+                StoreCompletionEvent(inst, be, NULL, this);
+            req->completionEvent = store_event;
+
             MemAccessResult result = dcacheInterface->access(req);
 
             if (isStalled() &&
@@ -551,13 +570,14 @@ OzoneLWLSQ<Impl>::writebackStores()
 
             if (result != MA_HIT && dcacheInterface->doEvents()) {
 //                Event *wb = NULL;
-
+                store_event->miss = true;
                 typename BackEnd::LdWritebackEvent *wb = NULL;
                 if (req->flags & LOCKED) {
                     // Stx_C does not generate a system port transaction.
 //                    req->result=1;
                     wb = new typename BackEnd::LdWritebackEvent(inst,
                                                             be);
+                    store_event->wbEvent = wb;
                 }
 
                 DPRINTF(OzoneLSQ,"D-Cache Write Miss!\n");
@@ -567,9 +587,6 @@ OzoneLWLSQ<Impl>::writebackStores()
 
                 // Will stores need their own kind of writeback events?
                 // Do stores even need writeback events?
-                assert(!req->completionEvent);
-                req->completionEvent = new
-                    StoreCompletionEvent(inst, be, wb, this);
                 be->addDcacheMiss(inst);
 
                 lastDcacheStall = curTick;
@@ -597,10 +614,10 @@ OzoneLWLSQ<Impl>::writebackStores()
                     typename BackEnd::LdWritebackEvent *wb =
                         new typename BackEnd::LdWritebackEvent(inst,
                                                                be);
-                    wb->schedule(curTick);
+                    store_event->wbEvent = wb;
                 }
                 sq_it--;
-                completeStore(inst->sqIdx);
+//                completeStore(inst->sqIdx);
             }
         } else {
             panic("Must HAVE DCACHE!!!!!\n");
@@ -758,31 +775,121 @@ OzoneLWLSQ<Impl>::completeStore(int store_idx)
     DPRINTF(OzoneLSQ, "Completing store idx:%i [sn:%lli], storesToWB:%i\n",
             inst->sqIdx, inst->seqNum, storesToWB);
 
-    // A bit conservative because a store completion may not free up entries,
-    // but hopefully avoids two store completions in one cycle from making
-    // the CPU tick twice.
-//    cpu->activityThisCycle();
     assert(!storeQueue.empty());
     SQItHash.erase(sq_hash_it);
     SQIndices.push(inst->sqIdx);
     storeQueue.erase(sq_it);
     --stores;
-/*
-    SQIt oldest_store_it = --(storeQueue.end());
-    if (sq_it == oldest_store_it) {
-        do {
-            inst = (*oldest_store_it).inst;
-            sq_hash_it = SQItHash.find(inst->sqIdx);
-            assert(sq_hash_it != SQItHash.end());
-            SQItHash.erase(sq_hash_it);
-            SQIndices.push(inst->sqIdx);
-            storeQueue.erase(oldest_store_it--);
-
-            --stores;
-        } while ((*oldest_store_it).completed &&
-                 oldest_store_it != storeQueue.end());
-
-//        be->updateLSQNextCycle = true;
+//    assert(!inst->isCompleted());
+    inst->setCompleted();
+    if (cpu->checker) {
+        cpu->checker->tick(inst);
     }
-*/
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::switchOut()
+{
+    switchedOut = true;
+    SQIt sq_it = --(storeQueue.end());
+    while (storesToWB > 0 &&
+           sq_it != storeQueue.end() &&
+           (*sq_it).inst &&
+           (*sq_it).canWB) {
+
+        DynInstPtr inst = (*sq_it).inst;
+
+        if ((*sq_it).size == 0 && !(*sq_it).completed) {
+            sq_it--;
+//            completeStore(inst->sqIdx);
+
+            continue;
+        }
+
+        // Store conditionals don't complete until *after* they have written
+        // back.  If it's here and not yet sent to memory, then don't bother
+        // as it's not part of committed state.
+        if (inst->isDataPrefetch() || (*sq_it).committed ||
+            (*sq_it).req->flags & LOCKED) {
+            sq_it--;
+            continue;
+        }
+
+        assert((*sq_it).req);
+        assert(!(*sq_it).committed);
+
+        MemReqPtr req = (*sq_it).req;
+        (*sq_it).committed = true;
+
+        req->cmd = Write;
+        req->completionEvent = NULL;
+        req->time = curTick;
+        assert(!req->data);
+        req->data = new uint8_t[64];
+        memcpy(req->data, (uint8_t *)&(*sq_it).data, req->size);
+
+        DPRINTF(OzoneLSQ, "Switching out : Writing back store idx:%i PC:%#x "
+                "to Addr:%#x, data:%#x directly to memory [sn:%lli]\n",
+                inst->sqIdx,inst->readPC(),
+                req->paddr, *(req->data),
+                inst->seqNum);
+
+        switch((*sq_it).size) {
+          case 1:
+            cpu->write(req, (uint8_t &)(*sq_it).data);
+            break;
+          case 2:
+            cpu->write(req, (uint16_t &)(*sq_it).data);
+            break;
+          case 4:
+            cpu->write(req, (uint32_t &)(*sq_it).data);
+            break;
+          case 8:
+            cpu->write(req, (uint64_t &)(*sq_it).data);
+            break;
+          default:
+            panic("Unexpected store size!\n");
+        }
+    }
+
+    // Clear the queue to free up resources
+    storeQueue.clear();
+    loadQueue.clear();
+    loads = stores = storesToWB = 0;
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::takeOverFrom(ExecContext *old_xc)
+{
+    // Clear out any old state. May be redundant if this is the first time
+    // the CPU is being used.
+    stalled = false;
+    isLoadBlocked = false;
+    loadBlockedHandled = false;
+    switchedOut = false;
+
+    // Could do simple checks here to see if any index is queued twice
+    while (!LQIndices.empty())
+        LQIndices.pop();
+    while (!SQIndices.empty())
+        SQIndices.pop();
+
+    for (int i = 0; i < LQEntries * 2; i++) {
+        LQIndices.push(i);
+        SQIndices.push(i);
+    }
+
+    // May want to initialize these entries to NULL
+
+//    loadHead = loadTail = 0;
+
+//    storeHead = storeWBIdx = storeTail = 0;
+
+    usedPorts = 0;
+
+    loadFaultInst = storeFaultInst = memDepViolator = NULL;
+
+    blockedLoadSeqNum = 0;
 }
diff --git a/cpu/ozone/simple_params.hh b/cpu/ozone/simple_params.hh
index e503654aa..647da1781 100644
--- a/cpu/ozone/simple_params.hh
+++ b/cpu/ozone/simple_params.hh
@@ -51,6 +51,7 @@ class SimpleParams : public BaseCPU::Params
     unsigned backEndLatency;
     unsigned maxInstBufferSize;
     unsigned numPhysicalRegs;
+    unsigned maxOutstandingMemOps;
     //
     // Fetch
     //
-- 
cgit v1.2.3


From 52383ca7cc2b4698109b71a968cde16e9f7dc6e0 Mon Sep 17 00:00:00 2001
From: Kevin Lim <ktlim@umich.edu>
Date: Tue, 16 May 2006 14:09:04 -0400
Subject: Sampler updates.

cpu/ozone/cpu.hh:
    Updates for sampler.
cpu/ozone/cpu_impl.hh:
    Updates for sampler, checker.
cpu/ozone/inorder_back_end.hh:
    Sampler updates.  Also support old memory system.

--HG--
extra : convert_revision : 33ebe38e4c08d49c6af84032b819533b784b4fe8
---
 cpu/ozone/cpu.hh              |   8 ++-
 cpu/ozone/cpu_impl.hh         | 122 +++++++++++++-----------------------------
 cpu/ozone/front_end.hh        |   2 +
 cpu/ozone/front_end_impl.hh   |  10 ++++
 cpu/ozone/inorder_back_end.hh |  44 ++++++++++++---
 cpu/ozone/lw_back_end.hh      |   3 +-
 cpu/ozone/lw_back_end_impl.hh |  19 +++++++
 cpu/ozone/lw_lsq_impl.hh      |   9 +++-
 8 files changed, 122 insertions(+), 95 deletions(-)

(limited to 'cpu/ozone')

diff --git a/cpu/ozone/cpu.hh b/cpu/ozone/cpu.hh
index eec8902d8..1d522b2fa 100644
--- a/cpu/ozone/cpu.hh
+++ b/cpu/ozone/cpu.hh
@@ -64,6 +64,7 @@ class Process;
 #endif // FULL_SYSTEM
 
 class Checkpoint;
+class EndQuiesceEvent;
 class MemInterface;
 
 namespace Trace {
@@ -149,7 +150,7 @@ class OzoneCPU : public BaseCPU
         void unserialize(Checkpoint *cp, const std::string &section);
 
 #if FULL_SYSTEM
-        Event *getQuiesceEvent();
+        EndQuiesceEvent *getQuiesceEvent();
 
         Tick readLastActivate();
         Tick readLastSuspend();
@@ -330,8 +331,13 @@ class OzoneCPU : public BaseCPU
     int cpuId;
 
     void switchOut(Sampler *sampler);
+    void signalSwitched();
     void takeOverFrom(BaseCPU *oldCPU);
 
+    Sampler *sampler;
+
+    int switchCount;
+
 #if FULL_SYSTEM
     Addr dbg_vtophys(Addr addr);
 
diff --git a/cpu/ozone/cpu_impl.hh b/cpu/ozone/cpu_impl.hh
index 4f3fdf521..b085f077f 100644
--- a/cpu/ozone/cpu_impl.hh
+++ b/cpu/ozone/cpu_impl.hh
@@ -329,15 +329,30 @@ OzoneCPU<Impl>::copyToXC()
 */
 template <class Impl>
 void
-OzoneCPU<Impl>::switchOut(Sampler *sampler)
+OzoneCPU<Impl>::switchOut(Sampler *_sampler)
 {
+    sampler = _sampler;
+    switchCount = 0;
     // Front end needs state from back end, so switch out the back end first.
     backEnd->switchOut();
     frontEnd->switchOut();
-    _status = SwitchedOut;
-    if (tickEvent.scheduled())
-        tickEvent.squash();
-    sampler->signalSwitched();
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::signalSwitched()
+{
+    if (++switchCount == 2) {
+        backEnd->doSwitchOut();
+        frontEnd->doSwitchOut();
+        if (checker)
+            checker->switchOut(sampler);
+        _status = SwitchedOut;
+        if (tickEvent.scheduled())
+            tickEvent.squash();
+        sampler->signalSwitched();
+    }
+    assert(switchCount <= 2);
 }
 
 template <class Impl>
@@ -366,6 +381,11 @@ OzoneCPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
             tickEvent.schedule(curTick);
         }
     }
+    // Nothing running, change status to reflect that we're no longer
+    // switched out.
+    if (_status == SwitchedOut) {
+        _status = Idle;
+    }
 }
 
 template <class Impl>
@@ -666,83 +686,6 @@ OzoneCPU<Impl>::tick()
     thread.renameTable[ZeroReg+TheISA::FP_Base_DepTag]->
         setDoubleResult(0.0);
 
-    // General code flow:
-    // Check for any interrupts.  Handle them if I do have one.
-    // Check if I have a need to fetch a new cache block.  Either a bit could be
-    // set by functions indicating that I need to fetch a new block, or I could
-    // hang onto the last PC of the last cache block I fetched and compare the
-    // current PC to that.  Setting a bit seems nicer but may be more error
-    // prone.
-    // Scan through the IQ to figure out if there's anything I can issue/execute
-    // Might need something close to the FU Pools to tell what instructions
-    // I can issue.  How to handle loads and stores vs other insts?
-    // Extremely slow way: find first inst that can possibly issue; if it's a
-    // load or a store, then iterate through load/store queue.
-    // If I can't find instructions to execute and I've got room in the IQ
-    // (which is just a counter), then grab a few instructions out of the cache
-    // line buffer until I either run out or can execute up until my limit.
-
-    numCycles++;
-
-    traceData = NULL;
-
-//    Fault fault = NoFault;
-
-#if 0 // FULL_SYSTEM
-    if (checkInterrupts && check_interrupts() && !inPalMode() &&
-        status() != IcacheMissComplete) {
-        int ipl = 0;
-        int summary = 0;
-        checkInterrupts = false;
-
-        if (readMiscReg(IPR_SIRR)) {
-            for (int i = INTLEVEL_SOFTWARE_MIN;
-                 i < INTLEVEL_SOFTWARE_MAX; i++) {
-                if (readMiscReg(IPR_SIRR) & (ULL(1) << i)) {
-                    // See table 4-19 of 21164 hardware reference
-                    ipl = (i - INTLEVEL_SOFTWARE_MIN) + 1;
-                    summary |= (ULL(1) << i);
-                }
-            }
-        }
-
-        // Is this method so that if the interrupts are switched over from
-        // another CPU they'll still be handled?
-//	uint64_t interrupts = cpuXC->cpu->intr_status();
-        uint64_t interrupts = intr_status();
-        for (int i = INTLEVEL_EXTERNAL_MIN;
-            i < INTLEVEL_EXTERNAL_MAX; i++) {
-            if (interrupts & (ULL(1) << i)) {
-                // See table 4-19 of 21164 hardware reference
-                ipl = i;
-                summary |= (ULL(1) << i);
-            }
-        }
-
-        if (readMiscReg(IPR_ASTRR))
-            panic("asynchronous traps not implemented\n");
-
-        if (ipl && ipl > readMiscReg(IPR_IPLR)) {
-            setMiscReg(IPR_ISR, summary);
-            setMiscReg(IPR_INTID, ipl);
-
-            Fault(new InterruptFault)->invoke(xc);
-
-            DPRINTF(Flow, "Interrupt! IPLR=%d ipl=%d summary=%x\n",
-                    readMiscReg(IPR_IPLR), ipl, summary);
-        }
-    }
-#endif
-
-    // Make call to ISA to ensure 0 register semantics...actually because the
-    // DynInsts will generally be the register file, this should only have to
-    // happen when the xc is actually written to (during a syscall or something)
-    // maintain $r0 semantics
-//    assert(renameTable[ZeroReg]->readIntResult() == 0);
-#ifdef TARGET_ALPHA
-//    assert(renameTable[ZeroReg]->readDoubleResult() == 0);
-#endif // TARGET_ALPHA
-
     comm.advance();
     frontEnd->tick();
     backEnd->tick();
@@ -876,8 +819,8 @@ OzoneCPU<Impl>::processInterrupts()
         thread.setMiscReg(IPR_INTID, ipl);
         // @todo: Make this more transparent
         if (checker) {
-            checkerXC->setMiscReg(IPR_ISR, summary);
-            checkerXC->setMiscReg(IPR_INTID, ipl);
+            checker->cpuXCBase()->setMiscReg(IPR_ISR, summary);
+            checker->cpuXCBase()->setMiscReg(IPR_INTID, ipl);
         }
         Fault fault = new InterruptFault;
         fault->invoke(thread.getXCProxy());
@@ -993,6 +936,15 @@ OzoneCPU<Impl>::OzoneXC::takeOverFrom(ExecContext *old_context)
     setFuncExeInst(old_context->readFuncExeInst());
 #endif
 
+    EndQuiesceEvent *other_quiesce = old_context->getQuiesceEvent();
+    if (other_quiesce) {
+        // Point the quiesce event's XC at this XC so that it wakes up
+        // the proper CPU.
+        other_quiesce->xc = this;
+    }
+    if (thread->quiesceEvent) {
+        thread->quiesceEvent->xc = this;
+    }
 //    storeCondFailures = 0;
     cpu->lockFlag = false;
 
@@ -1016,7 +968,7 @@ OzoneCPU<Impl>::OzoneXC::unserialize(Checkpoint *cp, const std::string &section)
 
 #if FULL_SYSTEM
 template <class Impl>
-Event *
+EndQuiesceEvent *
 OzoneCPU<Impl>::OzoneXC::getQuiesceEvent()
 {
     return thread->quiesceEvent;
diff --git a/cpu/ozone/front_end.hh b/cpu/ozone/front_end.hh
index 188925ae5..f9db9ea5c 100644
--- a/cpu/ozone/front_end.hh
+++ b/cpu/ozone/front_end.hh
@@ -68,6 +68,8 @@ class FrontEnd
 
     void switchOut();
 
+    void doSwitchOut();
+
     void takeOverFrom(ExecContext *old_xc = NULL);
 
     bool isSwitchedOut() { return switchedOut; }
diff --git a/cpu/ozone/front_end_impl.hh b/cpu/ozone/front_end_impl.hh
index a3eb809d0..8ae9ec696 100644
--- a/cpu/ozone/front_end_impl.hh
+++ b/cpu/ozone/front_end_impl.hh
@@ -240,6 +240,9 @@ template <class Impl>
 void
 FrontEnd<Impl>::tick()
 {
+    if (switchedOut)
+        return;
+
     // @todo: Maybe I want to just have direct communication...
     if (fromCommit->doneSeqNum) {
         branchPred.update(fromCommit->doneSeqNum, 0);
@@ -828,6 +831,13 @@ void
 FrontEnd<Impl>::switchOut()
 {
     switchedOut = true;
+    cpu->signalSwitched();
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::doSwitchOut()
+{
     memReq = NULL;
     squash(0, 0);
     instBuffer.clear();
diff --git a/cpu/ozone/inorder_back_end.hh b/cpu/ozone/inorder_back_end.hh
index 6519b79e5..4039d8384 100644
--- a/cpu/ozone/inorder_back_end.hh
+++ b/cpu/ozone/inorder_back_end.hh
@@ -97,6 +97,10 @@ class InorderBackEnd
 
     Addr commitPC;
 
+    void switchOut() { panic("Not implemented!"); }
+    void doSwitchOut() { panic("Not implemented!"); }
+    void takeOverFrom(ExecContext *old_xc = NULL) { panic("Not implemented!"); }
+
   public:
     FullCPU *cpu;
 
@@ -330,14 +334,17 @@ InorderBackEnd<Impl>::read(MemReqPtr &req, T &data, int load_idx)
 
     // translate to physical address
 //    Fault fault = cpu->translateDataReadReq(req);
+    req->cmd = Read;
+    req->completionEvent = NULL;
+    req->time = curTick;
+    assert(!req->data);
+    req->data = new uint8_t[64];
+    req->flags &= ~INST_READ;
+    Fault fault = cpu->read(req, data);
+    memcpy(req->data, &data, sizeof(T));
 
     // if we have a cache, do cache access too
     if (dcacheInterface) {
-        req->cmd = Read;
-        req->completionEvent = NULL;
-        req->data = new uint8_t[64];
-        req->time = curTick;
-        req->flags &= ~INST_READ;
         MemAccessResult result = dcacheInterface->access(req);
 
         // Ugly hack to get an event scheduled *only* if the access is
@@ -372,6 +379,30 @@ InorderBackEnd<Impl>::write(MemReqPtr &req, T &data, int store_idx)
     // translate to physical address
 //    Fault fault = cpu->translateDataWriteReq(req);
 
+    req->cmd = Write;
+    req->completionEvent = NULL;
+    req->time = curTick;
+    assert(!req->data);
+    req->data = new uint8_t[64];
+    memcpy(req->data, (uint8_t *)&data, req->size);
+
+    switch(req->size) {
+      case 1:
+        cpu->write(req, (uint8_t &)data);
+        break;
+      case 2:
+        cpu->write(req, (uint16_t &)data);
+        break;
+      case 4:
+        cpu->write(req, (uint32_t &)data);
+        break;
+      case 8:
+        cpu->write(req, (uint64_t &)data);
+        break;
+      default:
+        panic("Unexpected store size!\n");
+    }
+
     if (dcacheInterface) {
         req->cmd = Write;
         req->data = new uint8_t[64];
@@ -395,7 +426,7 @@ InorderBackEnd<Impl>::write(MemReqPtr &req, T &data, int store_idx)
 
         }
     }
-
+/*
     if (req->flags & LOCKED) {
         if (req->flags & UNCACHEABLE) {
             // Don't update result register (see stq_c in isa_desc)
@@ -404,6 +435,7 @@ InorderBackEnd<Impl>::write(MemReqPtr &req, T &data, int store_idx)
             req->result = 1;
         }
     }
+*/
 /*
     if (res && (fault == NoFault))
         *res = req->result;
diff --git a/cpu/ozone/lw_back_end.hh b/cpu/ozone/lw_back_end.hh
index 028fdaf8c..770b66ad5 100644
--- a/cpu/ozone/lw_back_end.hh
+++ b/cpu/ozone/lw_back_end.hh
@@ -187,7 +187,7 @@ class LWBackEnd
     void instToCommit(DynInstPtr &inst);
 
     void switchOut();
-
+    void doSwitchOut();
     void takeOverFrom(ExecContext *old_xc = NULL);
 
     bool isSwitchedOut() { return switchedOut; }
@@ -314,6 +314,7 @@ class LWBackEnd
     bool fetchHasFault;
 
     bool switchedOut;
+    bool switchPending;
 
     DynInstPtr memBarrier;
 
diff --git a/cpu/ozone/lw_back_end_impl.hh b/cpu/ozone/lw_back_end_impl.hh
index d4829629d..a82dd5b70 100644
--- a/cpu/ozone/lw_back_end_impl.hh
+++ b/cpu/ozone/lw_back_end_impl.hh
@@ -192,6 +192,7 @@ LWBackEnd<Impl>::LWBackEnd(Params *params)
     numWaitingMemOps = 0;
     waitingInsts = 0;
     switchedOut = false;
+    switchPending = false;
 
 //    IQ.setBE(this);
     LSQ.setBE(this);
@@ -631,6 +632,11 @@ LWBackEnd<Impl>::tick()
 {
     DPRINTF(BE, "Ticking back end\n");
 
+    if (switchPending && robEmpty() && !LSQ.hasStoresToWB()) {
+        cpu->signalSwitched();
+        return;
+    }
+
     ROB_count[0]+= numInsts;
 
     wbCycle = 0;
@@ -682,6 +688,7 @@ LWBackEnd<Impl>::tick()
     assert(numInsts == instList.size());
     assert(waitingInsts == waitingList.size());
     assert(numWaitingMemOps == waitingMemOps.size());
+    assert(!switchedOut);
 #endif
 }
 
@@ -1440,12 +1447,24 @@ LWBackEnd<Impl>::fetchFault(Fault &fault)
 template <class Impl>
 void
 LWBackEnd<Impl>::switchOut()
+{
+    switchPending = true;
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::doSwitchOut()
 {
     switchedOut = true;
+    switchPending = false;
     // Need to get rid of all committed, non-speculative state and write it
     // to memory/XC.  In this case this is stores that have committed and not
     // yet written back.
+    assert(robEmpty());
+    assert(!LSQ.hasStoresToWB());
+
     LSQ.switchOut();
+
     squash(0);
 }
 
diff --git a/cpu/ozone/lw_lsq_impl.hh b/cpu/ozone/lw_lsq_impl.hh
index 9b7e48f96..fdf6bff07 100644
--- a/cpu/ozone/lw_lsq_impl.hh
+++ b/cpu/ozone/lw_lsq_impl.hh
@@ -791,6 +791,8 @@ template <class Impl>
 void
 OzoneLWLSQ<Impl>::switchOut()
 {
+//    assert(loads == 0);
+    assert(storesToWB == 0);
     switchedOut = true;
     SQIt sq_it = --(storeQueue.end());
     while (storesToWB > 0 &&
@@ -810,9 +812,12 @@ OzoneLWLSQ<Impl>::switchOut()
         // Store conditionals don't complete until *after* they have written
         // back.  If it's here and not yet sent to memory, then don't bother
         // as it's not part of committed state.
-        if (inst->isDataPrefetch() || (*sq_it).committed ||
-            (*sq_it).req->flags & LOCKED) {
+        if (inst->isDataPrefetch() || (*sq_it).committed) {
+            sq_it--;
+            continue;
+        } else if ((*sq_it).req->flags & LOCKED) {
             sq_it--;
+            assert(!(*sq_it).canWB || ((*sq_it).canWB && (*sq_it).req->flags & LOCKED));
             continue;
         }
 
-- 
cgit v1.2.3


From 36581a534240c322e1fc28b8bd6e8f13f2b0fefd Mon Sep 17 00:00:00 2001
From: Kevin Lim <ktlim@umich.edu>
Date: Wed, 17 May 2006 14:25:10 -0400
Subject: Faults generated at fetch are passed to the backend by creating a
 dummy nop instruction and giving it the fault.  This unifies front end faults
 and normal instruction faults.

cpu/checker/cpu.cc:
    Fixups for fetch fault being sent with the instruction.
cpu/o3/fetch_impl.hh:
cpu/ozone/front_end_impl.hh:
    Send any faults generated at fetch along with a fake nop instruction to the back end.  This avoids having to use direct communication to check if the entire front end has drained; it is naturally handled through the nop's fault being handled when it reaches the head of commit.
cpu/ozone/front_end.hh:
    Add extra status TrapPending.
cpu/ozone/lw_back_end_impl.hh:
    Fetch fault handled through a dummy nop carrying the fetch fault.

    Avoid putting Nops on the exeList.

--HG--
extra : convert_revision : 8d9899748b34c204763a49c48a9b5113864f5789
---
 cpu/ozone/front_end.hh        |  1 +
 cpu/ozone/front_end_impl.hh   | 36 ++++++++++++++++++++++++++++--------
 cpu/ozone/lw_back_end_impl.hh | 10 ++++++++--
 3 files changed, 37 insertions(+), 10 deletions(-)

(limited to 'cpu/ozone')

diff --git a/cpu/ozone/front_end.hh b/cpu/ozone/front_end.hh
index f9db9ea5c..326f7d2c9 100644
--- a/cpu/ozone/front_end.hh
+++ b/cpu/ozone/front_end.hh
@@ -120,6 +120,7 @@ class FrontEnd
         SerializeComplete,
         RenameBlocked,
         QuiescePending,
+        TrapPending,
         BEBlocked
     };
 
diff --git a/cpu/ozone/front_end_impl.hh b/cpu/ozone/front_end_impl.hh
index 8ae9ec696..cd57aeef4 100644
--- a/cpu/ozone/front_end_impl.hh
+++ b/cpu/ozone/front_end_impl.hh
@@ -268,11 +268,9 @@ FrontEnd<Impl>::tick()
     }
 
     if (status == RenameBlocked || status == SerializeBlocked ||
-        status == BEBlocked) {
-        // This might cause the front end to run even though it
-        // shouldn't, but this should only be a problem for one cycle.
-        // Also will cause a one cycle bubble between changing state
-        // and restarting.
+        status == TrapPending || status == BEBlocked) {
+        // Will cause a one cycle bubble between changing state and
+        // restarting.
         DPRINTF(FE, "In blocked status.\n");
 
         fetchBlockedCycles++;
@@ -537,9 +535,32 @@ void
 FrontEnd<Impl>::handleFault(Fault &fault)
 {
     DPRINTF(FE, "Fault at fetch, telling commit\n");
-    backEnd->fetchFault(fault);
+//    backEnd->fetchFault(fault);
     // We're blocked on the back end until it handles this fault.
-    status = BEBlocked;
+    status = TrapPending;
+
+    // Get a sequence number.
+    InstSeqNum inst_seq = getAndIncrementInstSeq();
+    // We will use a nop in order to carry the fault.
+    ExtMachInst ext_inst = TheISA::NoopMachInst;
+
+    // Create a new DynInst from the dummy nop.
+    DynInstPtr instruction = new DynInst(ext_inst, PC,
+                                         PC+sizeof(MachInst),
+                                         inst_seq, cpu);
+    instruction->setPredTarg(instruction->readNextPC());
+//    instruction->setThread(tid);
+
+//    instruction->setASID(tid);
+
+    instruction->setState(thread);
+
+    instruction->traceData = NULL;
+
+    instruction->fault = fault;
+    instruction->setCanIssue();
+    instBuffer.push_back(instruction);
+    ++instBufferSize;
 }
 
 template <class Impl>
@@ -881,7 +902,6 @@ FrontEnd<Impl>::dumpInsts()
                 (*buff_it)->isSquashed());
         buff_it++;
     }
-
 }
 
 template <class Impl>
diff --git a/cpu/ozone/lw_back_end_impl.hh b/cpu/ozone/lw_back_end_impl.hh
index a82dd5b70..db0872e52 100644
--- a/cpu/ozone/lw_back_end_impl.hh
+++ b/cpu/ozone/lw_back_end_impl.hh
@@ -652,7 +652,7 @@ LWBackEnd<Impl>::tick()
         squashFromTrap();
     } else if (xcSquash) {
         squashFromXC();
-    } else if (fetchHasFault && robEmpty() && frontEnd->isEmpty() && !LSQ.hasStoresToWB()) {
+    } /*else if (fetchHasFault && robEmpty() && frontEnd->isEmpty() && !LSQ.hasStoresToWB()) {
         DPRINTF(BE, "ROB and front end empty, handling fetch fault\n");
         Fault fetch_fault = frontEnd->getFault();
         if (fetch_fault == NoFault) {
@@ -662,7 +662,7 @@ LWBackEnd<Impl>::tick()
             handleFault(fetch_fault);
             fetchHasFault = false;
         }
-    }
+        }*/
 #endif
 
     if (dispatchStatus != Blocked) {
@@ -777,6 +777,12 @@ LWBackEnd<Impl>::dispatchInsts()
                             inst->seqNum);
                     exeList.push(inst);
                 }
+            } else if (inst->isNop()) {
+                DPRINTF(BE, "Nop encountered [sn:%lli], skipping exeList.\n",
+                        inst->seqNum);
+                inst->setIssued();
+                inst->setExecuted();
+                inst->setCanCommit();
             } else {
                 DPRINTF(BE, "Instruction [sn:%lli] ready, addding to exeList.\n",
                         inst->seqNum);
-- 
cgit v1.2.3


From c7e7d07ec395156015e3baf52048c403d28a6442 Mon Sep 17 00:00:00 2001
From: Kevin Lim <ktlim@umich.edu>
Date: Fri, 19 May 2006 14:27:46 -0400
Subject: Fixes for regression build errors.

--HG--
extra : convert_revision : 1f59c853cb0e327d7cf586021b5139f1242e4f28
---
 cpu/ozone/cpu.hh      | 1 -
 cpu/ozone/cpu_impl.hh | 4 ++--
 2 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'cpu/ozone')

diff --git a/cpu/ozone/cpu.hh b/cpu/ozone/cpu.hh
index 1d522b2fa..7e12e75e5 100644
--- a/cpu/ozone/cpu.hh
+++ b/cpu/ozone/cpu.hh
@@ -89,7 +89,6 @@ class OzoneCPU : public BaseCPU
     typedef typename Impl::FrontEnd FrontEnd;
     typedef typename Impl::BackEnd BackEnd;
     typedef typename Impl::DynInst DynInst;
-    typedef typename Impl::DynInst DynInst;
     typedef typename Impl::DynInstPtr DynInstPtr;
 
     typedef TheISA::MiscReg MiscReg;
diff --git a/cpu/ozone/cpu_impl.hh b/cpu/ozone/cpu_impl.hh
index b085f077f..031b4b145 100644
--- a/cpu/ozone/cpu_impl.hh
+++ b/cpu/ozone/cpu_impl.hh
@@ -934,8 +934,7 @@ OzoneCPU<Impl>::OzoneXC::takeOverFrom(ExecContext *old_context)
     setCpuId(old_context->readCpuId());
 #if !FULL_SYSTEM
     setFuncExeInst(old_context->readFuncExeInst());
-#endif
-
+#else
     EndQuiesceEvent *other_quiesce = old_context->getQuiesceEvent();
     if (other_quiesce) {
         // Point the quiesce event's XC at this XC so that it wakes up
@@ -947,6 +946,7 @@ OzoneCPU<Impl>::OzoneXC::takeOverFrom(ExecContext *old_context)
     }
 //    storeCondFailures = 0;
     cpu->lockFlag = false;
+#endif
 
     old_context->setStatus(ExecContext::Unallocated);
 }
-- 
cgit v1.2.3


From ff3d16ca1f7d83ce7932868d2bf1cb3e526562ea Mon Sep 17 00:00:00 2001
From: Kevin Lim <ktlim@umich.edu>
Date: Tue, 23 May 2006 16:51:16 -0400
Subject: Move kernel stats out of CPU and into XC.

arch/alpha/ev5.cc:
    Move kernel stats out of CPU and into XC.  Also be sure to check if the kernel stats exist prior to using them.

--HG--
extra : convert_revision : 565cd7026410fd7d8586f953d9b328c2e67a9473
---
 cpu/ozone/cpu.hh      |  28 ++-----
 cpu/ozone/cpu_impl.hh | 211 ++++++++------------------------------------------
 2 files changed, 41 insertions(+), 198 deletions(-)

(limited to 'cpu/ozone')

diff --git a/cpu/ozone/cpu.hh b/cpu/ozone/cpu.hh
index 7e12e75e5..5af2b02b2 100644
--- a/cpu/ozone/cpu.hh
+++ b/cpu/ozone/cpu.hh
@@ -57,6 +57,10 @@ class Sampler;
 class RemoteGDB;
 class GDBListener;
 
+namespace Kernel {
+    class Statistics;
+};
+
 #else
 
 class Process;
@@ -116,6 +120,8 @@ class OzoneCPU : public BaseCPU
         AlphaITB *getITBPtr() { return cpu->itb; }
 
         AlphaDTB * getDTBPtr() { return cpu->dtb; }
+
+        Kernel::Statistics *getKernelStats() { return thread->kernelStats; }
 #else
         Process *getProcessPtr() { return thread->process; }
 #endif
@@ -238,14 +244,7 @@ class OzoneCPU : public BaseCPU
 
   private:
     OzoneThreadState<Impl> thread;
-/*
-    // Squash event for when the XC needs to squash all inflight instructions.
-    struct XCSquashEvent : public Event
-    {
-        void process();
-        const char *description();
-    };
-*/
+
   public:
     // main simulation loop (one cycle)
     void tick();
@@ -288,7 +287,6 @@ class OzoneCPU : public BaseCPU
     void trace_data(T data);
 
   public:
-    //
     enum Status {
         Running,
         Idle,
@@ -325,8 +323,6 @@ class OzoneCPU : public BaseCPU
 
     int readCpuId() { return cpuId; }
 
-//    FunctionalMemory *getMemPtr() { return mem; }
-
     int cpuId;
 
     void switchOut(Sampler *sampler);
@@ -369,8 +365,6 @@ class OzoneCPU : public BaseCPU
     Status status() const { return _status; }
     void setStatus(Status new_status) { _status = new_status; }
 
-    // Not sure what an activate() call on the CPU's proxy XC would mean...
-
     virtual void activateContext(int thread_num, int delay);
     virtual void suspendContext(int thread_num);
     virtual void deallocateContext(int thread_num);
@@ -384,7 +378,6 @@ class OzoneCPU : public BaseCPU
   public:
     Counter numInst;
     Counter startNumInst;
-//    Stats::Scalar<> numInsts;
 
     virtual Counter totalInstructions() const
     {
@@ -392,9 +385,6 @@ class OzoneCPU : public BaseCPU
     }
 
   private:
-    // number of simulated memory references
-//    Stats::Scalar<> numMemRefs;
-
     // number of simulated loads
     Counter numLoad;
     Counter startNumLoad;
@@ -472,7 +462,6 @@ class OzoneCPU : public BaseCPU
     template <class T>
     Fault read(MemReqPtr &req, T &data)
     {
-//	panic("CPU READ NOT IMPLEMENTED W/NEW MEMORY\n");
 #if 0
 #if FULL_SYSTEM && defined(TARGET_ALPHA)
         if (req->flags & LOCKED) {
@@ -483,7 +472,6 @@ class OzoneCPU : public BaseCPU
 #endif
         Fault error;
         if (req->flags & LOCKED) {
-//            lockAddr = req->paddr;
             lockAddrList.insert(req->paddr);
             lockFlag = true;
         }
@@ -558,7 +546,7 @@ class OzoneCPU : public BaseCPU
             if (req->flags & UNCACHEABLE) {
                 req->result = 2;
             } else {
-                if (this->lockFlag/* && this->lockAddr == req->paddr*/) {
+                if (this->lockFlag) {
                     if (lockAddrList.find(req->paddr) !=
                         lockAddrList.end()) {
                         req->result = 1;
diff --git a/cpu/ozone/cpu_impl.hh b/cpu/ozone/cpu_impl.hh
index 031b4b145..5675da3a8 100644
--- a/cpu/ozone/cpu_impl.hh
+++ b/cpu/ozone/cpu_impl.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005 The Regents of The University of Michigan
+ * Copyright (c) 2006 The Regents of The University of Michigan
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -26,8 +26,8 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include <cstdio>
-#include <cstdlib>
+//#include <cstdio>
+//#include <cstdlib>
 
 #include "arch/isa_traits.hh" // For MachInst
 #include "base/trace.hh"
@@ -39,7 +39,7 @@
 #include "cpu/ozone/cpu.hh"
 #include "cpu/quiesce_event.hh"
 #include "cpu/static_inst.hh"
-#include "mem/base_mem.hh"
+//#include "mem/base_mem.hh"
 #include "mem/mem_interface.hh"
 #include "sim/sim_object.hh"
 #include "sim/stats.hh"
@@ -50,7 +50,7 @@
 #include "arch/alpha/tlb.hh"
 #include "arch/vtophys.hh"
 #include "base/callback.hh"
-#include "base/remote_gdb.hh"
+//#include "base/remote_gdb.hh"
 #include "cpu/profile.hh"
 #include "kern/kernel_stats.hh"
 #include "mem/functional/memory_control.hh"
@@ -94,80 +94,26 @@ OzoneCPU<Impl>::TickEvent::description()
 {
     return "OzoneCPU tick event";
 }
-/*
-template <class Impl>
-OzoneCPU<Impl>::ICacheCompletionEvent::ICacheCompletionEvent(OzoneCPU *_cpu)
-    : Event(&mainEventQueue),
-      cpu(_cpu)
-{
-}
-
-template <class Impl>
-void
-OzoneCPU<Impl>::ICacheCompletionEvent::process()
-{
-    cpu->processICacheCompletion();
-}
-
-template <class Impl>
-const char *
-OzoneCPU<Impl>::ICacheCompletionEvent::description()
-{
-    return "OzoneCPU I-cache completion event";
-}
-
-template <class Impl>
-OzoneCPU<Impl>::DCacheCompletionEvent::
-DCacheCompletionEvent(OzoneCPU *_cpu,
-                      DynInstPtr &_inst,
-                      DCacheCompEventIt &_dcceIt)
-    : Event(&mainEventQueue),
-      cpu(_cpu),
-      inst(_inst),
-      dcceIt(_dcceIt)
-{
-    this->setFlags(Event::AutoDelete);
-}
-
-template <class Impl>
-void
-OzoneCPU<Impl>::DCacheCompletionEvent::process()
-{
-    inst->setCompleted();
-
-    // Maybe remove the EA from the list of addrs?
-    cpu->eaList.clearAddr(inst->seqNum, inst->getEA());
-    cpu->dCacheCompList.erase(this->dcceIt);
-}
 
-template <class Impl>
-const char *
-OzoneCPU<Impl>::DCacheCompletionEvent::description()
-{
-    return "OzoneCPU D-cache completion event";
-}
-*/
 template <class Impl>
 OzoneCPU<Impl>::OzoneCPU(Params *p)
 #if FULL_SYSTEM
-    : BaseCPU(p), thread(this, 0, p->mem), tickEvent(this, p->width), mem(p->mem),
+    : BaseCPU(p), thread(this, 0, p->mem), tickEvent(this, p->width),
+      mem(p->mem),
 #else
     : BaseCPU(p), thread(this, 0, p->workload[0], 0), tickEvent(this, p->width),
       mem(p->workload[0]->getMemory()),
 #endif
       comm(5, 5)
 {
-    if (p->checker) {
-        BaseCPU *temp_checker = p->checker;
-        checker = dynamic_cast<Checker<DynInstPtr> *>(temp_checker);
-    } else {
-        checker = NULL;
-    }
     frontEnd = new FrontEnd(p);
     backEnd = new BackEnd(p);
 
     _status = Idle;
-    if (checker) {
+
+    if (p->checker) {
+        BaseCPU *temp_checker = p->checker;
+        checker = dynamic_cast<Checker<DynInstPtr> *>(temp_checker);
         checker->setMemory(mem);
 #if FULL_SYSTEM
         checker->setSystem(p->system);
@@ -176,19 +122,18 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
         thread.xcProxy = checkerXC;
         xcProxy = checkerXC;
     } else {
+        checker = NULL;
         thread.xcProxy = &ozoneXC;
         xcProxy = &ozoneXC;
     }
 
-    thread.inSyscall = false;
-
     ozoneXC.cpu = this;
     ozoneXC.thread = &thread;
 
+    thread.inSyscall = false;
+
     thread.setStatus(ExecContext::Suspended);
 #if FULL_SYSTEM
-//    xc = new ExecContext(this, 0, p->system, p->itb, p->dtb, p->mem);
-
     /***** All thread state stuff *****/
     thread.cpu = this;
     thread.tid = 0;
@@ -217,31 +162,15 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
     thread.profileNode = &dummyNode;
     thread.profilePC = 3;
 #else
-//    xc = new ExecContext(this, /* thread_num */ 0, p->workload[0], /* asid */ 0);
     thread.cpu = this;
     thread.tid = 0;
     thread.process = p->workload[0];
-//    thread.mem = thread.process->getMemory();
     thread.asid = 0;
 #endif // !FULL_SYSTEM
-/*
-    icacheInterface = p->icache_interface;
-    dcacheInterface = p->dcache_interface;
-
-    cacheMemReq = new MemReq();
-    cacheMemReq->xc = xc;
-    cacheMemReq->asid = 0;
-    cacheMemReq->data = new uint8_t[64];
-*/
+
     numInst = 0;
     startNumInst = 0;
-/*    numLoad = 0;
-    startNumLoad = 0;
-    lastIcacheStall = 0;
-    lastDcacheStall = 0;
 
-    issueWidth = p->issueWidth;
-*/
     execContexts.push_back(xcProxy);
 
     frontEnd->setCPU(this);
@@ -286,47 +215,7 @@ template <class Impl>
 OzoneCPU<Impl>::~OzoneCPU()
 {
 }
-/*
-template <class Impl>
-void
-OzoneCPU<Impl>::copyFromXC()
-{
-    for (int i = 0; i < TheISA::TotalNumRegs; ++i) {
-        if (i < TheISA::NumIntRegs) {
-            renameTable[i]->setIntResult(xc->readIntReg(i));
-        } else if (i < TheISA::NumFloatRegs) {
-            renameTable[i]->setDoubleResult(xc->readFloatRegDouble(i));
-        }
-    }
-
-    DPRINTF(OzoneCPU, "Func Exe inst is: %i\n", xc->func_exe_inst);
-    backEnd->funcExeInst = xc->func_exe_inst;
-//    PC = xc->readPC();
-//    nextPC = xc->regs.npc;
-}
-
-template <class Impl>
-void
-OzoneCPU<Impl>::copyToXC()
-{
-    for (int i = 0; i < TheISA::TotalNumRegs; ++i) {
-        if (i < TheISA::NumIntRegs) {
-            xc->setIntReg(i, renameTable[i]->readIntResult());
-        } else if (i < TheISA::NumFloatRegs) {
-            xc->setFloatRegDouble(i, renameTable[i]->readDoubleResult());
-        }
-    }
-
-    this->xc->regs.miscRegs.fpcr = this->regFile.miscRegs[tid].fpcr;
-    this->xc->regs.miscRegs.uniq = this->regFile.miscRegs[tid].uniq;
-    this->xc->regs.miscRegs.lock_flag = this->regFile.miscRegs[tid].lock_flag;
-    this->xc->regs.miscRegs.lock_addr = this->regFile.miscRegs[tid].lock_addr;
 
-    xc->func_exe_inst = backEnd->funcExeInst;
-    xc->regs.pc = PC;
-    xc->regs.npc = nextPC;
-}
-*/
 template <class Impl>
 void
 OzoneCPU<Impl>::switchOut(Sampler *_sampler)
@@ -394,7 +283,6 @@ OzoneCPU<Impl>::activateContext(int thread_num, int delay)
 {
     // Eventually change this in SMT.
     assert(thread_num == 0);
-//    assert(xcProxy);
 
     assert(_status == Idle);
     notIdleFraction++;
@@ -410,8 +298,8 @@ OzoneCPU<Impl>::suspendContext(int thread_num)
 {
     // Eventually change this in SMT.
     assert(thread_num == 0);
-//    assert(xcProxy);
-    // @todo: Figure out how to initially set the status properly so this is running.
+    // @todo: Figure out how to initially set the status properly so
+    // this is running.
 //    assert(_status == Running);
     notIdleFraction--;
     unscheduleTickEvent();
@@ -486,14 +374,7 @@ void
 OzoneCPU<Impl>::init()
 {
     BaseCPU::init();
-/*
-    copyFromXC();
 
-    // ALso copy over PC/nextPC.  This isn't normally copied in "copyFromXC()"
-    // so that the XC doesn't mess up the PC when returning from a syscall.
-    PC = xc->readPC();
-    nextPC = xc->regs.npc;
-*/
     // Mark this as in syscall so it won't need to squash
     thread.inSyscall = true;
 #if FULL_SYSTEM
@@ -514,8 +395,6 @@ template <class Impl>
 void
 OzoneCPU<Impl>::serialize(std::ostream &os)
 {
-    // At this point, all DCacheCompEvents should be processed.
-
     BaseCPU::serialize(os);
     SERIALIZE_ENUM(_status);
     nameOut(os, csprintf("%s.xc", name()));
@@ -631,31 +510,7 @@ OzoneCPU<Impl>::dbg_vtophys(Addr addr)
     return vtophys(xcProxy, addr);
 }
 #endif // FULL_SYSTEM
-/*
-template <class Impl>
-void
-OzoneCPU<Impl>::processICacheCompletion()
-{
-    switch (status()) {
-      case IcacheMiss:
-        DPRINTF(OzoneCPU, "OzoneCPU: Finished Icache miss.\n");
-
-        icacheStallCycles += curTick - lastIcacheStall;
-        _status = IcacheMissComplete;
-        cacheBlkValid = true;
-//	scheduleTickEvent(1);
-        break;
-      case SwitchedOut:
-        // If this CPU has been switched out due to sampling/warm-up,
-        // ignore any further status changes (e.g., due to cache
-        // misses outstanding at the time of the switch).
-        return;
-      default:
-        panic("OzoneCPU::processICacheCompletion: bad state");
-        break;
-    }
-}
-*/
+
 #if FULL_SYSTEM
 template <class Impl>
 void
@@ -663,7 +518,6 @@ OzoneCPU<Impl>::post_interrupt(int int_num, int index)
 {
     BaseCPU::post_interrupt(int_num, index);
 
-//    if (thread._status == ExecContext::Suspended) {
     if (_status == Idle) {
         DPRINTF(IPI,"Suspended Processor awoke\n");
 //	thread.activate();
@@ -690,9 +544,6 @@ OzoneCPU<Impl>::tick()
     frontEnd->tick();
     backEnd->tick();
 
-    // Do this here?  For now the front end will control the PC.
-//    PC = nextPC;
-
     // check for instruction-count-based events
     comInstEventQueue[0]->serviceEvents(numInst);
 
@@ -742,11 +593,13 @@ OzoneCPU<Impl>::setSyscallReturn(SyscallReturn return_value, int tid)
     if (return_value.successful()) {
         // no error
         thread.renameTable[SyscallSuccessReg]->setIntResult(0);
-        thread.renameTable[ReturnValueReg]->setIntResult(return_value.value());
+        thread.renameTable[ReturnValueReg]->setIntResult(
+            return_value.value());
     } else {
         // got an error, return details
         thread.renameTable[SyscallSuccessReg]->setIntResult((IntReg) -1);
-        thread.renameTable[ReturnValueReg]->setIntResult(-return_value.value());
+        thread.renameTable[ReturnValueReg]->setIntResult(
+            -return_value.value());
     }
 }
 #else
@@ -756,15 +609,10 @@ OzoneCPU<Impl>::hwrei()
 {
     // Need to move this to ISA code
     // May also need to make this per thread
-/*
-    if (!inPalMode())
-        return new UnimplementedOpcodeFault;
 
-    thread.setNextPC(thread.readMiscReg(AlphaISA::IPR_EXC_ADDR));
-*/
     lockFlag = false;
     lockAddrList.clear();
-    kernelStats->hwrei();
+    thread.kernelStats->hwrei();
 
     checkInterrupts = true;
 
@@ -835,7 +683,7 @@ OzoneCPU<Impl>::simPalCheck(int palFunc)
 {
     // Need to move this to ISA code
     // May also need to make this per thread
-    this->kernelStats->callpal(palFunc, xcProxy);
+    thread.kernelStats->callpal(palFunc, xcProxy);
 
     switch (palFunc) {
       case PAL::halt:
@@ -874,7 +722,6 @@ template <class Impl>
 void
 OzoneCPU<Impl>::OzoneXC::setStatus(Status new_status)
 {
-//    cpu->_status = new_status;
     thread->_status = new_status;
 }
 
@@ -932,6 +779,7 @@ OzoneCPU<Impl>::OzoneXC::takeOverFrom(ExecContext *old_context)
     setStatus(old_context->status());
     copyArchRegs(old_context);
     setCpuId(old_context->readCpuId());
+
 #if !FULL_SYSTEM
     setFuncExeInst(old_context->readFuncExeInst());
 #else
@@ -944,6 +792,8 @@ OzoneCPU<Impl>::OzoneXC::takeOverFrom(ExecContext *old_context)
     if (thread->quiesceEvent) {
         thread->quiesceEvent->xc = this;
     }
+
+    thread->kernelStats = old_context->getKernelStats();
 //    storeCondFailures = 0;
     cpu->lockFlag = false;
 #endif
@@ -954,7 +804,12 @@ OzoneCPU<Impl>::OzoneXC::takeOverFrom(ExecContext *old_context)
 template <class Impl>
 void
 OzoneCPU<Impl>::OzoneXC::regStats(const std::string &name)
-{ }
+{
+#if FULL_SYSTEM
+    thread->kernelStats = new Kernel::Statistics(cpu->system);
+    thread->kernelStats->regStats(name + ".kern");
+#endif
+}
 
 template <class Impl>
 void
-- 
cgit v1.2.3


From 6c386396faef6f48f2d01911e59d09b192bf3c45 Mon Sep 17 00:00:00 2001
From: Kevin Lim <ktlim@umich.edu>
Date: Tue, 23 May 2006 16:57:14 -0400
Subject: Code cleanup.

cpu/base_dyn_inst.hh:
    Code cleanup

--HG--
extra : convert_revision : 501c03f8e4346ffbcb545ddeee30c1f8ded9baa7
---
 cpu/ozone/dyn_inst.hh         |  52 +--------------------
 cpu/ozone/dyn_inst_impl.hh    |   7 +--
 cpu/ozone/front_end.hh        |  34 +++++++++++---
 cpu/ozone/front_end_impl.hh   |  66 ++++++++++++---------------
 cpu/ozone/lw_back_end.hh      | 103 +++++++++++-------------------------------
 cpu/ozone/lw_back_end_impl.hh |  67 ++++++++++++++++-----------
 cpu/ozone/lw_lsq.hh           |  27 ++---------
 cpu/ozone/lw_lsq_impl.hh      |  41 ++++-------------
 cpu/ozone/rename_table.hh     |  28 ++++++++++++
 cpu/ozone/thread_state.hh     |  38 ++++++++++++----
 10 files changed, 194 insertions(+), 269 deletions(-)

(limited to 'cpu/ozone')

diff --git a/cpu/ozone/dyn_inst.hh b/cpu/ozone/dyn_inst.hh
index f251c28ea..5d48bb361 100644
--- a/cpu/ozone/dyn_inst.hh
+++ b/cpu/ozone/dyn_inst.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005 The Regents of The University of Michigan
+ * Copyright (c) 2005-2006 The Regents of The University of Michigan
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -52,8 +52,6 @@ class OzoneDynInst : public BaseDynInst<Impl>
     // Typedef for DynInstPtr.  This is really just a RefCountingPtr<OoODynInst>.
     typedef typename Impl::DynInstPtr DynInstPtr;
 
-//    typedef typename Impl::BranchPred::BPredInfo BPredInfo;
-
     typedef TheISA::ExtMachInst ExtMachInst;
     typedef TheISA::MachInst MachInst;
     typedef TheISA::MiscReg MiscReg;
@@ -107,12 +105,6 @@ class OzoneDynInst : public BaseDynInst<Impl>
     // up.  In the future, you only really need a counter.
     bool memDepReady() { return srcMemInsts.empty(); }
 
-//    void setBPredInfo(const BPredInfo &bp_info) { bpInfo = bp_info; }
-
-//    BPredInfo &getBPredInfo() { return bpInfo; }
-
-//    OzoneXC *thread;
-
   private:
     void initInstPtrs();
 
@@ -133,20 +125,12 @@ class OzoneDynInst : public BaseDynInst<Impl>
      */
     DynInstPtr prevDestInst[MaxInstSrcRegs];
 
-//    BPredInfo bpInfo;
-
   public:
 
     Fault initiateAcc();
 
     Fault completeAcc();
-/*
-    template <class T>
-    Fault read(Addr addr, T &data, unsigned flags);
 
-    template <class T>
-    Fault write(T data, Addr addr, unsigned flags, uint64_t *res);
-*/
     // The register accessor methods provide the index of the
     // instruction's operand (e.g., 0 or 1), not the architectural
     // register index, to simplify the implementation of register
@@ -244,38 +228,4 @@ class OzoneDynInst : public BaseDynInst<Impl>
     bool iqItValid;
 };
 
-/*
-template<class Impl>
-template<class T>
-inline Fault
-OzoneDynInst<Impl>::read(Addr addr, T &data, unsigned flags)
-{
-    Fault fault = this->cpu->read(addr, data, flags, this);
-
-    if (this->traceData) {
-        this->traceData->setAddr(addr);
-        this->traceData->setData(data);
-    }
-
-    return fault;
-}
-
-template<class Impl>
-template<class T>
-inline Fault
-OzoneDynInst<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res)
-{
-    Fault fault = this->cpu->write(data, addr, flags, res, this);
-
-    this->storeSize = sizeof(T);
-    this->storeData = data;
-
-    if (this->traceData) {
-        this->traceData->setAddr(addr);
-        this->traceData->setData(data);
-    }
-
-    return fault;
-}
-*/
 #endif // __CPU_OZONE_DYN_INST_HH__
diff --git a/cpu/ozone/dyn_inst_impl.hh b/cpu/ozone/dyn_inst_impl.hh
index a7e4460a1..f891ec515 100644
--- a/cpu/ozone/dyn_inst_impl.hh
+++ b/cpu/ozone/dyn_inst_impl.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005 The Regents of The University of Michigan
+ * Copyright (c) 2005-2006 The Regents of The University of Michigan
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -266,12 +266,7 @@ OzoneDynInst<Impl>::hwrei()
     this->setNextPC(this->thread->readMiscReg(AlphaISA::IPR_EXC_ADDR));
 
     this->cpu->hwrei();
-/*
-    this->cpu->kernelStats->hwrei();
 
-    this->cpu->checkInterrupts = true;
-    this->cpu->lockFlag = false;
-*/
     // FIXME: XXX check for interrupts? XXX
     return NoFault;
 }
diff --git a/cpu/ozone/front_end.hh b/cpu/ozone/front_end.hh
index 326f7d2c9..dd382491f 100644
--- a/cpu/ozone/front_end.hh
+++ b/cpu/ozone/front_end.hh
@@ -1,14 +1,39 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
 
 #ifndef __CPU_OZONE_FRONT_END_HH__
 #define __CPU_OZONE_FRONT_END_HH__
 
 #include <deque>
 
-//#include "cpu/ozone/cpu.hh"
 #include "cpu/inst_seq.hh"
 #include "cpu/o3/bpred_unit.hh"
 #include "cpu/ozone/rename_table.hh"
-//#include "cpu/ozone/thread_state.hh"
 #include "mem/mem_req.hh"
 #include "sim/eventq.hh"
 #include "sim/stats.hh"
@@ -132,11 +157,6 @@ class FrontEnd
 
     typedef typename Impl::BranchPred BranchPred;
 
-    // Typedef for semi-opaque type that holds any information the branch
-    // predictor needs to update itself.  Only two fields are used outside of
-    // branch predictor, nextPC and isTaken.
-//    typedef typename BranchPred::BPredInfo BPredInfo;
-
     BranchPred branchPred;
 
     class ICacheCompletionEvent : public Event
diff --git a/cpu/ozone/front_end_impl.hh b/cpu/ozone/front_end_impl.hh
index cd57aeef4..15adae9b4 100644
--- a/cpu/ozone/front_end_impl.hh
+++ b/cpu/ozone/front_end_impl.hh
@@ -1,3 +1,30 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
 
 #include "arch/faults.hh"
 #include "arch/isa_traits.hh"
@@ -26,14 +53,6 @@ FrontEnd<Impl>::FrontEnd(Params *params)
 
     status = Idle;
 
-    // Setup branch predictor.
-
-    // Setup Memory Request
-/*
-    memReq = new MemReq();
-    memReq->asid = 0;
-    memReq->data = new uint8_t[64];
-*/
     memReq = NULL;
     // Size of cache block.
     cacheBlkSize = icacheInterface ? icacheInterface->getBlockSize() : 64;
@@ -77,7 +96,6 @@ void
 FrontEnd<Impl>::setXC(ExecContext *xc_ptr)
 {
     xc = xc_ptr;
-//    memReq->xc = xc;
 }
 
 template <class Impl>
@@ -321,7 +339,6 @@ FrontEnd<Impl>::tick()
             break;
         }
 
-        // if (generalizeFetch) {
         processInst(inst);
 
         if (status == SerializeBlocked) {
@@ -333,11 +350,6 @@ FrontEnd<Impl>::tick()
         instBuffer.push_back(inst);
         ++instBufferSize;
         ++num_inst;
-        // } else {
-        // fetch(num_inst);
-        // decode(num_inst);
-        // rename(num_inst);
-        // }
 
 #if FULL_SYSTEM
         if (inst->isQuiesce()) {
@@ -402,10 +414,6 @@ FrontEnd<Impl>::fetchCacheLine()
     // Translate the instruction request.
     fault = cpu->translateInstReq(memReq);
 
-    // In the case of faults, the fetch stage may need to stall and wait
-    // on what caused the fetch (ITB or Icache miss).
-//    assert(fault == NoFault);
-
     // Now do the timing access to see whether or not the instruction
     // exists within the cache.
     if (icacheInterface && fault == NoFault) {
@@ -466,7 +474,6 @@ FrontEnd<Impl>::processInst(DynInstPtr &inst)
 
     Addr inst_PC = inst->readPC();
 
-//    BPredInfo bp_info = branchPred.lookup(inst_PC);
     if (!inst->isControl()) {
         inst->setPredTarg(inst->readNextPC());
     } else {
@@ -482,7 +489,6 @@ FrontEnd<Impl>::processInst(DynInstPtr &inst)
             "%#x\n", inst->seqNum, inst_PC, next_PC);
 
 //    inst->setNextPC(next_PC);
-//    inst->setBPredInfo(bp_info);
 
     // Not sure where I should set this
     PC = next_PC;
@@ -535,7 +541,7 @@ void
 FrontEnd<Impl>::handleFault(Fault &fault)
 {
     DPRINTF(FE, "Fault at fetch, telling commit\n");
-//    backEnd->fetchFault(fault);
+
     // We're blocked on the back end until it handles this fault.
     status = TrapPending;
 
@@ -586,9 +592,6 @@ FrontEnd<Impl>::squash(const InstSeqNum &squash_num, const Addr &next_PC,
         instBuffer.pop_back();
         --instBufferSize;
 
-        // Fix up branch predictor if necessary.
-//        branchPred.undo(inst->getBPredInfo());
-
         freeRegs+= inst->numDestRegs();
     }
 
@@ -607,7 +610,6 @@ FrontEnd<Impl>::squash(const InstSeqNum &squash_num, const Addr &next_PC,
     // Clear the icache miss if it's outstanding.
     if (status == IcacheMissStall && icacheInterface) {
         DPRINTF(FE, "Squashing outstanding Icache miss.\n");
-//        icacheInterface->squash(0);
         memReq = NULL;
     }
 
@@ -693,17 +695,9 @@ template <class Impl>
 bool
 FrontEnd<Impl>::updateStatus()
 {
-//    bool rename_block = freeRegs <= 0;
     bool serialize_block = !backEnd->robEmpty() || instBufferSize;
     bool be_block = cpu->decoupledFrontEnd ? false : backEnd->isBlocked();
     bool ret_val = false;
-/*
-  // Should already be handled through addFreeRegs function
-    if (status == RenameBlocked && !rename_block) {
-        status = Running;
-        ret_val = true;
-    }
-*/
 
     if (status == SerializeBlocked && !serialize_block) {
         status = SerializeComplete;
@@ -753,10 +747,6 @@ FrontEnd<Impl>::getInstFromCacheline()
 
     // PC of inst is not in this cache block
     if (PC >= (cacheBlkPC + cacheBlkSize) || PC < cacheBlkPC || !cacheBlkValid) {
-//        DPRINTF(OoOCPU, "OoOCPU: PC is not in this cache block\n");
-//        DPRINTF(OoOCPU, "OoOCPU: PC: %#x, cacheBlkPC: %#x, cacheBlkValid: %i",
-//                PC, cacheBlkPC, cacheBlkValid);
-//        panic("Instruction not in cache line or cache line invalid!");
         return NULL;
     }
 
diff --git a/cpu/ozone/lw_back_end.hh b/cpu/ozone/lw_back_end.hh
index 770b66ad5..1c03ffb73 100644
--- a/cpu/ozone/lw_back_end.hh
+++ b/cpu/ozone/lw_back_end.hh
@@ -1,3 +1,30 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
 
 #ifndef __CPU_OZONE_LW_BACK_END_HH__
 #define __CPU_OZONE_LW_BACK_END_HH__
@@ -238,10 +265,6 @@ class LWBackEnd
     Counter funcExeInst;
 
   private:
-//    typedef typename Impl::InstQueue InstQueue;
-
-//    InstQueue IQ;
-
     typedef typename Impl::LdstQueue LdstQueue;
 
     LdstQueue LSQ;
@@ -342,8 +365,6 @@ class LWBackEnd
 
     bool exactFullStall;
 
-//    bool fetchRedirect[Impl::MaxThreads];
-
     // number of cycles stalled for D-cache misses
 /*    Stats::Scalar<> dcacheStallCycles;
       Counter lastDcacheStall;
@@ -438,43 +459,6 @@ template <class T>
 Fault
 LWBackEnd<Impl>::read(MemReqPtr &req, T &data, int load_idx)
 {
-/*    memReq->reset(addr, sizeof(T), flags);
-
-    // translate to physical address
-    Fault fault = cpu->translateDataReadReq(memReq);
-
-    // if we have a cache, do cache access too
-    if (fault == NoFault && dcacheInterface) {
-        memReq->cmd = Read;
-        memReq->completionEvent = NULL;
-        memReq->time = curTick;
-        memReq->flags &= ~INST_READ;
-        MemAccessResult result = dcacheInterface->access(memReq);
-
-        // Ugly hack to get an event scheduled *only* if the access is
-        // a miss.  We really should add first-class support for this
-        // at some point.
-        if (result != MA_HIT && dcacheInterface->doEvents()) {
-            // Fix this hack for keeping funcExeInst correct with loads that
-            // are executed twice.
-            --funcExeInst;
-
-            memReq->completionEvent = &cacheCompletionEvent;
-            lastDcacheStall = curTick;
-//	    unscheduleTickEvent();
-//	    status = DcacheMissStall;
-            DPRINTF(OzoneCPU, "Dcache miss stall!\n");
-        } else {
-            // do functional access
-            fault = thread->mem->read(memReq, data);
-
-        }
-    }
-*/
-/*
-    if (!dcacheInterface && (memReq->flags & UNCACHEABLE))
-        recordEvent("Uncached Read");
-*/
     return LSQ.read(req, data, load_idx);
 }
 
@@ -483,39 +467,6 @@ template <class T>
 Fault
 LWBackEnd<Impl>::write(MemReqPtr &req, T &data, int store_idx)
 {
-/*
-    memReq->reset(addr, sizeof(T), flags);
-
-    // translate to physical address
-    Fault fault = cpu->translateDataWriteReq(memReq);
-
-    if (fault == NoFault && dcacheInterface) {
-        memReq->cmd = Write;
-        memcpy(memReq->data,(uint8_t *)&data,memReq->size);
-        memReq->completionEvent = NULL;
-        memReq->time = curTick;
-        memReq->flags &= ~INST_READ;
-        MemAccessResult result = dcacheInterface->access(memReq);
-
-        // Ugly hack to get an event scheduled *only* if the access is
-        // a miss.  We really should add first-class support for this
-        // at some point.
-        if (result != MA_HIT && dcacheInterface->doEvents()) {
-            memReq->completionEvent = &cacheCompletionEvent;
-            lastDcacheStall = curTick;
-//	    unscheduleTickEvent();
-//	    status = DcacheMissStall;
-            DPRINTF(OzoneCPU, "Dcache miss stall!\n");
-        }
-    }
-
-    if (res && (fault == NoFault))
-        *res = memReq->result;
-        */
-/*
-    if (!dcacheInterface && (memReq->flags & UNCACHEABLE))
-        recordEvent("Uncached Write");
-*/
     return LSQ.write(req, data, store_idx);
 }
 
diff --git a/cpu/ozone/lw_back_end_impl.hh b/cpu/ozone/lw_back_end_impl.hh
index db0872e52..881d6e6b1 100644
--- a/cpu/ozone/lw_back_end_impl.hh
+++ b/cpu/ozone/lw_back_end_impl.hh
@@ -1,7 +1,34 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
 
-#include "encumbered/cpu/full/op_class.hh"
 #include "cpu/checker/cpu.hh"
 #include "cpu/ozone/lw_back_end.hh"
+#include "encumbered/cpu/full/op_class.hh"
 
 template <class Impl>
 void
@@ -194,7 +221,6 @@ LWBackEnd<Impl>::LWBackEnd(Params *params)
     switchedOut = false;
     switchPending = false;
 
-//    IQ.setBE(this);
     LSQ.setBE(this);
 
     // Setup IQ and LSQ with their parameters here.
@@ -202,8 +228,6 @@ LWBackEnd<Impl>::LWBackEnd(Params *params)
 
     instsToExecute = i2e.getWire(-1);
 
-//    IQ.setIssueExecQueue(&i2e);
-
     dispatchWidth = params->dispatchWidth ? params->dispatchWidth : width;
     issueWidth = params->issueWidth ? params->issueWidth : width;
     wbWidth = params->wbWidth ? params->wbWidth : width;
@@ -538,8 +562,6 @@ LWBackEnd<Impl>::regStats()
         .desc("ROB Occupancy per cycle")
         .flags(total | cdf)
         ;
-
-//    IQ.regStats();
 }
 
 template <class Impl>
@@ -652,17 +674,7 @@ LWBackEnd<Impl>::tick()
         squashFromTrap();
     } else if (xcSquash) {
         squashFromXC();
-    } /*else if (fetchHasFault && robEmpty() && frontEnd->isEmpty() && !LSQ.hasStoresToWB()) {
-        DPRINTF(BE, "ROB and front end empty, handling fetch fault\n");
-        Fault fetch_fault = frontEnd->getFault();
-        if (fetch_fault == NoFault) {
-            DPRINTF(BE, "Fetch no longer has a fault, cancelling out.\n");
-            fetchHasFault = false;
-        } else {
-            handleFault(fetch_fault);
-            fetchHasFault = false;
-        }
-        }*/
+    }
 #endif
 
     if (dispatchStatus != Blocked) {
@@ -773,7 +785,8 @@ LWBackEnd<Impl>::dispatchInsts()
                     inst->iqItValid = true;
                     waitingInsts++;
                 } else {
-                    DPRINTF(BE, "Instruction [sn:%lli] ready, addding to exeList.\n",
+                    DPRINTF(BE, "Instruction [sn:%lli] ready, addding to "
+                            "exeList.\n",
                             inst->seqNum);
                     exeList.push(inst);
                 }
@@ -784,7 +797,8 @@ LWBackEnd<Impl>::dispatchInsts()
                 inst->setExecuted();
                 inst->setCanCommit();
             } else {
-                DPRINTF(BE, "Instruction [sn:%lli] ready, addding to exeList.\n",
+                DPRINTF(BE, "Instruction [sn:%lli] ready, addding to "
+                        "exeList.\n",
                         inst->seqNum);
                 exeList.push(inst);
             }
@@ -993,7 +1007,7 @@ LWBackEnd<Impl>::instToCommit(DynInstPtr &inst)
 
     writeback_count[0]++;
 }
-
+#if 0
 template <class Impl>
 void
 LWBackEnd<Impl>::writebackInsts()
@@ -1040,7 +1054,7 @@ LWBackEnd<Impl>::writebackInsts()
     consumer_inst[0]+= consumer_insts;
     writeback_count[0]+= inst_num;
 }
-
+#endif
 template <class Impl>
 bool
 LWBackEnd<Impl>::commitInst(int inst_num)
@@ -1219,15 +1233,15 @@ LWBackEnd<Impl>::commitInst(int inst_num)
 
     --numInsts;
     ++thread->funcExeInst;
-    // Maybe move this to where the fault is handled; if the fault is handled,
-    // don't try to set this myself as the fault will set it.  If not, then
-    // I set thread->PC = thread->nextPC and thread->nextPC = thread->nextPC + 4.
+    // Maybe move this to where the fault is handled; if the fault is
+    // handled, don't try to set this myself as the fault will set it.
+    // If not, then I set thread->PC = thread->nextPC and
+    // thread->nextPC = thread->nextPC + 4.
     thread->setPC(thread->readNextPC());
     thread->setNextPC(thread->readNextPC() + sizeof(TheISA::MachInst));
     updateComInstStats(inst);
 
     // Write the done sequence number here.
-//    LSQ.commitLoads(inst->seqNum);
     toIEW->doneSeqNum = inst->seqNum;
     lastCommitCycle = curTick;
 
@@ -1357,7 +1371,8 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
     }
 
     while (memBarrier && memBarrier->seqNum > sn) {
-        DPRINTF(BE, "[sn:%lli] Memory barrier squashed (or previously squashed)\n", memBarrier->seqNum);
+        DPRINTF(BE, "[sn:%lli] Memory barrier squashed (or previously "
+                "squashed)\n", memBarrier->seqNum);
         memBarrier->clearMemDependents();
         if (memBarrier->memDepReady()) {
             DPRINTF(BE, "No previous barrier\n");
diff --git a/cpu/ozone/lw_lsq.hh b/cpu/ozone/lw_lsq.hh
index 042610324..6fe343b42 100644
--- a/cpu/ozone/lw_lsq.hh
+++ b/cpu/ozone/lw_lsq.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -138,7 +138,6 @@ class OzoneLWLSQ {
     /** Executes a load instruction. */
     Fault executeLoad(DynInstPtr &inst);
 
-//    Fault executeLoad(int lq_idx);
     /** Executes a store instruction. */
     Fault executeStore(DynInstPtr &inst);
 
@@ -304,10 +303,8 @@ class OzoneLWLSQ {
     Status _status;
 
     /** The store queue. */
-//    std::vector<SQEntry> storeQueue;
     std::list<SQEntry> storeQueue;
     /** The load queue. */
-//    std::vector<DynInstPtr> loadQueue;
     std::list<DynInstPtr> loadQueue;
 
     typedef typename std::list<SQEntry>::iterator SQIt;
@@ -365,7 +362,6 @@ class OzoneLWLSQ {
      */
     InstSeqNum stallingStoreIsn;
     /** The index of the above store. */
-//    int stallingLoadIdx;
     LQIt stallingLoad;
 
     /** Whether or not a load is blocked due to the memory system.  It is
@@ -398,8 +394,6 @@ class OzoneLWLSQ {
     template <class T>
     Fault write(MemReqPtr &req, T &data, int store_idx);
 
-    /** Returns the index of the head load instruction. */
-//    int getLoadHead() { return loadHead; }
     /** Returns the sequence number of the head load instruction. */
     InstSeqNum getLoadHeadSeqNum()
     {
@@ -411,8 +405,6 @@ class OzoneLWLSQ {
 
     }
 
-    /** Returns the index of the head store instruction. */
-//    int getStoreHead() { return storeHead; }
     /** Returns the sequence number of the head store instruction. */
     InstSeqNum getStoreHeadSeqNum()
     {
@@ -604,12 +596,7 @@ OzoneLWLSQ<Impl>::read(MemReqPtr &req, T &data, int load_idx)
         DPRINTF(OzoneLSQ, "D-cache: PC:%#x reading from paddr:%#x "
                 "vaddr:%#x flags:%i\n",
                 inst->readPC(), req->paddr, req->vaddr, req->flags);
-/*
-        Addr debug_addr = ULL(0xfffffc0000be81a8);
-        if (req->vaddr == debug_addr) {
-            debug_break();
-        }
-*/
+
         assert(!req->completionEvent);
         req->completionEvent =
             new typename BackEnd::LdWritebackEvent(inst, be);
@@ -631,9 +618,6 @@ OzoneLWLSQ<Impl>::read(MemReqPtr &req, T &data, int load_idx)
             _status = DcacheMissStall;
 
         } else {
-//            DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n",
-//                    inst->seqNum);
-
             DPRINTF(OzoneLSQ, "D-cache hit!\n");
         }
     } else {
@@ -664,12 +648,7 @@ OzoneLWLSQ<Impl>::write(MemReqPtr &req, T &data, int store_idx)
     assert(!req->data);
     req->data = new uint8_t[64];
     memcpy(req->data, (uint8_t *)&(*sq_it).data, req->size);
-/*
-    Addr debug_addr = ULL(0xfffffc0000be81a8);
-    if (req->vaddr == debug_addr) {
-        debug_break();
-    }
-*/
+
     // This function only writes the data to the store queue, so no fault
     // can happen here.
     return NoFault;
diff --git a/cpu/ozone/lw_lsq_impl.hh b/cpu/ozone/lw_lsq_impl.hh
index fdf6bff07..2f85a0396 100644
--- a/cpu/ozone/lw_lsq_impl.hh
+++ b/cpu/ozone/lw_lsq_impl.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -104,12 +104,6 @@ OzoneLWLSQ<Impl>::init(Params *params, unsigned maxLQEntries,
         SQIndices.push(i);
     }
 
-    // May want to initialize these entries to NULL
-
-//    loadHead = loadTail = 0;
-
-//    storeHead = storeWBIdx = storeTail = 0;
-
     usedPorts = 0;
     cachePorts = params->cachePorts;
 
@@ -197,8 +191,6 @@ OzoneLWLSQ<Impl>::insert(DynInstPtr &inst)
     } else {
         insertStore(inst);
     }
-
-//    inst->setInLSQ();
 }
 
 template <class Impl>
@@ -569,12 +561,9 @@ OzoneLWLSQ<Impl>::writebackStores()
             }
 
             if (result != MA_HIT && dcacheInterface->doEvents()) {
-//                Event *wb = NULL;
                 store_event->miss = true;
                 typename BackEnd::LdWritebackEvent *wb = NULL;
                 if (req->flags & LOCKED) {
-                    // Stx_C does not generate a system port transaction.
-//                    req->result=1;
                     wb = new typename BackEnd::LdWritebackEvent(inst,
                                                             be);
                     store_event->wbEvent = wb;
@@ -585,8 +574,6 @@ OzoneLWLSQ<Impl>::writebackStores()
 //                DPRINTF(Activity, "Active st accessing mem miss [sn:%lli]\n",
 //                        inst->seqNum);
 
-                // Will stores need their own kind of writeback events?
-                // Do stores even need writeback events?
                 be->addDcacheMiss(inst);
 
                 lastDcacheStall = curTick;
@@ -604,20 +591,16 @@ OzoneLWLSQ<Impl>::writebackStores()
 //                        inst->seqNum);
 
                 if (req->flags & LOCKED) {
-                    // Stx_C does not generate a system port transaction.
-/*                    if (req->flags & UNCACHEABLE) {
-                        req->result = 2;
-                    } else {
-                        req->result = 1;
-                    }
-*/
+                    // Stx_C does not generate a system port
+                    // transaction in the 21264, but that might be
+                    // hard to accomplish in this model.
+
                     typename BackEnd::LdWritebackEvent *wb =
                         new typename BackEnd::LdWritebackEvent(inst,
                                                                be);
                     store_event->wbEvent = wb;
                 }
                 sq_it--;
-//                completeStore(inst->sqIdx);
             }
         } else {
             panic("Must HAVE DCACHE!!!!!\n");
@@ -780,7 +763,7 @@ OzoneLWLSQ<Impl>::completeStore(int store_idx)
     SQIndices.push(inst->sqIdx);
     storeQueue.erase(sq_it);
     --stores;
-//    assert(!inst->isCompleted());
+
     inst->setCompleted();
     if (cpu->checker) {
         cpu->checker->tick(inst);
@@ -791,7 +774,6 @@ template <class Impl>
 void
 OzoneLWLSQ<Impl>::switchOut()
 {
-//    assert(loads == 0);
     assert(storesToWB == 0);
     switchedOut = true;
     SQIt sq_it = --(storeQueue.end());
@@ -804,8 +786,6 @@ OzoneLWLSQ<Impl>::switchOut()
 
         if ((*sq_it).size == 0 && !(*sq_it).completed) {
             sq_it--;
-//            completeStore(inst->sqIdx);
-
             continue;
         }
 
@@ -817,7 +797,8 @@ OzoneLWLSQ<Impl>::switchOut()
             continue;
         } else if ((*sq_it).req->flags & LOCKED) {
             sq_it--;
-            assert(!(*sq_it).canWB || ((*sq_it).canWB && (*sq_it).req->flags & LOCKED));
+            assert(!(*sq_it).canWB ||
+                   ((*sq_it).canWB && (*sq_it).req->flags & LOCKED));
             continue;
         }
 
@@ -886,12 +867,6 @@ OzoneLWLSQ<Impl>::takeOverFrom(ExecContext *old_xc)
         SQIndices.push(i);
     }
 
-    // May want to initialize these entries to NULL
-
-//    loadHead = loadTail = 0;
-
-//    storeHead = storeWBIdx = storeTail = 0;
-
     usedPorts = 0;
 
     loadFaultInst = storeFaultInst = memDepViolator = NULL;
diff --git a/cpu/ozone/rename_table.hh b/cpu/ozone/rename_table.hh
index afbf6ff32..6ee23b21b 100644
--- a/cpu/ozone/rename_table.hh
+++ b/cpu/ozone/rename_table.hh
@@ -1,3 +1,31 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
 #ifndef __CPU_OZONE_RENAME_TABLE_HH__
 #define __CPU_OZONE_RENAME_TABLE_HH__
 
diff --git a/cpu/ozone/thread_state.hh b/cpu/ozone/thread_state.hh
index 269fc6459..c86c3a720 100644
--- a/cpu/ozone/thread_state.hh
+++ b/cpu/ozone/thread_state.hh
@@ -1,3 +1,30 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
 
 #ifndef __CPU_OZONE_THREAD_STATE_HH__
 #define __CPU_OZONE_THREAD_STATE_HH__
@@ -62,19 +89,14 @@ struct OzoneThreadState : public ThreadState {
 
     void setStatus(Status new_status) { _status = new_status; }
 
-    RenameTable<Impl> renameTable; // Should I include backend and frontend
-    // tables here?  For the ozone CPU, maybe, for the new full CPU, probably
-    // not...you wouldn't want threads just accessing the backend/frontend
-    // rename tables.
-    Addr PC; // What should these be set to?  Probably the committed ones.
+    RenameTable<Impl> renameTable;
+    Addr PC;
     Addr nextPC;
 
-    // Current instruction?
+    // Current instruction
     TheISA::MachInst inst;
 
     TheISA::RegFile regs;
-    // Front end?  Back end?
-//    MemReqPtr memReq;
 
     typename Impl::FullCPU *cpu;
 
-- 
cgit v1.2.3


From 3fe35232322daef87a0b85d7f3ca4c18330ed7c4 Mon Sep 17 00:00:00 2001
From: Kevin Lim <ktlim@umich.edu>
Date: Wed, 24 May 2006 14:31:06 -0400
Subject: Support new flags now used instead of flags in decoder.isa.

cpu/ozone/front_end_impl.hh:
cpu/ozone/lw_back_end_impl.hh:
cpu/ozone/lw_lsq_impl.hh:
    Support the newly added isIprAccess and isStoreConditional flags.

--HG--
extra : convert_revision : 2e756fd1913cf600650afc39dd715d59b9b89c42
---
 cpu/ozone/front_end_impl.hh   | 10 +++++++---
 cpu/ozone/lw_back_end_impl.hh | 12 ++++++++----
 cpu/ozone/lw_lsq_impl.hh      |  7 +++----
 3 files changed, 18 insertions(+), 11 deletions(-)

(limited to 'cpu/ozone')

diff --git a/cpu/ozone/front_end_impl.hh b/cpu/ozone/front_end_impl.hh
index 15adae9b4..ffbcf3340 100644
--- a/cpu/ozone/front_end_impl.hh
+++ b/cpu/ozone/front_end_impl.hh
@@ -503,11 +503,14 @@ FrontEnd<Impl>::processBarriers(DynInstPtr &inst)
     if (serializeNext) {
         inst->setSerializeBefore();
         serializeNext = false;
-    } else if (!inst->isSerializing()) {
+    } else if (!inst->isSerializing() &&
+               !inst->isIprAccess() &&
+               !inst->isStoreConditional()) {
         return false;
     }
 
-    if (inst->isSerializeBefore() && !inst->isSerializeHandled()) {
+    if ((inst->isIprAccess() || inst->isSerializeBefore()) &&
+        !inst->isSerializeHandled()) {
         DPRINTF(FE, "Serialize before instruction encountered.\n");
 
         if (!inst->isTempSerializeBefore()) {
@@ -523,7 +526,8 @@ FrontEnd<Impl>::processBarriers(DynInstPtr &inst)
 
         barrierInst = inst;
         return true;
-    } else if (inst->isSerializeAfter() && !inst->isSerializeHandled()) {
+    } else if ((inst->isStoreConditional() || inst->isSerializeAfter())
+               && !inst->isSerializeHandled()) {
         DPRINTF(FE, "Serialize after instruction encountered.\n");
 
         inst->setSerializeHandled();
diff --git a/cpu/ozone/lw_back_end_impl.hh b/cpu/ozone/lw_back_end_impl.hh
index 881d6e6b1..41b4ea24b 100644
--- a/cpu/ozone/lw_back_end_impl.hh
+++ b/cpu/ozone/lw_back_end_impl.hh
@@ -66,8 +66,9 @@ LWBackEnd<Impl>::wakeDependents(DynInstPtr &inst, bool memory_deps)
         DPRINTF(BE, "Marking source reg ready [sn:%lli] in IQ\n", dep_inst->seqNum);
 
         if (dep_inst->readyToIssue() && dep_inst->isInROB() &&
-            !dep_inst->isNonSpeculative() &&
-            dep_inst->memDepReady() && !dep_inst->isMemBarrier() && !dep_inst->isWriteBarrier()) {
+            !dep_inst->isNonSpeculative() && !dep_inst->isStoreConditional() &&
+            dep_inst->memDepReady() && !dep_inst->isMemBarrier() &&
+            !dep_inst->isWriteBarrier()) {
             DPRINTF(BE, "Adding instruction to exeList [sn:%lli]\n",
                     dep_inst->seqNum);
             exeList.push(dep_inst);
@@ -768,7 +769,9 @@ LWBackEnd<Impl>::dispatchInsts()
             }
             memBarrier = inst;
             inst->setCanCommit();
-        } else if (inst->readyToIssue() && !inst->isNonSpeculative()) {
+        } else if (inst->readyToIssue() &&
+                   !inst->isNonSpeculative() &&
+                   !inst->isStoreConditional()) {
             if (inst->isMemRef()) {
 
                 LSQ.insert(inst);
@@ -803,7 +806,7 @@ LWBackEnd<Impl>::dispatchInsts()
                 exeList.push(inst);
             }
         } else {
-            if (inst->isNonSpeculative()) {
+            if (inst->isNonSpeculative() || inst->isStoreConditional()) {
                 inst->setCanCommit();
                 DPRINTF(BE, "Adding non speculative instruction\n");
             }
@@ -1079,6 +1082,7 @@ LWBackEnd<Impl>::commitInst(int inst_num)
     // or store inst.  Signal backwards that it should be executed.
     if (!inst->isExecuted()) {
         if (inst->isNonSpeculative() ||
+            inst->isStoreConditional() ||
             inst->isMemBarrier() ||
             inst->isWriteBarrier()) {
 #if !FULL_SYSTEM
diff --git a/cpu/ozone/lw_lsq_impl.hh b/cpu/ozone/lw_lsq_impl.hh
index 2f85a0396..f72bbb1cc 100644
--- a/cpu/ozone/lw_lsq_impl.hh
+++ b/cpu/ozone/lw_lsq_impl.hh
@@ -364,10 +364,9 @@ OzoneLWLSQ<Impl>::executeStore(DynInstPtr &store_inst)
         if (store_fault != NoFault) {
             panic("Fault in a store instruction!");
             storeFaultInst = store_inst;
-        } else if (store_inst->isNonSpeculative()) {
-            // Nonspeculative accesses (namely store conditionals)
-            // need to set themselves as able to writeback if we
-            // haven't had a fault by here.
+        } else if (store_inst->isStoreConditional()) {
+            // Store conditionals need to set themselves as able to
+            // writeback if we haven't had a fault by here.
             (*sq_it).canWB = true;
 
             ++storesToWB;
-- 
cgit v1.2.3