From 7940c10ace28d5b93a61d4d278e6647e0c497149 Mon Sep 17 00:00:00 2001
From: Kevin Lim <ktlim@umich.edu>
Date: Fri, 2 Jun 2006 18:15:20 -0400
Subject: Fixes to get compiling to work.  This is mainly fixing up some
 includes; changing functions within the XCs; changing MemReqPtrs to Requests
 or Packets where appropriate.

Currently the O3 and Ozone CPUs do not work in the new memory system; I still need to fix up the ports to work and handle responses properly.  This check-in is so that the merge between m5 and newmem is no longer outstanding.

src/SConscript:
    Need to include FU Pool for new CPU model.  I'll try to figure out a cleaner way to handle this in the future.
src/base/traceflags.py:
    Include new trace flags; fix up merge mess-up.
src/cpu/SConscript:
    Include base_dyn_inst.cc as one of the sources.
    Don't compile the Ozone CPU for now.
src/cpu/base.cc:
    Remove an extra } from the merge.
src/cpu/base_dyn_inst.cc:
    Fixes to make compiling work.  Don't instantiate the OzoneCPU for now.
src/cpu/base_dyn_inst.hh:
src/cpu/o3/2bit_local_pred.cc:
src/cpu/o3/alpha_cpu_builder.cc:
src/cpu/o3/alpha_cpu_impl.hh:
src/cpu/o3/alpha_dyn_inst.hh:
src/cpu/o3/alpha_params.hh:
src/cpu/o3/bpred_unit.cc:
src/cpu/o3/btb.hh:
src/cpu/o3/commit.hh:
src/cpu/o3/commit_impl.hh:
src/cpu/o3/cpu.cc:
src/cpu/o3/cpu.hh:
src/cpu/o3/fetch.hh:
src/cpu/o3/fetch_impl.hh:
src/cpu/o3/free_list.hh:
src/cpu/o3/iew.hh:
src/cpu/o3/iew_impl.hh:
src/cpu/o3/inst_queue.hh:
src/cpu/o3/inst_queue_impl.hh:
src/cpu/o3/regfile.hh:
src/cpu/o3/sat_counter.hh:
src/cpu/op_class.hh:
src/cpu/ozone/cpu.hh:
src/cpu/checker/cpu.cc:
src/cpu/checker/cpu.hh:
src/cpu/checker/exec_context.hh:
src/cpu/checker/o3_cpu_builder.cc:
src/cpu/ozone/cpu_impl.hh:
src/mem/request.hh:
src/cpu/o3/fu_pool.hh:
src/cpu/o3/lsq.hh:
src/cpu/o3/lsq_unit.hh:
src/cpu/o3/lsq_unit_impl.hh:
src/cpu/o3/thread_state.hh:
src/cpu/ozone/back_end.hh:
src/cpu/ozone/dyn_inst.cc:
src/cpu/ozone/dyn_inst.hh:
src/cpu/ozone/front_end.hh:
src/cpu/ozone/inorder_back_end.hh:
src/cpu/ozone/lw_back_end.hh:
src/cpu/ozone/lw_lsq.hh:
src/cpu/ozone/ozone_impl.hh:
src/cpu/ozone/thread_state.hh:
    Fixes to get compiling to work.
src/cpu/o3/alpha_cpu.hh:
    Fixes to get compiling to work.
    Float reg accessors have changed, as well as MemReqPtrs to RequestPtrs.
src/cpu/o3/alpha_dyn_inst_impl.hh:
    Fixes to get compiling to work.
    Pass in the packet to the completeAcc function.
    Fix up syscall function.

--HG--
rename : cpu/activity.cc => src/cpu/activity.cc
rename : cpu/activity.hh => src/cpu/activity.hh
rename : cpu/checker/cpu.cc => src/cpu/checker/cpu.cc
rename : cpu/checker/cpu.hh => src/cpu/checker/cpu.hh
rename : cpu/checker/cpu_builder.cc => src/cpu/checker/cpu_builder.cc
rename : cpu/checker/exec_context.hh => src/cpu/checker/exec_context.hh
rename : cpu/checker/o3_cpu_builder.cc => src/cpu/checker/o3_cpu_builder.cc
rename : cpu/o3/dep_graph.hh => src/cpu/o3/dep_graph.hh
rename : cpu/o3/fu_pool.cc => src/cpu/o3/fu_pool.cc
rename : cpu/o3/fu_pool.hh => src/cpu/o3/fu_pool.hh
rename : cpu/o3/lsq.cc => src/cpu/o3/lsq.cc
rename : cpu/o3/lsq.hh => src/cpu/o3/lsq.hh
rename : cpu/o3/lsq_impl.hh => src/cpu/o3/lsq_impl.hh
rename : cpu/o3/lsq_unit.cc => src/cpu/o3/lsq_unit.cc
rename : cpu/o3/lsq_unit.hh => src/cpu/o3/lsq_unit.hh
rename : cpu/o3/lsq_unit_impl.hh => src/cpu/o3/lsq_unit_impl.hh
rename : cpu/o3/scoreboard.cc => src/cpu/o3/scoreboard.cc
rename : cpu/o3/scoreboard.hh => src/cpu/o3/scoreboard.hh
rename : cpu/o3/thread_state.hh => src/cpu/o3/thread_state.hh
rename : cpu/ozone/back_end.cc => src/cpu/ozone/back_end.cc
rename : cpu/ozone/back_end.hh => src/cpu/ozone/back_end.hh
rename : cpu/ozone/back_end_impl.hh => src/cpu/ozone/back_end_impl.hh
rename : cpu/ozone/cpu_builder.cc => src/cpu/ozone/cpu_builder.cc
rename : cpu/ozone/dyn_inst.cc => src/cpu/ozone/dyn_inst.cc
rename : cpu/ozone/dyn_inst.hh => src/cpu/ozone/dyn_inst.hh
rename : cpu/ozone/dyn_inst_impl.hh => src/cpu/ozone/dyn_inst_impl.hh
rename : cpu/ozone/front_end.cc => src/cpu/ozone/front_end.cc
rename : cpu/ozone/front_end.hh => src/cpu/ozone/front_end.hh
rename : cpu/ozone/front_end_impl.hh => src/cpu/ozone/front_end_impl.hh
rename : cpu/ozone/inorder_back_end.cc => src/cpu/ozone/inorder_back_end.cc
rename : cpu/ozone/inorder_back_end.hh => src/cpu/ozone/inorder_back_end.hh
rename : cpu/ozone/inorder_back_end_impl.hh => src/cpu/ozone/inorder_back_end_impl.hh
rename : cpu/ozone/inst_queue.cc => src/cpu/ozone/inst_queue.cc
rename : cpu/ozone/inst_queue.hh => src/cpu/ozone/inst_queue.hh
rename : cpu/ozone/inst_queue_impl.hh => src/cpu/ozone/inst_queue_impl.hh
rename : cpu/ozone/lsq_unit.cc => src/cpu/ozone/lsq_unit.cc
rename : cpu/ozone/lsq_unit.hh => src/cpu/ozone/lsq_unit.hh
rename : cpu/ozone/lsq_unit_impl.hh => src/cpu/ozone/lsq_unit_impl.hh
rename : cpu/ozone/lw_back_end.cc => src/cpu/ozone/lw_back_end.cc
rename : cpu/ozone/lw_back_end.hh => src/cpu/ozone/lw_back_end.hh
rename : cpu/ozone/lw_back_end_impl.hh => src/cpu/ozone/lw_back_end_impl.hh
rename : cpu/ozone/lw_lsq.cc => src/cpu/ozone/lw_lsq.cc
rename : cpu/ozone/lw_lsq.hh => src/cpu/ozone/lw_lsq.hh
rename : cpu/ozone/lw_lsq_impl.hh => src/cpu/ozone/lw_lsq_impl.hh
rename : cpu/ozone/null_predictor.hh => src/cpu/ozone/null_predictor.hh
rename : cpu/ozone/ozone_impl.hh => src/cpu/ozone/ozone_impl.hh
rename : cpu/ozone/rename_table.cc => src/cpu/ozone/rename_table.cc
rename : cpu/ozone/rename_table.hh => src/cpu/ozone/rename_table.hh
rename : cpu/ozone/rename_table_impl.hh => src/cpu/ozone/rename_table_impl.hh
rename : cpu/ozone/simple_impl.hh => src/cpu/ozone/simple_impl.hh
rename : cpu/ozone/simple_params.hh => src/cpu/ozone/simple_params.hh
rename : cpu/ozone/thread_state.hh => src/cpu/ozone/thread_state.hh
rename : cpu/quiesce_event.cc => src/cpu/quiesce_event.cc
rename : cpu/quiesce_event.hh => src/cpu/quiesce_event.hh
rename : cpu/thread_state.hh => src/cpu/thread_state.hh
rename : python/m5/objects/FUPool.py => src/python/m5/objects/FUPool.py
rename : python/m5/objects/OzoneCPU.py => src/python/m5/objects/OzoneCPU.py
rename : python/m5/objects/SimpleOzoneCPU.py => src/python/m5/objects/SimpleOzoneCPU.py
extra : convert_revision : ca7f0fbf65ee1a70d482fb4eda9a1840c7f9b8f8
---
 src/cpu/o3/2bit_local_pred.cc     |   1 +
 src/cpu/o3/alpha_cpu.hh           |  80 ++--
 src/cpu/o3/alpha_cpu_builder.cc   |  13 +-
 src/cpu/o3/alpha_cpu_impl.hh      |  86 ++--
 src/cpu/o3/alpha_dyn_inst.hh      |   9 +-
 src/cpu/o3/alpha_dyn_inst_impl.hh |  12 +-
 src/cpu/o3/alpha_params.hh        |   9 +-
 src/cpu/o3/bpred_unit.cc          |   4 +-
 src/cpu/o3/btb.hh                 |   1 +
 src/cpu/o3/commit.hh              |   4 -
 src/cpu/o3/commit_impl.hh         |   3 +-
 src/cpu/o3/cpu.cc                 |  21 +-
 src/cpu/o3/cpu.hh                 |  22 +-
 src/cpu/o3/dep_graph.hh           | 213 ++++++++++
 src/cpu/o3/fetch.hh               |  67 +--
 src/cpu/o3/fetch_impl.hh          | 180 ++++----
 src/cpu/o3/free_list.hh           |   1 +
 src/cpu/o3/fu_pool.cc             | 295 +++++++++++++
 src/cpu/o3/fu_pool.hh             | 162 +++++++
 src/cpu/o3/iew.hh                 |  19 -
 src/cpu/o3/iew_impl.hh            |  56 +--
 src/cpu/o3/inst_queue.hh          |   2 +-
 src/cpu/o3/inst_queue_impl.hh     |   3 +-
 src/cpu/o3/lsq.cc                 |  36 ++
 src/cpu/o3/lsq.hh                 | 324 ++++++++++++++
 src/cpu/o3/lsq_impl.hh            | 538 +++++++++++++++++++++++
 src/cpu/o3/lsq_unit.cc            |  36 ++
 src/cpu/o3/lsq_unit.hh            | 629 +++++++++++++++++++++++++++
 src/cpu/o3/lsq_unit_impl.hh       | 866 ++++++++++++++++++++++++++++++++++++++
 src/cpu/o3/regfile.hh             |  36 +-
 src/cpu/o3/sat_counter.hh         |   1 +
 src/cpu/o3/scoreboard.cc          | 106 +++++
 src/cpu/o3/scoreboard.hh          | 114 +++++
 src/cpu/o3/thread_state.hh        | 112 +++++
 34 files changed, 3743 insertions(+), 318 deletions(-)
 create mode 100644 src/cpu/o3/dep_graph.hh
 create mode 100644 src/cpu/o3/fu_pool.cc
 create mode 100644 src/cpu/o3/fu_pool.hh
 create mode 100644 src/cpu/o3/lsq.cc
 create mode 100644 src/cpu/o3/lsq.hh
 create mode 100644 src/cpu/o3/lsq_impl.hh
 create mode 100644 src/cpu/o3/lsq_unit.cc
 create mode 100644 src/cpu/o3/lsq_unit.hh
 create mode 100644 src/cpu/o3/lsq_unit_impl.hh
 create mode 100644 src/cpu/o3/scoreboard.cc
 create mode 100644 src/cpu/o3/scoreboard.hh
 create mode 100644 src/cpu/o3/thread_state.hh

(limited to 'src/cpu/o3')

diff --git a/src/cpu/o3/2bit_local_pred.cc b/src/cpu/o3/2bit_local_pred.cc
index c3fb2fdb8..2f768fd34 100644
--- a/src/cpu/o3/2bit_local_pred.cc
+++ b/src/cpu/o3/2bit_local_pred.cc
@@ -27,6 +27,7 @@
  */
 
 #include "base/intmath.hh"
+#include "base/misc.hh"
 #include "base/trace.hh"
 #include "cpu/o3/2bit_local_pred.hh"
 
diff --git a/src/cpu/o3/alpha_cpu.hh b/src/cpu/o3/alpha_cpu.hh
index 1bab0703e..fe88a1acc 100644
--- a/src/cpu/o3/alpha_cpu.hh
+++ b/src/cpu/o3/alpha_cpu.hh
@@ -39,11 +39,15 @@ namespace Kernel {
     class Statistics;
 };
 
+class TranslatingPort;
+
 template <class Impl>
 class AlphaFullCPU : public FullO3CPU<Impl>
 {
   protected:
     typedef TheISA::IntReg IntReg;
+    typedef TheISA::FloatReg FloatReg;
+    typedef TheISA::FloatRegBits FloatRegBits;
     typedef TheISA::MiscReg MiscReg;
     typedef TheISA::RegFile RegFile;
     typedef TheISA::MiscRegFile MiscRegFile;
@@ -69,7 +73,7 @@ class AlphaFullCPU : public FullO3CPU<Impl>
 
         virtual int readCpuId() { return cpu->cpu_id; }
 
-        virtual FunctionalMemory *getMemPtr() { return thread->mem; }
+        virtual TranslatingPort *getMemPort() { return /*thread->port*/ NULL; }
 
 #if FULL_SYSTEM
         virtual System *getSystemPtr() { return cpu->system; }
@@ -135,19 +139,23 @@ class AlphaFullCPU : public FullO3CPU<Impl>
 
         virtual uint64_t readIntReg(int reg_idx);
 
-        virtual float readFloatRegSingle(int reg_idx);
+        virtual FloatReg readFloatReg(int reg_idx, int width);
+
+        virtual FloatReg readFloatReg(int reg_idx);
 
-        virtual double readFloatRegDouble(int reg_idx);
+        virtual FloatRegBits readFloatRegBits(int reg_idx, int width);
 
-        virtual uint64_t readFloatRegInt(int reg_idx);
+        virtual FloatRegBits readFloatRegBits(int reg_idx);
 
         virtual void setIntReg(int reg_idx, uint64_t val);
 
-        virtual void setFloatRegSingle(int reg_idx, float val);
+        virtual void setFloatReg(int reg_idx, FloatReg val, int width);
+
+        virtual void setFloatReg(int reg_idx, FloatReg val);
 
-        virtual void setFloatRegDouble(int reg_idx, double val);
+        virtual void setFloatRegBits(int reg_idx, FloatRegBits val, int width);
 
-        virtual void setFloatRegInt(int reg_idx, uint64_t val);
+        virtual void setFloatRegBits(int reg_idx, FloatRegBits val);
 
         virtual uint64_t readPC()
         { return cpu->readPC(thread->tid); }
@@ -159,6 +167,15 @@ class AlphaFullCPU : public FullO3CPU<Impl>
 
         virtual void setNextPC(uint64_t val);
 
+        virtual uint64_t readNextNPC()
+        {
+            panic("Alpha has no NextNPC!");
+            return 0;
+        }
+
+        virtual void setNextNPC(uint64_t val)
+        { panic("Alpha has no NextNPC!"); }
+
         virtual MiscReg readMiscReg(int misc_reg)
         { return cpu->readMiscReg(misc_reg, thread->tid); }
 
@@ -193,10 +210,14 @@ class AlphaFullCPU : public FullO3CPU<Impl>
 
         virtual void setSyscallReturn(SyscallReturn return_value);
 
-        virtual void syscall() { return cpu->syscall(thread->tid); }
+        virtual void syscall(int64_t callnum)
+        { return cpu->syscall(callnum, thread->tid); }
 
         virtual Counter readFuncExeInst() { return thread->funcExeInst; }
 #endif
+        virtual void changeRegFileContext(TheISA::RegFile::ContextParam param,
+                                          TheISA::RegFile::ContextVal val)
+        { panic("Not supported on Alpha!"); }
     };
 
 #if FULL_SYSTEM
@@ -211,52 +232,43 @@ class AlphaFullCPU : public FullO3CPU<Impl>
 
 #if FULL_SYSTEM
     /** Translates instruction requestion. */
-    Fault translateInstReq(MemReqPtr &req)
+    Fault translateInstReq(RequestPtr &req)
     {
         return itb->translate(req);
     }
 
     /** Translates data read request. */
-    Fault translateDataReadReq(MemReqPtr &req)
+    Fault translateDataReadReq(RequestPtr &req)
     {
         return dtb->translate(req, false);
     }
 
     /** Translates data write request. */
-    Fault translateDataWriteReq(MemReqPtr &req)
+    Fault translateDataWriteReq(RequestPtr &req)
     {
         return dtb->translate(req, true);
     }
 
 #else
-    Fault dummyTranslation(MemReqPtr &req)
-    {
-#if 0
-        assert((req->vaddr >> 48 & 0xffff) == 0);
-#endif
-
-        // put the asid in the upper 16 bits of the paddr
-        req->paddr = req->vaddr & ~((Addr)0xffff << sizeof(Addr) * 8 - 16);
-        req->paddr = req->paddr | (Addr)req->asid << sizeof(Addr) * 8 - 16;
-        return NoFault;
-    }
-
     /** Translates instruction requestion in syscall emulation mode. */
-    Fault translateInstReq(MemReqPtr &req)
+    Fault translateInstReq(RequestPtr &req)
     {
-        return dummyTranslation(req);
+        int tid = req->getThreadNum();
+        return this->thread[tid]->process->pTable->translate(req);
     }
 
     /** Translates data read request in syscall emulation mode. */
-    Fault translateDataReadReq(MemReqPtr &req)
+    Fault translateDataReadReq(RequestPtr &req)
     {
-        return dummyTranslation(req);
+        int tid = req->getThreadNum();
+        return this->thread[tid]->process->pTable->translate(req);
     }
 
     /** Translates data write request in syscall emulation mode. */
-    Fault translateDataWriteReq(MemReqPtr &req)
+    Fault translateDataWriteReq(RequestPtr &req)
     {
-        return dummyTranslation(req);
+        int tid = req->getThreadNum();
+        return this->thread[tid]->process->pTable->translate(req);
     }
 
 #endif
@@ -298,7 +310,7 @@ class AlphaFullCPU : public FullO3CPU<Impl>
     /** Executes a syscall.
      * @todo: Determine if this needs to be virtual.
      */
-    void syscall(int thread_num);
+    void syscall(int64_t callnum, int thread_num);
     /** Gets a syscall argument. */
     IntReg getSyscallArg(int i, int tid);
 
@@ -311,7 +323,7 @@ class AlphaFullCPU : public FullO3CPU<Impl>
 
     /** Read from memory function. */
     template <class T>
-    Fault read(MemReqPtr &req, T &data)
+    Fault read(RequestPtr &req, T &data)
     {
 #if 0
 #if FULL_SYSTEM && THE_ISA == ALPHA_ISA
@@ -338,14 +350,14 @@ class AlphaFullCPU : public FullO3CPU<Impl>
 
     /** CPU read function, forwards read to LSQ. */
     template <class T>
-    Fault read(MemReqPtr &req, T &data, int load_idx)
+    Fault read(RequestPtr &req, T &data, int load_idx)
     {
         return this->iew.ldstQueue.read(req, data, load_idx);
     }
 
     /** Write to memory function. */
     template <class T>
-    Fault write(MemReqPtr &req, T &data)
+    Fault write(RequestPtr &req, T &data)
     {
 #if 0
 #if FULL_SYSTEM && THE_ISA == ALPHA_ISA
@@ -417,7 +429,7 @@ class AlphaFullCPU : public FullO3CPU<Impl>
 
     /** CPU write function, forwards write to LSQ. */
     template <class T>
-    Fault write(MemReqPtr &req, T &data, int store_idx)
+    Fault write(RequestPtr &req, T &data, int store_idx)
     {
         return this->iew.ldstQueue.write(req, data, store_idx);
     }
diff --git a/src/cpu/o3/alpha_cpu_builder.cc b/src/cpu/o3/alpha_cpu_builder.cc
index b0d812edc..6ac408364 100644
--- a/src/cpu/o3/alpha_cpu_builder.cc
+++ b/src/cpu/o3/alpha_cpu_builder.cc
@@ -33,7 +33,6 @@
 #include "cpu/o3/alpha_impl.hh"
 #include "cpu/o3/alpha_params.hh"
 #include "cpu/o3/fu_pool.hh"
-#include "mem/cache/base_cache.hh"
 #include "sim/builder.hh"
 
 class DerivAlphaFullCPU : public AlphaFullCPU<AlphaSimpleImpl>
@@ -60,7 +59,7 @@ SimObjectVectorParam<Process *> workload;
 //SimObjectParam<PageTable *> page_table;
 #endif // FULL_SYSTEM
 
-SimObjectParam<FunctionalMemory *> mem;
+SimObjectParam<MemObject *> mem;
 
 SimObjectParam<BaseCPU *> checker;
 
@@ -69,9 +68,6 @@ Param<Counter> max_insts_all_threads;
 Param<Counter> max_loads_any_thread;
 Param<Counter> max_loads_all_threads;
 
-SimObjectParam<BaseCache *> icache;
-SimObjectParam<BaseCache *> dcache;
-
 Param<unsigned> cachePorts;
 
 Param<unsigned> decodeToFetchDelay;
@@ -169,7 +165,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU)
 //    INIT_PARAM(page_table, "Page table"),
 #endif // FULL_SYSTEM
 
-    INIT_PARAM_DFLT(mem, "Memory", NULL),
+    INIT_PARAM(mem, "Memory"),
 
     INIT_PARAM_DFLT(checker, "Checker CPU", NULL),
 
@@ -188,9 +184,6 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU)
                     "count",
                     0),
 
-    INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL),
-    INIT_PARAM_DFLT(dcache, "L1 data cache", NULL),
-
     INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200),
 
     INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"),
@@ -327,8 +320,6 @@ CREATE_SIM_OBJECT(DerivAlphaFullCPU)
     //
     // Caches
     //
-    params->icacheInterface = icache ? icache->getInterface() : NULL;
-    params->dcacheInterface = dcache ? dcache->getInterface() : NULL;
     params->cachePorts = cachePorts;
 
     params->decodeToFetchDelay = decodeToFetchDelay;
diff --git a/src/cpu/o3/alpha_cpu_impl.hh b/src/cpu/o3/alpha_cpu_impl.hh
index f7f0a3842..a890cfd90 100644
--- a/src/cpu/o3/alpha_cpu_impl.hh
+++ b/src/cpu/o3/alpha_cpu_impl.hh
@@ -31,7 +31,6 @@
 #include "base/statistics.hh"
 #include "base/timebuf.hh"
 #include "cpu/checker/exec_context.hh"
-#include "mem/mem_interface.hh"
 #include "sim/sim_events.hh"
 #include "sim/stats.hh"
 
@@ -68,11 +67,9 @@ AlphaFullCPU<Impl>::AlphaFullCPU(Params *params)
         this->thread[i]->setStatus(ExecContext::Suspended);
 #else
         if (i < params->workload.size()) {
-            DPRINTF(FullCPU, "FullCPU: Workload[%i]'s starting PC is %#x, "
-                    "process is %#x",
-                    i, params->workload[i]->prog_entry, this->thread[i]);
+            DPRINTF(FullCPU, "FullCPU: Workload[%i] process is %#x",
+                    i, this->thread[i]);
             this->thread[i] = new Thread(this, i, params->workload[i], i);
-            assert(params->workload[i]->getMemory() != NULL);
 
             this->thread[i]->setStatus(ExecContext::Suspended);
             //usedTids[i] = true;
@@ -160,7 +157,7 @@ void
 AlphaFullCPU<Impl>::AlphaXC::takeOverFrom(ExecContext *old_context)
 {
     // some things should already be set up
-    assert(getMemPtr() == old_context->getMemPtr());
+    assert(getMemPort() == old_context->getMemPort());
 #if FULL_SYSTEM
     assert(getSystemPtr() == old_context->getSystemPtr());
 #else
@@ -366,15 +363,14 @@ AlphaFullCPU<Impl>::AlphaXC::copyArchRegs(ExecContext *xc)
     }
 
     // Then loop through the floating point registers.
-    for (int i = 0; i < AlphaISA::NumFloatRegs; ++i)
-    {
-        renamed_reg = this->renameMap.lookup(i + AlphaISA::FP_Base_DepTag);
-        this->cpuXC->setFloatRegBits(i,
-            this->regFile.readFloatRegBits(renamed_reg));
+    for (int i = 0; i < AlphaISA::NumFloatRegs; ++i) {
+        renamed_reg = cpu->renameMap[tid].lookup(i + AlphaISA::FP_Base_DepTag);
+        cpu->setFloatRegBits(renamed_reg,
+                             xc->readFloatRegBits(i));
     }
 
     // Copy the misc regs.
-    cpu->regFile.miscRegs[tid].copyMiscRegs(xc);
+    copyMiscRegs(xc, this);
 
     // Then finally set the PC and the next PC.
     cpu->setPC(xc->readPC(), tid);
@@ -398,24 +394,40 @@ AlphaFullCPU<Impl>::AlphaXC::readIntReg(int reg_idx)
 }
 
 template <class Impl>
-float
-AlphaFullCPU<Impl>::AlphaXC::readFloatRegSingle(int reg_idx)
+FloatReg
+AlphaFullCPU<Impl>::AlphaXC::readFloatReg(int reg_idx, int width)
 {
     DPRINTF(Fault, "Reading float register through the XC!\n");
-    return cpu->readArchFloatRegSingle(reg_idx, thread->tid);
+    switch(width) {
+      case 32:
+        return cpu->readArchFloatRegSingle(reg_idx, thread->tid);
+      case 64:
+        return cpu->readArchFloatRegDouble(reg_idx, thread->tid);
+      default:
+        panic("Unsupported width!");
+        return 0;
+    }
 }
 
 template <class Impl>
-double
-AlphaFullCPU<Impl>::AlphaXC::readFloatRegDouble(int reg_idx)
+FloatReg
+AlphaFullCPU<Impl>::AlphaXC::readFloatReg(int reg_idx)
 {
     DPRINTF(Fault, "Reading float register through the XC!\n");
-    return cpu->readArchFloatRegDouble(reg_idx, thread->tid);
+    return cpu->readArchFloatRegSingle(reg_idx, thread->tid);
 }
 
 template <class Impl>
-uint64_t
-AlphaFullCPU<Impl>::AlphaXC::readFloatRegInt(int reg_idx)
+FloatRegBits
+AlphaFullCPU<Impl>::AlphaXC::readFloatRegBits(int reg_idx, int width)
+{
+    DPRINTF(Fault, "Reading floatint register through the XC!\n");
+    return cpu->readArchFloatRegInt(reg_idx, thread->tid);
+}
+
+template <class Impl>
+FloatRegBits
+AlphaFullCPU<Impl>::AlphaXC::readFloatRegBits(int reg_idx)
 {
     DPRINTF(Fault, "Reading floatint register through the XC!\n");
     return cpu->readArchFloatRegInt(reg_idx, thread->tid);
@@ -435,10 +447,17 @@ AlphaFullCPU<Impl>::AlphaXC::setIntReg(int reg_idx, uint64_t val)
 
 template <class Impl>
 void
-AlphaFullCPU<Impl>::AlphaXC::setFloatRegSingle(int reg_idx, float val)
+AlphaFullCPU<Impl>::AlphaXC::setFloatReg(int reg_idx, FloatReg val, int width)
 {
     DPRINTF(Fault, "Setting float register through the XC!\n");
-    cpu->setArchFloatRegSingle(reg_idx, val, thread->tid);
+    switch(width) {
+      case 32:
+        cpu->setArchFloatRegSingle(reg_idx, val, thread->tid);
+        break;
+      case 64:
+        cpu->setArchFloatRegDouble(reg_idx, val, thread->tid);
+        break;
+    }
 
     if (!thread->trapPending && !thread->inSyscall) {
         cpu->squashFromXC(thread->tid);
@@ -447,10 +466,23 @@ AlphaFullCPU<Impl>::AlphaXC::setFloatRegSingle(int reg_idx, float val)
 
 template <class Impl>
 void
-AlphaFullCPU<Impl>::AlphaXC::setFloatRegDouble(int reg_idx, double val)
+AlphaFullCPU<Impl>::AlphaXC::setFloatReg(int reg_idx, FloatReg val)
 {
     DPRINTF(Fault, "Setting float register through the XC!\n");
-    cpu->setArchFloatRegDouble(reg_idx, val, thread->tid);
+    cpu->setArchFloatRegSingle(reg_idx, val, thread->tid);
+
+    if (!thread->trapPending && !thread->inSyscall) {
+        cpu->squashFromXC(thread->tid);
+    }
+}
+
+template <class Impl>
+void
+AlphaFullCPU<Impl>::AlphaXC::setFloatRegBits(int reg_idx, FloatRegBits val,
+                                             int width)
+{
+    DPRINTF(Fault, "Setting floatint register through the XC!\n");
+    cpu->setArchFloatRegInt(reg_idx, val, thread->tid);
 
     if (!thread->trapPending && !thread->inSyscall) {
         cpu->squashFromXC(thread->tid);
@@ -459,7 +491,7 @@ AlphaFullCPU<Impl>::AlphaXC::setFloatRegDouble(int reg_idx, double val)
 
 template <class Impl>
 void
-AlphaFullCPU<Impl>::AlphaXC::setFloatRegInt(int reg_idx, uint64_t val)
+AlphaFullCPU<Impl>::AlphaXC::setFloatRegBits(int reg_idx, FloatRegBits val)
 {
     DPRINTF(Fault, "Setting floatint register through the XC!\n");
     cpu->setArchFloatRegInt(reg_idx, val, thread->tid);
@@ -723,7 +755,7 @@ AlphaFullCPU<Impl>::processInterrupts()
 
 template <class Impl>
 void
-AlphaFullCPU<Impl>::syscall(int tid)
+AlphaFullCPU<Impl>::syscall(int64_t callnum, int tid)
 {
     DPRINTF(FullCPU, "AlphaFullCPU: [tid:%i] Executing syscall().\n\n", tid);
 
@@ -734,7 +766,7 @@ AlphaFullCPU<Impl>::syscall(int tid)
     ++(this->thread[tid]->funcExeInst);
 
     // Execute the actual syscall.
-    this->thread[tid]->syscall();
+    this->thread[tid]->syscall(callnum);
 
     // Decrease funcExeInst by one as the normal commit will handle
     // incrementing it.
diff --git a/src/cpu/o3/alpha_dyn_inst.hh b/src/cpu/o3/alpha_dyn_inst.hh
index b03c8c337..f289bbf0d 100644
--- a/src/cpu/o3/alpha_dyn_inst.hh
+++ b/src/cpu/o3/alpha_dyn_inst.hh
@@ -29,11 +29,14 @@
 #ifndef __CPU_O3_ALPHA_DYN_INST_HH__
 #define __CPU_O3_ALPHA_DYN_INST_HH__
 
+#include "arch/isa_traits.hh"
 #include "cpu/base_dyn_inst.hh"
 #include "cpu/inst_seq.hh"
 #include "cpu/o3/alpha_cpu.hh"
 #include "cpu/o3/alpha_impl.hh"
 
+class Packet;
+
 /**
  * Mostly implementation & ISA specific AlphaDynInst. As with most
  * other classes in the new CPU model, it is templated on the Impl to
@@ -56,6 +59,8 @@ class AlphaDynInst : public BaseDynInst<Impl>
     typedef TheISA::RegIndex RegIndex;
     /** Integer register index type. */
     typedef TheISA::IntReg   IntReg;
+    typedef TheISA::FloatReg FloatReg;
+    typedef TheISA::FloatRegBits FloatRegBits;
     /** Misc register index type. */
     typedef TheISA::MiscReg  MiscReg;
 
@@ -79,7 +84,7 @@ class AlphaDynInst : public BaseDynInst<Impl>
     Fault initiateAcc();
 
     /** Completes the access.  Only valid for memory operations. */
-    Fault completeAcc();
+    Fault completeAcc(Packet *pkt);
 
   private:
     /** Initializes variables. */
@@ -123,7 +128,7 @@ class AlphaDynInst : public BaseDynInst<Impl>
     bool simPalCheck(int palFunc);
 #else
     /** Calls a syscall. */
-    void syscall();
+    void syscall(int64_t callnum);
 #endif
 
   private:
diff --git a/src/cpu/o3/alpha_dyn_inst_impl.hh b/src/cpu/o3/alpha_dyn_inst_impl.hh
index 541d5ab82..16c236b4c 100644
--- a/src/cpu/o3/alpha_dyn_inst_impl.hh
+++ b/src/cpu/o3/alpha_dyn_inst_impl.hh
@@ -96,15 +96,13 @@ AlphaDynInst<Impl>::initiateAcc()
 
 template <class Impl>
 Fault
-AlphaDynInst<Impl>::completeAcc()
+AlphaDynInst<Impl>::completeAcc(Packet *pkt)
 {
     if (this->isLoad()) {
-        this->fault = this->staticInst->completeAcc(this->req->data,
-                                                    this,
+        this->fault = this->staticInst->completeAcc(pkt, this,
                                                     this->traceData);
     } else if (this->isStore()) {
-        this->fault = this->staticInst->completeAcc((uint8_t*)&this->req->result,
-                                                    this,
+        this->fault = this->staticInst->completeAcc(pkt, this,
                                                     this->traceData);
     } else {
         panic("Unknown type!");
@@ -168,9 +166,9 @@ AlphaDynInst<Impl>::simPalCheck(int palFunc)
 #else
 template <class Impl>
 void
-AlphaDynInst<Impl>::syscall()
+AlphaDynInst<Impl>::syscall(int64_t callnum)
 {
-    this->cpu->syscall(this->threadNumber);
+    this->cpu->syscall(callnum, this->threadNumber);
 }
 #endif
 
diff --git a/src/cpu/o3/alpha_params.hh b/src/cpu/o3/alpha_params.hh
index e3acf2c05..04366e8dd 100644
--- a/src/cpu/o3/alpha_params.hh
+++ b/src/cpu/o3/alpha_params.hh
@@ -35,8 +35,7 @@
 class AlphaDTB;
 class AlphaITB;
 class FUPool;
-class FunctionalMemory;
-class MemInterface;
+class MemObject;
 class Process;
 class System;
 
@@ -60,7 +59,7 @@ class AlphaSimpleParams : public BaseFullCPU::Params
     //Page Table
 //    PageTable *pTable;
 
-    FunctionalMemory *mem;
+    MemObject *mem;
 
     BaseCPU *checker;
 
@@ -69,8 +68,8 @@ class AlphaSimpleParams : public BaseFullCPU::Params
     //
     // Caches
     //
-    MemInterface *icacheInterface;
-    MemInterface *dcacheInterface;
+//    MemInterface *icacheInterface;
+//    MemInterface *dcacheInterface;
 
     unsigned cachePorts;
 
diff --git a/src/cpu/o3/bpred_unit.cc b/src/cpu/o3/bpred_unit.cc
index 92344111f..dcc5ceb80 100644
--- a/src/cpu/o3/bpred_unit.cc
+++ b/src/cpu/o3/bpred_unit.cc
@@ -30,8 +30,8 @@
 #include "cpu/o3/alpha_impl.hh"
 #include "cpu/o3/alpha_dyn_inst.hh"
 #include "cpu/ozone/ozone_impl.hh"
-#include "cpu/ozone/simple_impl.hh"
+//#include "cpu/ozone/simple_impl.hh"
 
 template class TwobitBPredUnit<AlphaSimpleImpl>;
 template class TwobitBPredUnit<OzoneImpl>;
-template class TwobitBPredUnit<SimpleImpl>;
+//template class TwobitBPredUnit<SimpleImpl>;
diff --git a/src/cpu/o3/btb.hh b/src/cpu/o3/btb.hh
index b9ff42573..c7dc1808b 100644
--- a/src/cpu/o3/btb.hh
+++ b/src/cpu/o3/btb.hh
@@ -31,6 +31,7 @@
 
 // For Addr type.
 #include "arch/isa_traits.hh"
+#include "base/misc.hh"
 
 class DefaultBTB
 {
diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh
index 66abf8dc6..c019ef4c7 100644
--- a/src/cpu/o3/commit.hh
+++ b/src/cpu/o3/commit.hh
@@ -34,7 +34,6 @@
 #include "base/timebuf.hh"
 #include "cpu/exetrace.hh"
 #include "cpu/inst_seq.hh"
-#include "mem/memory_interface.hh"
 
 template <class>
 class O3ThreadState;
@@ -301,9 +300,6 @@ class DefaultCommit
     /** Pointer to FullCPU. */
     FullCPU *cpu;
 
-    /** Memory interface.  Used for d-cache accesses. */
-    MemInterface *dcacheInterface;
-
     std::vector<Thread *> thread;
 
     Fault fetchFault;
diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh
index 346a8bc1c..97703c430 100644
--- a/src/cpu/o3/commit_impl.hh
+++ b/src/cpu/o3/commit_impl.hh
@@ -64,8 +64,7 @@ DefaultCommit<Impl>::TrapEvent::description()
 
 template <class Impl>
 DefaultCommit<Impl>::DefaultCommit(Params *params)
-    : dcacheInterface(params->dcacheInterface),
-      squashCounter(0),
+    : squashCounter(0),
       iewToCommitDelay(params->iewToCommitDelay),
       commitToIEWDelay(params->commitToIEWDelay),
       renameToROBDelay(params->renameToROBDelay),
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index ed02a845b..4e0bb2d2d 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -46,6 +46,7 @@
 #include "sim/stat_control.hh"
 
 using namespace std;
+using namespace TheISA;
 
 BaseFullCPU::BaseFullCPU(Params *params)
     : BaseCPU(params), cpu_id(0)
@@ -121,14 +122,9 @@ FullO3CPU<Impl>::FullO3CPU(Params *params)
       system(params->system),
       memCtrl(system->memctrl),
       physmem(system->physmem),
-      mem(params->mem),
-#else
-//      pTable(params->pTable),
-      mem(params->workload[0]->getMemory()),
 #endif // FULL_SYSTEM
+      mem(params->mem),
       switchCount(0),
-      icacheInterface(params->icacheInterface),
-      dcacheInterface(params->dcacheInterface),
       deferRegistration(params->deferRegistration),
       numThreads(number_of_threads)
 {
@@ -782,6 +778,7 @@ FullO3CPU<Impl>::readFloatReg(int reg_idx)
 template <class Impl>
 FloatRegBits
 FullO3CPU<Impl>::readFloatRegBits(int reg_idx, int width)
+{
     return regFile.readFloatRegBits(reg_idx, width);
 }
 
@@ -843,7 +840,7 @@ FullO3CPU<Impl>::readArchFloatRegSingle(int reg_idx, unsigned tid)
     int idx = reg_idx + TheISA::FP_Base_DepTag;
     PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
 
-    return regFile.readFloatRegSingle(phys_reg);
+    return regFile.readFloatReg(phys_reg);
 }
 
 template <class Impl>
@@ -853,7 +850,7 @@ FullO3CPU<Impl>::readArchFloatRegDouble(int reg_idx, unsigned tid)
     int idx = reg_idx + TheISA::FP_Base_DepTag;
     PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
 
-    return regFile.readFloatRegDouble(phys_reg);
+    return regFile.readFloatReg(phys_reg, 64);
 }
 
 template <class Impl>
@@ -863,7 +860,7 @@ FullO3CPU<Impl>::readArchFloatRegInt(int reg_idx, unsigned tid)
     int idx = reg_idx + TheISA::FP_Base_DepTag;
     PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
 
-    return regFile.readFloatRegInt(phys_reg);
+    return regFile.readFloatRegBits(phys_reg);
 }
 
 template <class Impl>
@@ -881,7 +878,7 @@ FullO3CPU<Impl>::setArchFloatRegSingle(int reg_idx, float val, unsigned tid)
 {
     PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx);
 
-    regFile.setFloatRegSingle(phys_reg, val);
+    regFile.setFloatReg(phys_reg, val);
 }
 
 template <class Impl>
@@ -890,7 +887,7 @@ FullO3CPU<Impl>::setArchFloatRegDouble(int reg_idx, double val, unsigned tid)
 {
     PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx);
 
-    regFile.setFloatRegDouble(phys_reg, val);
+    regFile.setFloatReg(phys_reg, val, 64);
 }
 
 template <class Impl>
@@ -899,7 +896,7 @@ FullO3CPU<Impl>::setArchFloatRegInt(int reg_idx, uint64_t val, unsigned tid)
 {
     PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx);
 
-    regFile.setFloatRegInt(phys_reg, val);
+    regFile.setFloatRegBits(phys_reg, val);
 }
 
 template <class Impl>
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
index bed95ad54..c791b2948 100644
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -35,6 +35,7 @@
 #include <set>
 #include <vector>
 
+#include "arch/isa_traits.hh"
 #include "base/statistics.hh"
 #include "base/timebuf.hh"
 #include "config/full_system.hh"
@@ -50,7 +51,7 @@
 template <class>
 class Checker;
 class ExecContext;
-class MemInterface;
+class MemObject;
 class Process;
 
 class BaseFullCPU : public BaseCPU
@@ -63,6 +64,8 @@ class BaseFullCPU : public BaseCPU
 
     void regStats();
 
+    int readCpuId() { return cpu_id; }
+
   protected:
     int cpu_id;
 };
@@ -71,6 +74,9 @@ template <class Impl>
 class FullO3CPU : public BaseFullCPU
 {
   public:
+    typedef TheISA::FloatReg FloatReg;
+    typedef TheISA::FloatRegBits FloatRegBits;
+
     // Typedefs from the Impl here.
     typedef typename Impl::CPUPol CPUPolicy;
     typedef typename Impl::Params Params;
@@ -226,14 +232,6 @@ class FullO3CPU : public BaseFullCPU
     int getDataAsid(unsigned tid)
     { return regFile.miscRegs[tid].getDataAsid(); }
 #else
-    /** Check if this address is a valid instruction address. */
-    bool validInstAddr(Addr addr,unsigned tid)
-    { return thread[tid]->validInstAddr(addr); }
-
-    /** Check if this address is a valid data address. */
-    bool validDataAddr(Addr addr,unsigned tid)
-    { return thread[tid]->validDataAddr(addr); }
-
     /** Get instruction asid. */
     int getInstAsid(unsigned tid)
     { return thread[tid]->asid; }
@@ -259,13 +257,13 @@ class FullO3CPU : public BaseFullCPU
 
     void setIntReg(int reg_idx, uint64_t val);
 
-    void setFloatReg(int reg_idx, FloatReg val, int width);
+    void setFloatReg(int reg_idx, FloatReg val);
 
     void setFloatReg(int reg_idx, FloatReg val, int width);
 
     void setFloatRegBits(int reg_idx, FloatRegBits val);
 
-    void setFloatRegBits(int reg_idx, FloatRegBits val);
+    void setFloatRegBits(int reg_idx, FloatRegBits val, int width);
 
     uint64_t readArchIntReg(int reg_idx, unsigned tid);
 
@@ -464,7 +462,7 @@ class FullO3CPU : public BaseFullCPU
 #endif
 
     /** Pointer to memory. */
-    FunctionalMemory *mem;
+    MemObject *mem;
 
     Sampler *sampler;
 
diff --git a/src/cpu/o3/dep_graph.hh b/src/cpu/o3/dep_graph.hh
new file mode 100644
index 000000000..f8ae38da4
--- /dev/null
+++ b/src/cpu/o3/dep_graph.hh
@@ -0,0 +1,213 @@
+
+#ifndef __CPU_O3_DEP_GRAPH_HH__
+#define __CPU_O3_DEP_GRAPH_HH__
+
+#include "cpu/o3/comm.hh"
+
+/** One node of a dependency chain: an instruction plus a link onward. */
+template <class DynInstPtr>
+class DependencyEntry
+{
+  public:
+    /** Starts out holding no instruction and linked to nothing. */
+    DependencyEntry() : inst(NULL), next(NULL) { }
+    /** Instruction recorded in this node. */
+    DynInstPtr inst;
+    // Could also record which arch. register the dependence waits on.
+    /** Following node in the chain, or NULL when this is the last one. */
+    DependencyEntry<DynInstPtr> *next;
+};
+
+/** Table with one dependency chain per register, indexed by register. */
+template <class DynInstPtr>
+class DependencyGraph
+{
+  public:
+    typedef DependencyEntry<DynInstPtr> DepEntry;
+    /** Builds an empty graph; resize() must run before indexed access. */
+    DependencyGraph()
+        : dependGraph(NULL), numEntries(0), memAllocCounter(0),
+          nodesTraversed(0), nodesRemoved(0)
+    { }
+    /** Frees the current table, then allocates num_entries empty slots. */
+    void resize(int num_entries);
+    /** Frees every chained node and clears every slot's instruction. */
+    void reset();
+    /** Adds new_inst at the front of the chain for register idx. */
+    void insert(PhysRegIndex idx, DynInstPtr &new_inst);
+    /** Stores new_inst in the slot (head entry) of register idx. */
+    void setInst(PhysRegIndex idx, DynInstPtr &new_inst)
+    { dependGraph[idx].inst = new_inst; }
+    /** Clears the instruction stored in the slot of register idx. */
+    void clearInst(PhysRegIndex idx)
+    { dependGraph[idx].inst = NULL; }
+    /** Unlinks inst_to_remove from the chain for register idx. */
+    void remove(PhysRegIndex idx, DynInstPtr &inst_to_remove);
+    /** Detaches and returns the first chained inst of idx (NULL if none). */
+    DynInstPtr pop(PhysRegIndex idx);
+    /** True when nothing is chained behind the slot of register idx. */
+    bool empty(PhysRegIndex idx) { return !dependGraph[idx].next; }
+    /** Debugging function to dump out the dependency graph. */
+    void dump();
+
+  private:
+    /** Array of linked lists.  Each list holds all the instructions
+     *  that depend upon a given register, and the register's index
+     *  selects the list; ie all instructions in flight that are
+     *  dependent upon r34 will be in the linked list of dependGraph[34].
+     */
+    DepEntry *dependGraph;
+    /** Number of slots allocated in dependGraph by resize(). */
+    int numEntries;
+    // Debug variable, remove when done testing.
+    unsigned memAllocCounter;
+
+  public:
+    uint64_t nodesTraversed;  // chain hops taken while searching in remove()
+    uint64_t nodesRemoved;    // remove() calls that found a non-empty chain
+};
+
+template <class DynInstPtr>
+void
+DependencyGraph<DynInstPtr>::resize(int num_entries)
+{
+    reset();                // free nodes still chained to the old table
+    delete [] dependGraph;  // harmless on first use: pointer starts NULL
+    numEntries = num_entries;
+    dependGraph = new DepEntry[numEntries];
+}
+
+template <class DynInstPtr>
+void
+DependencyGraph<DynInstPtr>::reset()
+{
+    // Visit every register slot, free the nodes chained behind it one
+    // by one, then leave the slot with no instruction and no chain.
+    for (int reg = 0; reg < numEntries; ++reg) {
+        DepEntry &slot = dependGraph[reg];
+        DepEntry *node = slot.next;
+
+        while (node != NULL) {
+            DepEntry *doomed = node;
+
+            // Step forward before the node goes away.
+            node = node->next;
+
+            // Clear the node's instruction, then free the node itself.
+            doomed->inst = NULL;
+            delete doomed;
+
+            memAllocCounter--;
+        }
+
+        if (slot.inst)
+            slot.inst = NULL;
+        slot.next = NULL;
+    }
+}
+
+template <class DynInstPtr>
+void
+DependencyGraph<DynInstPtr>::insert(PhysRegIndex idx, DynInstPtr &new_inst)
+{
+    // A new dependent always goes to the front of idx's chain, so the
+    // latest insertion is the first entry pop() will hand back.
+    DepEntry &slot = dependGraph[idx];
+
+    // Build the node and aim it at whatever currently leads the chain.
+    DepEntry *node = new DepEntry;
+    node->inst = new_inst;
+    node->next = slot.next;
+
+    // Hook the node in directly behind the slot.
+    slot.next = node;
+
+    ++memAllocCounter;
+}
+
+
+template <class DynInstPtr>
+void
+DependencyGraph<DynInstPtr>::remove(PhysRegIndex idx,
+                                    DynInstPtr &inst_to_remove)
+{
+    // 'link' is always the next-pointer that leads to 'node', which
+    // makes the unlink below a single store wherever the node sits.
+    DepEntry **link = &dependGraph[idx].next;
+    DepEntry *node = *link;
+
+    // An instruction being removed must have been chained here at an
+    // earlier time, so the chain should only be empty when the
+    // dependent instruction was already ready.  In that case there is
+    // nothing to unlink.
+    if (!node)
+        return;
+
+    ++nodesRemoved;
+
+    // Walk forward until the node holding inst_to_remove is reached.
+    while (node->inst != inst_to_remove) {
+        link = &node->next;
+        node = *link;
+        ++nodesTraversed;
+        // Running off the end means the instruction was never chained.
+        assert(node != NULL);
+    }
+
+    // Route the chain around the node.
+    *link = node->next;
+
+    --memAllocCounter;
+
+    // Could push this off to the destructor of DependencyEntry
+    node->inst = NULL;
+    delete node;
+}
+
+template <class DynInstPtr>
+DynInstPtr
+DependencyGraph<DynInstPtr>::pop(PhysRegIndex idx)
+{
+    // Detach and return the first inst chained behind idx (NULL if none).
+    DynInstPtr popped = NULL;
+    DepEntry *front = dependGraph[idx].next;
+    if (front != NULL) {
+        dependGraph[idx].next = front->next;
+        popped = front->inst;
+        front->inst = NULL;
+        delete front;
+        memAllocCounter--;
+    }
+    return popped;
+}
+
+template <class DynInstPtr>
+void
+DependencyGraph<DynInstPtr>::dump()
+{
+    // One output line per register slot: the slot's own instruction
+    // (labelled producer) if set, then every instruction chained
+    // behind it (labelled consumer).
+    for (int reg = 0; reg < numEntries; ++reg) {
+        DepEntry *slot = &dependGraph[reg];
+
+        if (slot->inst) {
+            cprintf("dependGraph[%i]: producer: %#x [sn:%lli] consumer: ",
+                    reg, slot->inst->readPC(), slot->inst->seqNum);
+        } else {
+            cprintf("dependGraph[%i]: No producer. consumer: ", reg);
+        }
+
+        for (DepEntry *node = slot->next; node != NULL; node = node->next) {
+            cprintf("%#x [sn:%lli] ",
+                    node->inst->readPC(), node->inst->seqNum);
+        }
+
+        cprintf("\n");
+    }
+
+    // Finish with the debug count of chain nodes currently allocated.
+    cprintf("memAllocCounter: %i\n", memAllocCounter);
+}
+
+#endif // __CPU_O3_DEP_GRAPH_HH__
diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh
index 3fcfdc3a1..2b1d93cb7 100644
--- a/src/cpu/o3/fetch.hh
+++ b/src/cpu/o3/fetch.hh
@@ -29,10 +29,12 @@
 #ifndef __CPU_O3_FETCH_HH__
 #define __CPU_O3_FETCH_HH__
 
+#include "arch/utility.hh"
 #include "base/statistics.hh"
 #include "base/timebuf.hh"
 #include "cpu/pc_event.hh"
-#include "mem/mem_interface.hh"
+#include "mem/packet.hh"
+#include "mem/port.hh"
 #include "sim/eventq.hh"
 
 class Sampler;
@@ -65,6 +67,32 @@ class DefaultFetch
     typedef TheISA::MachInst MachInst;
     typedef TheISA::ExtMachInst ExtMachInst;
 
+    class IcachePort : public Port  // port fetch uses for timing requests
+    {
+      protected:
+        DefaultFetch<Impl> *fetch;  // stage that receives the responses
+
+      public:
+        IcachePort(DefaultFetch<Impl> *_fetch)
+            : Port(_fetch->name() + "-iport"), fetch(_fetch)
+        { }
+
+      protected:
+        virtual Tick recvAtomic(PacketPtr pkt);  // unexpected here: panics
+        // Functional accesses are not expected either; this panics too.
+        virtual void recvFunctional(PacketPtr pkt);
+        // RangeChange is ignored; any other status change panics.
+        virtual void recvStatusChange(Status status);
+        // Clears both lists.
+        virtual void getDeviceAddressRanges(AddrRangeList &resp,
+                                            AddrRangeList &snoop)
+        { resp.clear(); snoop.clear(); }
+        // Forwards pkt to fetch->processCacheCompletion(); returns true.
+        virtual bool recvTiming(PacketPtr pkt);
+        // Retry is not supported yet; this panics.
+        virtual void recvRetry();
+    };
+
   public:
     /** Overall fetch status. Used to determine if the CPU can
      * deschedule itsef due to a lack of activity.
@@ -84,8 +112,9 @@ class DefaultFetch
         TrapPending,
         QuiescePending,
         SwitchOut,
-        IcacheMissStall,
-        IcacheMissComplete
+        IcacheWaitResponse,
+        IcacheRetry,
+        IcacheAccessComplete
     };
 
     /** Fetching Policy, Add new policies here.*/
@@ -110,28 +139,6 @@ class DefaultFetch
     /** List that has the threads organized by priority. */
     std::list<unsigned> priorityList;
 
-  public:
-    class CacheCompletionEvent : public Event
-    {
-      private:
-        MemReqPtr req;
-        /** Pointer to fetch. */
-        DefaultFetch *fetch;
-        /** Thread id. */
-//        unsigned threadId;
-
-      public:
-        /** Constructs a cache completion event, which tells fetch when the
-         * cache miss is complete.
-         */
-        CacheCompletionEvent(MemReqPtr &_req, DefaultFetch *_fetch);
-
-        /** Processes cache completion event. */
-        virtual void process();
-        /** Returns the description of the cache completion event. */
-        virtual const char *description();
-    };
-
   public:
     /** DefaultFetch constructor. */
     DefaultFetch(Params *params);
@@ -161,7 +168,7 @@ class DefaultFetch
     void initStage();
 
     /** Processes cache completion event. */
-    void processCacheCompletion(MemReqPtr &req);
+    void processCacheCompletion(PacketPtr pkt);
 
     void switchOut();
 
@@ -295,8 +302,10 @@ class DefaultFetch
     /** Wire used to write any information heading to decode. */
     typename TimeBuffer<FetchStruct>::wire toDecode;
 
+    MemObject *mem;
+
     /** Icache interface. */
-    MemInterface *icacheInterface;
+    IcachePort *icachePort;
 
     /** BPredUnit. */
     BPredUnit branchPred;
@@ -305,8 +314,8 @@ class DefaultFetch
 
     Addr nextPC[Impl::MaxThreads];
 
-    /** Memory request used to access cache. */
-    MemReqPtr memReq[Impl::MaxThreads];
+    /** Memory packet used to access cache. */
+    PacketPtr memPkt[Impl::MaxThreads];
 
     /** Variable that tracks if fetch has written to the time buffer this
      * cycle. Used to tell CPU if there is activity this cycle.
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index 1c5e508f6..a80afbcf4 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -27,12 +27,13 @@
  */
 
 #include "arch/isa_traits.hh"
+#include "arch/utility.hh"
 #include "cpu/exetrace.hh"
 #include "cpu/o3/fetch.hh"
-#include "mem/base_mem.hh"
-#include "mem/mem_interface.hh"
-#include "mem/mem_req.hh"
+#include "mem/packet.hh"
+#include "mem/request.hh"
 #include "sim/byteswap.hh"
+#include "sim/host.hh"
 #include "sim/root.hh"
 
 #if FULL_SYSTEM
@@ -42,42 +43,67 @@
 #include "mem/functional/memory_control.hh"
 #include "mem/functional/physical.hh"
 #include "sim/system.hh"
-#else // !FULL_SYSTEM
-#include "mem/functional/functional.hh"
 #endif // FULL_SYSTEM
 
 #include <algorithm>
 
 using namespace std;
+using namespace TheISA;
 
 template<class Impl>
-DefaultFetch<Impl>::CacheCompletionEvent::CacheCompletionEvent(MemReqPtr &_req,
-                                                               DefaultFetch *_fetch)
-    : Event(&mainEventQueue, Delayed_Writeback_Pri),
-      req(_req),
-      fetch(_fetch)
+Tick
+DefaultFetch<Impl>::IcachePort::recvAtomic(PacketPtr pkt)
 {
-    this->setFlags(Event::AutoDelete);
+    panic("DefaultFetch doesn't expect recvAtomic callback!");
+    return curTick;
 }
 
 template<class Impl>
 void
-DefaultFetch<Impl>::CacheCompletionEvent::process()
+DefaultFetch<Impl>::IcachePort::recvFunctional(PacketPtr pkt)
+{
+    panic("DefaultFetch doesn't expect recvFunctional callback!");
+}
+
+template<class Impl>
+void
+DefaultFetch<Impl>::IcachePort::recvStatusChange(Status status)
+{
+    if (status == RangeChange)
+        return;
+
+    panic("DefaultFetch doesn't expect recvStatusChange callback!");
+}
+
+template<class Impl>
+bool
+DefaultFetch<Impl>::IcachePort::recvTiming(Packet *pkt)
 {
-    fetch->processCacheCompletion(req);
+    fetch->processCacheCompletion(pkt);
+    return true;
 }
 
 template<class Impl>
-const char *
-DefaultFetch<Impl>::CacheCompletionEvent::description()
+void
+DefaultFetch<Impl>::IcachePort::recvRetry()
 {
-    return "DefaultFetch cache completion event";
+    panic("DefaultFetch doesn't support retry yet.");
+    // we shouldn't get a retry unless we have a packet that we're
+    // waiting to transmit
+/*
+    assert(cpu->dcache_pkt != NULL);
+    assert(cpu->_status == DcacheRetry);
+    Packet *tmp = cpu->dcache_pkt;
+    if (sendTiming(tmp)) {
+        cpu->_status = DcacheWaitResponse;
+        cpu->dcache_pkt = NULL;
+    }
+*/
 }
 
 template<class Impl>
 DefaultFetch<Impl>::DefaultFetch(Params *params)
-    : icacheInterface(params->icacheInterface),
-      branchPred(params),
+    : branchPred(params),
       decodeToFetchDelay(params->decodeToFetchDelay),
       renameToFetchDelay(params->renameToFetchDelay),
       iewToFetchDelay(params->iewToFetchDelay),
@@ -122,7 +148,7 @@ DefaultFetch<Impl>::DefaultFetch(Params *params)
     }
 
     // Size of cache block.
-    cacheBlkSize = icacheInterface ? icacheInterface->getBlockSize() : 64;
+    cacheBlkSize = 64;
 
     // Create mask to get rid of offset bits.
     cacheBlkMask = (cacheBlkSize - 1);
@@ -133,8 +159,7 @@ DefaultFetch<Impl>::DefaultFetch(Params *params)
 
         priorityList.push_back(tid);
 
-        // Create a new memory request.
-        memReq[tid] = NULL;
+        memPkt[tid] = NULL;
 
         // Create space to store a cache line.
         cacheData[tid] = new uint8_t[cacheBlkSize];
@@ -253,6 +278,9 @@ DefaultFetch<Impl>::setCPU(FullCPU *cpu_ptr)
     DPRINTF(Fetch, "Setting the CPU pointer.\n");
     cpu = cpu_ptr;
 
+    // Name is finally available, so create the port.
+    icachePort = new IcachePort(this);
+
     // Fetch needs to start fetching instructions at the very beginning,
     // so it must start up in active state.
     switchToActive();
@@ -315,9 +343,9 @@ DefaultFetch<Impl>::initStage()
 
 template<class Impl>
 void
-DefaultFetch<Impl>::processCacheCompletion(MemReqPtr &req)
+DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt)
 {
-    unsigned tid = req->thread_num;
+    unsigned tid = pkt->req->getThreadNum();
 
     DPRINTF(Fetch, "[tid:%u] Waking up from cache miss.\n",tid);
 
@@ -325,10 +353,11 @@ DefaultFetch<Impl>::processCacheCompletion(MemReqPtr &req)
     // to return.
     // Can keep track of how many cache accesses go unused due to
     // misspeculation here.
-    if (fetchStatus[tid] != IcacheMissStall ||
-        req != memReq[tid] ||
+    if (fetchStatus[tid] != IcacheWaitResponse ||
+        pkt != memPkt[tid] ||
         isSwitchedOut()) {
         ++fetchIcacheSquashes;
+        delete pkt;
         return;
     }
 
@@ -341,17 +370,19 @@ DefaultFetch<Impl>::processCacheCompletion(MemReqPtr &req)
 
     switchToActive();
 
-    // Only switch to IcacheMissComplete if we're not stalled as well.
+    // Only switch to IcacheAccessComplete if we're not stalled as well.
     if (checkStall(tid)) {
         fetchStatus[tid] = Blocked;
     } else {
-        fetchStatus[tid] = IcacheMissComplete;
+        fetchStatus[tid] = IcacheAccessComplete;
     }
 
 //    memcpy(cacheData[tid], memReq[tid]->data, memReq[tid]->size);
 
     // Reset the mem req to NULL.
-    memReq[tid] = NULL;
+    delete pkt->req;
+    delete pkt;
+    memPkt[tid] = NULL;
 }
 
 template <class Impl>
@@ -475,18 +506,15 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
 
     // Setup the memReq to do a read of the first instruction's address.
     // Set the appropriate read size and flags as well.
-    memReq[tid] = new MemReq();
+    // Build request here.
+    RequestPtr mem_req = new Request(tid, fetch_PC, cacheBlkSize, flags,
+                                     fetch_PC, cpu->readCpuId(), tid);
 
-    memReq[tid]->asid = tid;
-    memReq[tid]->thread_num = tid;
-    memReq[tid]->data = new uint8_t[64];
-    memReq[tid]->xc = cpu->xcBase(tid);
-    memReq[tid]->cmd = Read;
-    memReq[tid]->reset(fetch_PC, cacheBlkSize, flags);
+    memPkt[tid] = NULL;
 
     // Translate the instruction request.
 //#if FULL_SYSTEM
-    fault = cpu->translateInstReq(memReq[tid]);
+    fault = cpu->translateInstReq(mem_req);
 //#else
 //    fault = pTable->translate(memReq[tid]);
 //#endif
@@ -508,48 +536,31 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
         }
 #endif
 
+        // Build packet here.
+        PacketPtr data_pkt = new Packet(mem_req,
+                                        Packet::ReadReq, Packet::Broadcast);
+        data_pkt->dataStatic(cacheData[tid]);
+
         DPRINTF(Fetch, "Fetch: Doing instruction read.\n");
-        fault = cpu->mem->read(memReq[tid], cacheData[tid]);
-        // This read may change when the mem interface changes.
+
+        fetchedCacheLines++;
 
         // Now do the timing access to see whether or not the instruction
         // exists within the cache.
-        if (icacheInterface && !icacheInterface->isBlocked()) {
-            DPRINTF(Fetch, "Doing cache access.\n");
-
-            memReq[tid]->completionEvent = NULL;
-
-            memReq[tid]->time = curTick;
-
-            MemAccessResult result = icacheInterface->access(memReq[tid]);
-
-            fetchedCacheLines++;
-
-            // If the cache missed, then schedule an event to wake
-            // up this stage once the cache miss completes.
-            // @todo: Possibly allow for longer than 1 cycle cache hits.
-            if (result != MA_HIT && icacheInterface->doEvents()) {
-
-                memReq[tid]->completionEvent =
-                    new CacheCompletionEvent(memReq[tid], this);
-
-                lastIcacheStall[tid] = curTick;
-
-                DPRINTF(Activity, "[tid:%i]: Activity: Stalling due to I-cache "
-                        "miss.\n", tid);
-
-                fetchStatus[tid] = IcacheMissStall;
-            } else {
-                DPRINTF(Fetch, "[tid:%i]: I-Cache hit. Doing Instruction "
-                        "read.\n", tid);
-
-//                memcpy(cacheData[tid], memReq[tid]->data, memReq[tid]->size);
-            }
-        } else {
+        if (!icachePort->sendTiming(data_pkt)) {
             DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid);
             ret_fault = NoFault;
             return false;
         }
+
+        DPRINTF(Fetch, "Doing cache access.\n");
+
+        lastIcacheStall[tid] = curTick;
+
+        DPRINTF(Activity, "[tid:%i]: Activity: Waiting on I-cache "
+                "response.\n", tid);
+
+        fetchStatus[tid] = IcacheWaitResponse;
     }
 
     ret_fault = fault;
@@ -567,10 +578,11 @@ DefaultFetch<Impl>::doSquash(const Addr &new_PC, unsigned tid)
     nextPC[tid] = new_PC + instSize;
 
     // Clear the icache miss if it's outstanding.
-    if (fetchStatus[tid] == IcacheMissStall && icacheInterface) {
+    if (fetchStatus[tid] == IcacheWaitResponse) {
         DPRINTF(Fetch, "[tid:%i]: Squashing outstanding Icache miss.\n",
                 tid);
-        memReq[tid] = NULL;
+        delete memPkt[tid];
+        memPkt[tid] = NULL;
     }
 
     fetchStatus[tid] = Squashing;
@@ -632,12 +644,12 @@ DefaultFetch<Impl>::updateFetchStatus()
 
         if (fetchStatus[tid] == Running ||
             fetchStatus[tid] == Squashing ||
-            fetchStatus[tid] == IcacheMissComplete) {
+            fetchStatus[tid] == IcacheAccessComplete) {
 
             if (_status == Inactive) {
                 DPRINTF(Activity, "[tid:%i]: Activating stage.\n",tid);
 
-                if (fetchStatus[tid] == IcacheMissComplete) {
+                if (fetchStatus[tid] == IcacheAccessComplete) {
                     DPRINTF(Activity, "[tid:%i]: Activating fetch due to cache"
                             "completion\n",tid);
                 }
@@ -831,7 +843,7 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid)
         }
     }
 
-    if (checkStall(tid) && fetchStatus[tid] != IcacheMissStall) {
+    if (checkStall(tid) && fetchStatus[tid] != IcacheWaitResponse) {
         DPRINTF(Fetch, "[tid:%i]: Setting to blocked\n",tid);
 
         fetchStatus[tid] = Blocked;
@@ -882,7 +894,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
     // If returning from the delay of a cache miss, then update the status
     // to running, otherwise do the cache access.  Possibly move this up
     // to tick() function.
-    if (fetchStatus[tid] == IcacheMissComplete) {
+    if (fetchStatus[tid] == IcacheAccessComplete) {
         DPRINTF(Fetch, "[tid:%i]: Icache miss is complete.\n",
                 tid);
 
@@ -905,11 +917,11 @@ DefaultFetch<Impl>::fetch(bool &status_change)
             ++fetchBlockedCycles;
         } else if (fetchStatus[tid] == Squashing) {
             ++fetchSquashCycles;
-        } else if (fetchStatus[tid] == IcacheMissStall) {
+        } else if (fetchStatus[tid] == IcacheWaitResponse) {
             ++icacheStallCycles;
         }
 
-        // Status is Idle, Squashing, Blocked, or IcacheMissStall, so
+        // Status is Idle, Squashing, Blocked, or IcacheWaitResponse, so
         // fetch should do nothing.
         return;
     }
@@ -917,7 +929,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
     ++fetchCycles;
 
     // If we had a stall due to an icache miss, then return.
-    if (fetchStatus[tid] == IcacheMissStall) {
+    if (fetchStatus[tid] == IcacheWaitResponse) {
         ++icacheStallCycles;
         status_change = true;
         return;
@@ -1026,7 +1038,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
     } else {
         // We shouldn't be in an icache miss and also have a fault (an ITB
         // miss)
-        if (fetchStatus[tid] == IcacheMissStall) {
+        if (fetchStatus[tid] == IcacheWaitResponse) {
             panic("Fetch should have exited prior to this!");
         }
 
@@ -1107,7 +1119,7 @@ DefaultFetch<Impl>::getFetchingThread(FetchPriority &fetch_priority)
         int tid = *((*activeThreads).begin());
 
         if (fetchStatus[tid] == Running ||
-            fetchStatus[tid] == IcacheMissComplete ||
+            fetchStatus[tid] == IcacheAccessComplete ||
             fetchStatus[tid] == Idle) {
             return tid;
         } else {
@@ -1133,7 +1145,7 @@ DefaultFetch<Impl>::roundRobin()
         assert(high_pri <= numThreads);
 
         if (fetchStatus[high_pri] == Running ||
-            fetchStatus[high_pri] == IcacheMissComplete ||
+            fetchStatus[high_pri] == IcacheAccessComplete ||
             fetchStatus[high_pri] == Idle) {
 
             priorityList.erase(pri_iter);
@@ -1167,7 +1179,7 @@ DefaultFetch<Impl>::iqCount()
         unsigned high_pri = PQ.top();
 
         if (fetchStatus[high_pri] == Running ||
-            fetchStatus[high_pri] == IcacheMissComplete ||
+            fetchStatus[high_pri] == IcacheAccessComplete ||
             fetchStatus[high_pri] == Idle)
             return high_pri;
         else
@@ -1198,7 +1210,7 @@ DefaultFetch<Impl>::lsqCount()
         unsigned high_pri = PQ.top();
 
         if (fetchStatus[high_pri] == Running ||
-            fetchStatus[high_pri] == IcacheMissComplete ||
+            fetchStatus[high_pri] == IcacheAccessComplete ||
            fetchStatus[high_pri] == Idle)
             return high_pri;
         else
diff --git a/src/cpu/o3/free_list.hh b/src/cpu/o3/free_list.hh
index 29e84cd44..daf1007c1 100644
--- a/src/cpu/o3/free_list.hh
+++ b/src/cpu/o3/free_list.hh
@@ -33,6 +33,7 @@
 #include <queue>
 
 #include "arch/isa_traits.hh"
+#include "base/misc.hh"
 #include "base/trace.hh"
 #include "base/traceflags.hh"
 #include "cpu/o3/comm.hh"
diff --git a/src/cpu/o3/fu_pool.cc b/src/cpu/o3/fu_pool.cc
new file mode 100644
index 000000000..fb2b5c00d
--- /dev/null
+++ b/src/cpu/o3/fu_pool.cc
@@ -0,0 +1,295 @@
+/*
+ * Copyright (c) 2002-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sstream>
+
+#include "cpu/o3/fu_pool.hh"
+#include "encumbered/cpu/full/fu_pool.hh"
+#include "sim/builder.hh"
+
+using namespace std;
+
+////////////////////////////////////////////////////////////////////////////
+//
+//  A pool of function units
+//
+
+inline void
+FUPool::FUIdxQueue::addFU(int fu_idx)
+{
+    funcUnitsIdx.push_back(fu_idx);  // append one more FU index to the queue
+    ++size;                          // getFU() wraps its cursor at this count
+}
+
+inline int
+FUPool::FUIdxQueue::getFU()
+{
+    // Hand out the index under the cursor, then advance the cursor,
+    // wrapping to the front once it has passed the last entry.
+    const int current = funcUnitsIdx[idx];
+
+    idx = (idx + 1 == size) ? 0 : idx + 1;
+    return current;
+}
+
+FUPool::~FUPool()
+{
+    // Free each FuncUnit referenced from funcUnits.
+    for (fuListIterator unit = funcUnits.begin();
+         unit != funcUnits.end(); ++unit)
+        delete *unit;
+}
+
+
+// Builds the pool: one FuncUnit for every requested copy of each FUDesc.
+FUPool::FUPool(string name, vector<FUDesc *> paramList)
+    : SimObject(name)
+{
+    numFU = 0;
+
+    funcUnits.clear();
+    // Latency maxima start at zero and are raised in the loop below.
+    for (int i = 0; i < Num_OpClasses; ++i) {
+        maxOpLatencies[i] = 0;
+        maxIssueLatencies[i] = 0;
+    }
+
+    //
+    //  Iterate through the list of FUDesc objects passed in
+    //
+    for (FUDDiterator i = paramList.begin(); i != paramList.end(); ++i) {
+
+        //
+        //  Don't bother with this if we're not going to create any FU's
+        //
+        if ((*i)->number) {
+            //
+            //  Create the FuncUnit object from this structure
+            //   - add the capabilities listed in the FU's operation
+            //     description
+            //
+            //  We create the first unit, then duplicate it as needed
+            //
+            FuncUnit *fu = new FuncUnit;
+
+            OPDDiterator j = (*i)->opDescList.begin();
+            OPDDiterator end = (*i)->opDescList.end();
+            for (; j != end; ++j) {
+                // indicate that this pool has this capability
+                capabilityList.set((*j)->opClass);
+                // Queue every copy of this FU under the capability.  numFU
+                // is not advanced until later, so numFU + k is the index
+                // that copy k receives in funcUnits further down.
+                for (int k = 0; k < (*i)->number; ++k)
+                    fuPerCapList[(*j)->opClass].addFU(numFU + k);
+
+                // indicate that this FU has the capability
+                fu->addCapability((*j)->opClass, (*j)->opLat, (*j)->issueLat);
+                // Keep the largest op/issue latency seen for this op class.
+                if ((*j)->opLat > maxOpLatencies[(*j)->opClass])
+                    maxOpLatencies[(*j)->opClass] = (*j)->opLat;
+
+                if ((*j)->issueLat > maxIssueLatencies[(*j)->opClass])
+                    maxIssueLatencies[(*j)->opClass] = (*j)->issueLat;
+            }
+            // Count the first unit (copy 0) of this description.
+            numFU++;
+
+            //  Name the first unit "<desc name>(0)" and add it to the list
+            ostringstream s;
+
+            s << (*i)->name() << "(0)";
+            fu->name = s.str();
+            funcUnits.push_back(fu);
+            // Remaining copies are cloned from the first and named "(c)".
+            for (int c = 1; c < (*i)->number; ++c) {
+                ostringstream s;
+                numFU++;
+                FuncUnit *fu2 = new FuncUnit(*fu);
+
+                s << (*i)->name() << "(" << c << ")";
+                fu2->name = s.str();
+                funcUnits.push_back(fu2);
+            }
+        }
+    }
+
+    unitBusy.resize(numFU);
+    // Every unit starts out free.
+    for (int i = 0; i < numFU; i++) {
+        unitBusy[i] = false;
+    }
+}
+
+void
+FUPool::annotateMemoryUnits(unsigned hit_latency)
+{
+    // Only the MemReadOp maximum is updated at pool level.
+    maxOpLatencies[MemReadOp] = hit_latency;
+
+    // Every unit providing memory reads or writes gets the hit latency.
+    for (fuListIterator unit = funcUnits.begin();
+         unit != funcUnits.end(); ++unit) {
+        if ((*unit)->provides(MemReadOp))
+            (*unit)->opLatency(MemReadOp) = hit_latency;
+        if ((*unit)->provides(MemWriteOp))
+            (*unit)->opLatency(MemWriteOp) = hit_latency;
+    }
+}
+
+int
+FUPool::getUnit(OpClass capability)
+{
+    //  -2 tells the caller that no unit in this pool has the capability.
+    if (!capabilityList[capability])
+        return -2;
+
+    // Pull candidates from the capability's circular queue.  Coming back
+    // around to the first candidate means every unit was busy.
+    const int first_idx = fuPerCapList[capability].getFU();
+    int candidate = first_idx;
+
+    while (unitBusy[candidate]) {
+        candidate = fuPerCapList[capability].getFU();
+
+        // No FU available
+        if (candidate == first_idx)
+            return -1;
+    }
+
+    // Claim the unit by setting its busy flag before handing it out.
+    unitBusy[candidate] = true;
+
+    return candidate;
+}
+
+void
+FUPool::freeUnitNextCycle(int fu_idx)
+{
+    assert(unitBusy[fu_idx]);          // only a busy unit may be queued
+    unitsToBeFreed.push_back(fu_idx);  // flag cleared in processFreeUnits()
+}
+
+void
+FUPool::processFreeUnits()
+{
+    // Drain the pending list from the back, clearing each busy flag.
+    for (; !unitsToBeFreed.empty(); unitsToBeFreed.pop_back()) {
+        const int fu_idx = unitsToBeFreed.back();
+
+        // A queued unit is expected to still be marked busy.
+        assert(unitBusy[fu_idx]);
+        unitBusy[fu_idx] = false;
+    }
+}
+
+void
+FUPool::dump()
+{
+    // Prints the pool to cout in two sections: first the units whose
+    // busy flag is clear, then the units whose busy flag is set.  Each
+    // entry shows the unit's index in the pool followed by the unit's
+    // name.
+
+    cout << "Function Unit Pool (" << name() << ")\n";
+    cout << "======================================\n";
+    cout << "Free List:\n";
+
+    // First pass: units that are not busy.
+    for (int fu = 0; fu < numFU; ++fu) {
+        if (!unitBusy[fu]) {
+            cout << "  [" << fu << "] : ";
+            cout << funcUnits[fu]->name << " ";
+            cout << "\n";
+        }
+    }
+
+    // Second pass: units that are busy.
+    cout << "======================================\n";
+    cout << "Busy List:\n";
+
+    for (int fu = 0; fu < numFU; ++fu) {
+        if (unitBusy[fu]) {
+            cout << "  [" << fu << "] : ";
+            cout << funcUnits[fu]->name << " ";
+            cout << "\n";
+        }
+    }
+}
+
+void
+FUPool::switchOut()
+{   // no work is done here; takeOverFrom() is what clears the pool state
+}
+
+void
+FUPool::takeOverFrom()
+{
+    // Start from a clean slate: nothing busy, nothing waiting to be freed.
+    unitsToBeFreed.clear();
+    for (int fu = 0; fu < numFU; ++fu)
+        unitBusy[fu] = false;
+}
+
+//
+
+////////////////////////////////////////////////////////////////////////////
+//
+//  The SimObjects we use to get the FU information into the simulator
+//
+////////////////////////////////////////////////////////////////////////////
+
+//
+//    FUPool - Contains a list of FUDesc objects to make available
+//
+
+//
+//  The FuPool object
+//
+
+BEGIN_DECLARE_SIM_OBJECT_PARAMS(FUPool)
+    // The single parameter: the FUDesc objects handed to the constructor.
+    SimObjectVectorParam<FUDesc *> FUList;
+
+END_DECLARE_SIM_OBJECT_PARAMS(FUPool)
+
+
+BEGIN_INIT_SIM_OBJECT_PARAMS(FUPool)
+
+    INIT_PARAM(FUList, "list of FU's for this pool")
+
+END_INIT_SIM_OBJECT_PARAMS(FUPool)
+
+// Creates the pool from its instance name and the configured FUList.
+CREATE_SIM_OBJECT(FUPool)
+{
+    return new FUPool(getInstanceName(), FUList);
+}
+
+REGISTER_SIM_OBJECT("FUPool", FUPool)
+
diff --git a/src/cpu/o3/fu_pool.hh b/src/cpu/o3/fu_pool.hh
new file mode 100644
index 000000000..f590c4149
--- /dev/null
+++ b/src/cpu/o3/fu_pool.hh
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2002-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_O3_FU_POOL_HH__
+#define __CPU_O3_FU_POOL_HH__
+
+#include <bitset>
+#include <list>
+#include <string>
+#include <vector>
+
+#include "base/sched_list.hh"
+#include "cpu/op_class.hh"
+#include "sim/sim_object.hh"
+
+class FUDesc;
+class FuncUnit;
+
+/**
+ * Pool of FU's, specific to the new CPU model. The old FU pool had lists of
+ * free units and busy units, and whenever a FU was needed it would iterate
+ * through the free units to find a FU that provided the capability. This pool
+ * has lists of units specific to each of the capabilities, and whenever a FU
+ * is needed, it iterates through that list to find a free unit. The previous
+ * FU pool would have to be ticked each cycle to update which units became
+ * free. This FU pool lets the IEW stage handle freeing units, which frees
+ * them as their scheduled execution events complete. This limits units in this
+ * model to either have identical issue and op latencies, or 1 cycle issue
+ * latencies.
+ */
+class FUPool : public SimObject
+{
+  private:
+    /** Maximum op execution latencies, per op class. */
+    unsigned maxOpLatencies[Num_OpClasses];
+    /** Maximum issue latencies, per op class. */
+    unsigned maxIssueLatencies[Num_OpClasses];
+
+    /** Bitvector listing capabilities of this FU pool. */
+    std::bitset<Num_OpClasses> capabilityList;
+
+    /** Per-FU busy flags, indexed by FU index. */
+    std::vector<bool> unitBusy;
+
+    /** Indices of units to be freed at the end of this cycle. */
+    std::vector<int> unitsToBeFreed;
+
+    /**
+     * Class that implements a circular queue to hold FU indices. The hope is
+     * that FUs that have been just used will be moved to the end of the queue
+     * by iterating through it, thus leaving free units at the head of the
+     * queue.
+     */
+    class FUIdxQueue {
+      public:
+        /** Constructs an empty circular queue of FU indices. */
+        FUIdxQueue()
+            : idx(0), size(0)
+        { }
+
+        /** Adds a FU to the queue. */
+        inline void addFU(int fu_idx);
+
+        /** Returns the index of the FU at the head of the queue, and changes
+         *  the index to the next element. */
+        inline int getFU();
+
+      private:
+        /** Circular queue index. */
+        int idx;
+
+        /** Size of the queue. */
+        int size;
+
+        /** Queue of FU indices. */
+        std::vector<int> funcUnitsIdx;
+    };
+
+    /** Per op class queues of FUs that provide that capability. */
+    FUIdxQueue fuPerCapList[Num_OpClasses];
+
+    /** Number of FUs. */
+    int numFU;
+
+    /** Functional units. */
+    std::vector<FuncUnit *> funcUnits;
+
+    typedef std::vector<FuncUnit *>::iterator fuListIterator;
+
+  public:
+
+    /** Constructs a FU pool from a name and a list of FU descriptions. */
+    FUPool(std::string name, std::vector<FUDesc *> l);
+    ~FUPool();
+
+    /** Annotates units that provide memory operations. Included only because
+     *  old FU pool provided this function. */
+    void annotateMemoryUnits(unsigned hit_latency);
+
+    /**
+     * Gets a FU providing the requested capability. Will mark the unit as busy,
+     * but leaves the freeing of the unit up to the IEW stage.
+     * @param capability The capability requested.
+     * @return Returns -2 if the FU pool does not have the capability, -1 if
+     * there is no free FU, and the FU's index otherwise.
+     */
+    int getUnit(OpClass capability);
+
+    /** Frees a FU at the end of this cycle. */
+    void freeUnitNextCycle(int fu_idx);
+
+    /** Clears the busy flag of every FU on the to-be-freed list. */
+    void processFreeUnits();
+
+    /** Returns the total number of FUs. */
+    int size() { return numFU; }
+
+    /** Debugging function that prints the free and busy FUs to stdout. */
+    void dump();
+
+    /** Returns the operation execution latency of the given capability. */
+    unsigned getOpLatency(OpClass capability) {
+        return maxOpLatencies[capability];
+    }
+
+    /** Returns the issue latency of the given capability. */
+    unsigned getIssueLatency(OpClass capability) {
+        return maxIssueLatencies[capability];
+    }
+
+    /** Switch-out hook; currently a no-op. */
+    void switchOut();
+    /** Marks every FU as not busy and clears the to-be-freed list. */
+    void takeOverFrom();
+};
+
+#endif // __CPU_O3_FU_POOL_HH__
diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh
index 935320628..c931669c6 100644
--- a/src/cpu/o3/iew.hh
+++ b/src/cpu/o3/iew.hh
@@ -110,25 +110,6 @@ class DefaultIEW
     /** Writeback status. */
     StageStatus wbStatus;
 
-  public:
-    /** LdWriteback event for a load completion. */
-    class LdWritebackEvent : public Event {
-      private:
-        /** Instruction that is writing back data to the register file. */
-        DynInstPtr inst;
-        /** Pointer to IEW stage. */
-        DefaultIEW<Impl> *iewStage;
-
-      public:
-        /** Constructs a load writeback event. */
-        LdWritebackEvent(DynInstPtr &_inst, DefaultIEW<Impl> *_iew);
-
-        /** Processes writeback event. */
-        virtual void process();
-        /** Returns the description of the writeback event. */
-        virtual const char *description();
-    };
-
   public:
     /** Constructs a DefaultIEW with the given parameters. */
     DefaultIEW(Params *params);
diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh
index b0137d7fc..955ebfdf3 100644
--- a/src/cpu/o3/iew_impl.hh
+++ b/src/cpu/o3/iew_impl.hh
@@ -38,58 +38,6 @@
 
 using namespace std;
 
-template<class Impl>
-DefaultIEW<Impl>::LdWritebackEvent::LdWritebackEvent(DynInstPtr &_inst,
-                                                     DefaultIEW<Impl> *_iew)
-    : Event(&mainEventQueue), inst(_inst), iewStage(_iew)
-{
-    this->setFlags(Event::AutoDelete);
-}
-
-template<class Impl>
-void
-DefaultIEW<Impl>::LdWritebackEvent::process()
-{
-    DPRINTF(IEW, "Load writeback event [sn:%lli]\n", inst->seqNum);
-    DPRINTF(Activity, "Activity: Ld Writeback event [sn:%lli]\n", inst->seqNum);
-
-    //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum);
-
-    if (iewStage->isSwitchedOut()) {
-        inst = NULL;
-        return;
-    } else if (inst->isSquashed()) {
-        iewStage->wakeCPU();
-        inst = NULL;
-        return;
-    }
-
-    iewStage->wakeCPU();
-
-    if (!inst->isExecuted()) {
-        inst->setExecuted();
-
-        // Complete access to copy data to proper place.
-        if (inst->isStore()) {
-            inst->completeAcc();
-        }
-    }
-
-    // Need to insert instruction into queue to commit
-    iewStage->instToCommit(inst);
-
-    iewStage->activityThisCycle();
-
-    inst = NULL;
-}
-
-template<class Impl>
-const char *
-DefaultIEW<Impl>::LdWritebackEvent::description()
-{
-    return "Load writeback event";
-}
-
 template<class Impl>
 DefaultIEW<Impl>::DefaultIEW(Params *params)
     : // @todo: Make this into a parameter.
@@ -1280,7 +1228,7 @@ DefaultIEW<Impl>::executeInsts()
                 ldstQueue.executeStore(inst);
 
                 // If the store had a fault then it may not have a mem req
-                if (inst->req && !(inst->req->flags & LOCKED)) {
+                if (inst->req && !(inst->req->getFlags() & LOCKED)) {
                     inst->setExecuted();
 
                     instToCommit(inst);
@@ -1556,7 +1504,7 @@ DefaultIEW<Impl>::updateExeInstStats(DynInstPtr &inst)
     else
         iewExecutedInsts++;
 #else
-    iewExecutedInsts[thread_number]++;
+    iewExecutedInsts++;
 #endif
 
     //
diff --git a/src/cpu/o3/inst_queue.hh b/src/cpu/o3/inst_queue.hh
index 518de73d9..843f6a8fe 100644
--- a/src/cpu/o3/inst_queue.hh
+++ b/src/cpu/o3/inst_queue.hh
@@ -38,7 +38,7 @@
 #include "base/timebuf.hh"
 #include "cpu/inst_seq.hh"
 #include "cpu/o3/dep_graph.hh"
-#include "encumbered/cpu/full/op_class.hh"
+#include "cpu/op_class.hh"
 #include "sim/host.hh"
 
 class FUPool;
diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh
index f1dc4e01f..4fa756cb6 100644
--- a/src/cpu/o3/inst_queue_impl.hh
+++ b/src/cpu/o3/inst_queue_impl.hh
@@ -64,8 +64,7 @@ InstructionQueue<Impl>::FUCompletion::description()
 
 template <class Impl>
 InstructionQueue<Impl>::InstructionQueue(Params *params)
-    : dcacheInterface(params->dcacheInterface),
-      fuPool(params->fuPool),
+    : fuPool(params->fuPool),
       numEntries(params->numIQEntries),
       totalWidth(params->issueWidth),
       numPhysIntRegs(params->numPhysIntRegs),
diff --git a/src/cpu/o3/lsq.cc b/src/cpu/o3/lsq.cc
new file mode 100644
index 000000000..8991ab8f8
--- /dev/null
+++ b/src/cpu/o3/lsq.cc
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cpu/o3/alpha_dyn_inst.hh"
+#include "cpu/o3/alpha_cpu.hh"
+#include "cpu/o3/alpha_impl.hh"
+#include "cpu/o3/lsq_impl.hh"
+
+// Force the instantiation of LSQ for all the implementations we care about.
+template class LSQ<AlphaSimpleImpl>;
+
diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh
new file mode 100644
index 000000000..51eb23cd7
--- /dev/null
+++ b/src/cpu/o3/lsq.hh
@@ -0,0 +1,324 @@
+/*
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_O3_LSQ_HH__
+#define __CPU_O3_LSQ_HH__
+
+#include <map>
+#include <queue>
+
+#include "config/full_system.hh"
+#include "cpu/inst_seq.hh"
+//#include "cpu/o3/cpu_policy.hh"
+#include "cpu/o3/lsq_unit.hh"
+#include "mem/port.hh"
+//#include "mem/page_table.hh"
+#include "sim/sim_object.hh"
+
+/** Load/store queue front end: holds one LSQUnit per thread (up to
+ *  Impl::MaxThreads); the inline methods forward to the selected unit. */
+template <class Impl>
+class LSQ {
+  public:
+    typedef typename Impl::Params Params;
+    typedef typename Impl::FullCPU FullCPU;
+    typedef typename Impl::DynInstPtr DynInstPtr;
+    typedef typename Impl::CPUPol::IEW IEW;
+    typedef typename Impl::CPUPol::LSQUnit LSQUnit;
+
+    /** Sharing policy for LSQ entries in SMT mode. */
+    enum LSQPolicy {
+        Dynamic,
+        Partitioned,
+        Threshold
+    };
+
+    /** Constructs an LSQ with the given parameters. */
+    LSQ(Params *params);
+
+    /** Returns the name of the LSQ. */
+    std::string name() const;
+
+    /** Sets the pointer to the list of active threads. */
+    void setActiveThreads(std::list<unsigned> *at_ptr);
+    /** Sets the CPU pointer. */
+    void setCPU(FullCPU *cpu_ptr);
+    /** Sets the IEW stage pointer. */
+    void setIEW(IEW *iew_ptr);
+    /** Sets the page table pointer. */
+//    void setPageTable(PageTable *pt_ptr);
+
+    /** Calls switchOut() on each thread's LSQ unit. */
+    void switchOut();
+    /** Calls takeOverFrom() on each thread's LSQ unit. */
+    void takeOverFrom();
+
+    /** LQ entries per thread under the Partitioned policy; 0 otherwise. */
+    int entryAmount(int num_threads);
+    /** Clears the LQ and SQ of the given thread. */
+    void removeEntries(unsigned tid);
+    /** Reset the max entries for each thread. */
+    void resetEntries();
+    /** Resizes a thread's LQ and SQ to the given size. */
+    void resizeEntries(unsigned size, unsigned tid);
+
+    /** Ticks the LSQ. */
+    void tick();
+    /** Ticks a specific LSQ Unit. */
+    void tick(unsigned tid)
+    { thread[tid].tick(); }
+
+    /** Inserts a load into the LSQ. */
+    void insertLoad(DynInstPtr &load_inst);
+    /** Inserts a store into the LSQ. */
+    void insertStore(DynInstPtr &store_inst);
+
+    /** Executes a load. */
+    Fault executeLoad(DynInstPtr &inst);
+
+    /** Forwards to the thread's LSQ unit, passing the load queue index. */
+    Fault executeLoad(int lq_idx, unsigned tid)
+    { return thread[tid].executeLoad(lq_idx); }
+
+    /** Executes a store. */
+    Fault executeStore(DynInstPtr &inst);
+
+    /** Commits a thread's loads up until the given sequence number. */
+    void commitLoads(InstSeqNum &youngest_inst, unsigned tid)
+    { thread[tid].commitLoads(youngest_inst); }
+
+    /** Commits a thread's stores up until the given sequence number. */
+    void commitStores(InstSeqNum &youngest_inst, unsigned tid)
+    { thread[tid].commitStores(youngest_inst); }
+
+    /**
+     * Attempts to write back stores until all cache ports are used or the
+     * interface becomes blocked.
+     */
+    void writebackStores();
+    /** Same as above, but only for one thread. */
+    void writebackStores(unsigned tid);
+
+    /** Squashes a thread's instructions until the given sequence number. */
+    void squash(const InstSeqNum &squashed_num, unsigned tid)
+    { thread[tid].squash(squashed_num); }
+
+    /** Returns whether or not there was a memory ordering violation. */
+    bool violation();
+    /** Returns whether a specific thread had a memory ordering violation. */
+    bool violation(unsigned tid)
+    { return thread[tid].violation(); }
+
+    /** Returns if a load is blocked due to the memory system for a specific
+     *  thread.
+     */
+    bool loadBlocked(unsigned tid)
+    { return thread[tid].loadBlocked(); }
+
+    /** Forwards isLoadBlockedHandled() to the given thread's LSQ unit. */
+    bool isLoadBlockedHandled(unsigned tid)
+    { return thread[tid].isLoadBlockedHandled(); }
+
+    /** Forwards setLoadBlockedHandled() to the given thread's LSQ unit. */
+    void setLoadBlockedHandled(unsigned tid)
+    { thread[tid].setLoadBlockedHandled(); }
+
+    /** Gets the instruction that caused the memory ordering violation. */
+    DynInstPtr getMemDepViolator(unsigned tid)
+    { return thread[tid].getMemDepViolator(); }
+
+    /** Returns the head index of the load queue for a specific thread. */
+    int getLoadHead(unsigned tid)
+    { return thread[tid].getLoadHead(); }
+
+    /** Returns the sequence number of the head of the load queue. */
+    InstSeqNum getLoadHeadSeqNum(unsigned tid)
+    {
+        return thread[tid].getLoadHeadSeqNum();
+    }
+
+    /** Returns the head index of the store queue. */
+    int getStoreHead(unsigned tid)
+    { return thread[tid].getStoreHead(); }
+
+    /** Returns the sequence number of the head of the store queue. */
+    InstSeqNum getStoreHeadSeqNum(unsigned tid)
+    {
+        return thread[tid].getStoreHeadSeqNum();
+    }
+
+    /** Returns the number of instructions in all of the queues. */
+    int getCount();
+    /** Returns the number of instructions in the queues of one thread. */
+    int getCount(unsigned tid)
+    { return thread[tid].getCount(); }
+
+    /** Returns the total number of loads in the load queue. */
+    int numLoads();
+    /** Returns the total number of loads for a single thread. */
+    int numLoads(unsigned tid)
+    { return thread[tid].numLoads(); }
+
+    /** Returns the total number of stores in the store queue. */
+    int numStores();
+    /** Returns the total number of stores for a single thread. */
+    int numStores(unsigned tid)
+    { return thread[tid].numStores(); }
+
+    /** Returns the total number of loads that are ready. */
+    int numLoadsReady();
+    /** Returns the number of loads that are ready for a single thread. */
+    int numLoadsReady(unsigned tid)
+    { return thread[tid].numLoadsReady(); }
+
+    /** Returns the number of free entries. */
+    unsigned numFreeEntries();
+    /** Returns the number of free entries for a specific thread. */
+    unsigned numFreeEntries(unsigned tid);
+
+    /** Returns true only if every active thread has a full LQ or SQ. */
+    bool isFull();
+    /**
+     * Returns if the LSQ is full for a specific thread (either LQ or SQ is
+     * full). Under the Dynamic policy this defers to isFull().
+     */
+    bool isFull(unsigned tid);
+
+    /** Returns true only if the LQ of every active thread is full. */
+    bool lqFull();
+    /** Returns if a thread's LQ is full (Dynamic policy: same as lqFull()). */
+    bool lqFull(unsigned tid);
+
+    /** Returns if any of the SQs are full. */
+    bool sqFull();
+    /** Returns if the SQ of a given thread is full. */
+    bool sqFull(unsigned tid);
+
+    /**
+     * Returns if the LSQ is stalled due to a memory operation that must be
+     * replayed.
+     */
+    bool isStalled();
+    /**
+     * Returns if the LSQ of a specific thread is stalled due to a memory
+     * operation that must be replayed.
+     */
+    bool isStalled(unsigned tid);
+
+    /** Returns whether or not there are any stores to write back to memory. */
+    bool hasStoresToWB();
+
+    /** Returns whether or not a specific thread has any stores to write back
+     * to memory.
+     */
+    bool hasStoresToWB(unsigned tid)
+    { return thread[tid].hasStoresToWB(); }
+
+    /** Returns the number of stores a specific thread has to write back. */
+    int  numStoresToWB(unsigned tid)
+    { return thread[tid].numStoresToWB(); }
+
+    /** Returns if the LSQ will write back to memory this cycle. */
+    bool willWB();
+    /** Returns if the LSQ of a specific thread will write back to memory this
+     * cycle.
+     */
+    bool willWB(unsigned tid)
+    { return thread[tid].willWB(); }
+
+    /** Debugging function to print out all instructions. */
+    void dumpInsts();
+    /** Debugging function to print out instructions from a specific thread. */
+    void dumpInsts(unsigned tid)
+    { thread[tid].dumpInsts(); }
+
+    /** Executes a read operation, using the load specified at the load index. */
+    template <class T>
+    Fault read(RequestPtr req, T &data, int load_idx);
+
+    /** Executes a store operation, using the store specified at the store
+     *   index.
+     */
+    template <class T>
+    Fault write(RequestPtr req, T &data, int store_idx);
+
+  private:
+    /** The LSQ policy for SMT mode. */
+    LSQPolicy lsqPolicy;
+
+    /** The LSQ units for individual threads. */
+    LSQUnit thread[Impl::MaxThreads];
+
+    /** The CPU pointer. */
+    FullCPU *cpu;
+
+    /** The IEW stage pointer. */
+    IEW *iewStage;
+
+    /** The pointer to the page table. */
+//    PageTable *pTable;
+
+    /** List of Active Threads in System. */
+    std::list<unsigned> *activeThreads;
+
+    /** Total Size of LQ Entries. */
+    unsigned LQEntries;
+    /** Total Size of SQ Entries. */
+    unsigned SQEntries;
+
+    /** Max LQ Size - Used to Enforce Sharing Policies. */
+    unsigned maxLQEntries;
+
+    /** Max SQ Size - Used to Enforce Sharing Policies. */
+    unsigned maxSQEntries;
+
+    /** Number of Threads. */
+    unsigned numThreads;
+};
+
+template <class Impl>
+template <class T>
+Fault
+LSQ<Impl>::read(RequestPtr req, T &data, int load_idx)
+{
+    // Route the access to the LSQ unit of the thread named by the request.
+    const unsigned owner = req->getThreadNum();
+    return thread[owner].read(req, data, load_idx);
+}
+
+template <class Impl>
+template <class T>
+Fault
+LSQ<Impl>::write(RequestPtr req, T &data, int store_idx)
+{
+    // Route the access to the LSQ unit of the thread named by the request.
+    const unsigned owner = req->getThreadNum();
+    return thread[owner].write(req, data, store_idx);
+}
+
+#endif // __CPU_O3_LSQ_HH__
diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh
new file mode 100644
index 000000000..a6ad27522
--- /dev/null
+++ b/src/cpu/o3/lsq_impl.hh
@@ -0,0 +1,538 @@
+/*
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <algorithm>
+#include <string>
+
+#include "cpu/o3/lsq.hh"
+
+using namespace std;
+
+/**
+ * Builds the LSQ: reads the queue sizes and thread count from @a params,
+ * derives the per-thread LQ/SQ limits from the (case-insensitive)
+ * smtLSQPolicy string, and initializes one LSQ unit per thread.
+ */
+template <class Impl>
+LSQ<Impl>::LSQ(Params *params)
+    : LQEntries(params->LQEntries), SQEntries(params->SQEntries),
+      numThreads(params->numberOfThreads)
+{
+    DPRINTF(LSQ, "Creating LSQ object.\n");
+
+    // Handle SMT parameters.
+    string policy = params->smtLSQPolicy;
+
+    //Convert string to lowercase
+    std::transform(policy.begin(), policy.end(), policy.begin(),
+                   (int(*)(int)) tolower);
+
+    //Figure out LSQ sharing policy
+    if (policy == "dynamic") {
+        lsqPolicy = Dynamic;
+
+        maxLQEntries = LQEntries;
+        maxSQEntries = SQEntries;
+
+        DPRINTF(LSQ, "LSQ sharing policy set to Dynamic\n");
+    } else if (policy == "partitioned") {
+        lsqPolicy = Partitioned;
+
+        //@todo:make work if part_amt doesnt divide evenly.
+        maxLQEntries = LQEntries / numThreads;
+        maxSQEntries = SQEntries / numThreads;
+
+        DPRINTF(LSQ, "LSQ sharing policy set to Partitioned: "
+                "%i entries per LQ | %i entries per SQ\n",
+                maxLQEntries,maxSQEntries);
+    } else if (policy == "threshold") {
+        lsqPolicy = Threshold;
+
+        assert(params->smtLSQThreshold > LQEntries);
+        assert(params->smtLSQThreshold > SQEntries);
+
+        //Divide up by threshold amount
+        //@todo: Should threads check the max and the total
+        //amount of the LSQ
+        maxLQEntries  = params->smtLSQThreshold;
+        maxSQEntries  = params->smtLSQThreshold;
+
+        DPRINTF(LSQ, "LSQ sharing policy set to Threshold: "
+                "%i entries per LQ | %i entries per SQ\n",
+                maxLQEntries,maxSQEntries);
+    } else {
+        assert(0 && "Invalid LSQ Sharing Policy.Options Are:{Dynamic,"
+                    "Partitioned, Threshold}");
+    }
+
+    //Initialize LSQs
+    for (int tid=0; tid < numThreads; tid++) {
+        thread[tid].init(params, maxLQEntries, maxSQEntries, tid);
+    }
+}
+
+
+template<class Impl>
+std::string
+LSQ<Impl>::name() const
+{   // The name is the IEW stage's name with ".lsq" appended.
+    return iewStage->name() + ".lsq";
+}
+
+template<class Impl>
+void
+LSQ<Impl>::setActiveThreads(list<unsigned> *at_ptr)
+{
+    assert(at_ptr != 0);    // a null active-thread list is not accepted
+    activeThreads = at_ptr;
+}
+
+template<class Impl>
+void
+LSQ<Impl>::setCPU(FullCPU *cpu_ptr)
+{
+    // Remember the CPU here and hand the same pointer to every LSQ unit.
+    cpu = cpu_ptr;
+    for (int unit = 0; unit < numThreads; ++unit) {
+        thread[unit].setCPU(cpu);
+    }
+}
+
+template<class Impl>
+void
+LSQ<Impl>::setIEW(IEW *iew_ptr)
+{
+    // Remember the IEW stage here and pass it on to every LSQ unit.
+    iewStage = iew_ptr;
+    for (int unit = 0; unit < numThreads; ++unit) {
+        thread[unit].setIEW(iewStage);
+    }
+}
+
+#if 0  // Disabled: the matching declaration in lsq.hh is commented out too.
+template<class Impl>
+void
+LSQ<Impl>::setPageTable(PageTable *pt_ptr)
+{
+    for (int tid=0; tid < numThreads; tid++) {
+        thread[tid].setPageTable(pt_ptr);
+    }
+}
+#endif
+
+template <class Impl>
+void
+LSQ<Impl>::switchOut()
+{
+    // Every configured thread's unit is switched out, active or not.
+    for (int unit = 0; unit < numThreads; ++unit)
+        thread[unit].switchOut();
+}
+
+template <class Impl>
+void
+LSQ<Impl>::takeOverFrom()
+{
+    // Every configured thread's unit takes over, active or not.
+    for (int unit = 0; unit < numThreads; ++unit)
+        thread[unit].takeOverFrom();
+}
+
+template <class Impl>
+int
+LSQ<Impl>::entryAmount(int num_threads)
+{
+    // Only the Partitioned policy has a fixed per-thread share of the LQ.
+    if (lsqPolicy != Partitioned)
+        return 0;
+
+    return LQEntries / num_threads;
+}
+
+template <class Impl>
+void
+LSQ<Impl>::resetEntries()
+{
+    // Nothing is resized for a Dynamic configuration with at most one
+    // thread; every other combination falls through to the resize below.
+    if (lsqPolicy == Dynamic && numThreads <= 1)
+        return;
+
+    const int num_active = activeThreads->size();
+
+    // Partitioned splits the LQ evenly over the active threads; every other
+    // policy (Threshold included, whatever the thread count) gets all of it.
+    int per_thread = LQEntries;
+    if (lsqPolicy == Partitioned)
+        per_thread = LQEntries / num_active;
+
+    // Apply the chosen size to each thread on the active list.
+    list<unsigned>::iterator tid_it = activeThreads->begin();
+    const list<unsigned>::iterator tid_end = activeThreads->end();
+
+    while (tid_it != tid_end) {
+        resizeEntries(per_thread, *tid_it++);
+    }
+}
+
+template<class Impl>
+void
+LSQ<Impl>::removeEntries(unsigned tid)
+{   // Clears both queues (LQ, then SQ) of the given thread's LSQ unit.
+    thread[tid].clearLQ();
+    thread[tid].clearSQ();
+}
+
+template<class Impl>
+void
+LSQ<Impl>::resizeEntries(unsigned size,unsigned tid)
+{   // The same size is applied to the thread's LQ and to its SQ.
+    thread[tid].resizeLQ(size);
+    thread[tid].resizeSQ(size);
+}
+
+template<class Impl>
+void
+LSQ<Impl>::tick()
+{
+    // Tick the LSQ unit of each thread on the active list, in list order.
+    // The iterator is stepped past a thread before that thread is ticked.
+    list<unsigned>::iterator next = activeThreads->begin();
+
+    while (next != activeThreads->end()) {
+        const unsigned cur = *next++;
+        thread[cur].tick();
+    }
+}
+
+template<class Impl>
+void
+LSQ<Impl>::insertLoad(DynInstPtr &load_inst)
+{
+    // The instruction itself records which thread's unit must receive it.
+    const unsigned owner = load_inst->threadNumber;
+    thread[owner].insertLoad(load_inst);
+}
+
+template<class Impl>
+void
+LSQ<Impl>::insertStore(DynInstPtr &store_inst)
+{
+    // The instruction itself records which thread's unit must receive it.
+    const unsigned owner = store_inst->threadNumber;
+    thread[owner].insertStore(store_inst);
+}
+
+template<class Impl>
+Fault
+LSQ<Impl>::executeLoad(DynInstPtr &inst)
+{
+    // Dispatch to the unit of the instruction's thread; its fault is ours.
+    const unsigned owner = inst->threadNumber;
+    return thread[owner].executeLoad(inst);
+}
+
+template<class Impl>
+Fault
+LSQ<Impl>::executeStore(DynInstPtr &inst)
+{
+    // Dispatch to the unit of the instruction's thread; its fault is ours.
+    const unsigned owner = inst->threadNumber;
+    return thread[owner].executeStore(inst);
+}
+
+template<class Impl>
+void
+LSQ<Impl>::writebackStores()
+{
+    // Visit each active thread in list order and let its unit write back.
+    list<unsigned>::iterator next = activeThreads->begin();
+
+    while (next != activeThreads->end()) {
+        const unsigned tid = *next++;
+        // Only trace threads that actually have stores waiting.
+        if (numStoresToWB(tid) > 0) {
+            DPRINTF(Writeback,"[tid:%i] Writing back stores. %i stores "
+                "available for Writeback.\n", tid, numStoresToWB(tid));
+        }
+        thread[tid].writebackStores();
+    }
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::violation()
+{
+    // True as soon as any active thread's unit reports a violation.
+    for (list<unsigned>::iterator it = activeThreads->begin();
+         it != activeThreads->end(); ++it) {
+        if (thread[*it].violation()) {
+            return true;
+        }
+    }
+
+    // No active thread had one (also the result for an empty list).
+    return false;
+}
+
+template<class Impl>
+int
+LSQ<Impl>::getCount()
+{
+    // Accumulate the per-thread instruction counts over the active threads.
+    unsigned sum = 0;
+
+    for (list<unsigned>::iterator it = activeThreads->begin();
+         it != activeThreads->end(); ++it) {
+        sum += getCount(*it);
+    }
+
+    // The unsigned sum is converted to the int return type on the way out.
+    return sum;
+}
+
+template<class Impl>
+int
+LSQ<Impl>::numLoads()
+{
+    // Accumulate the per-thread load counts over the active threads.
+    unsigned sum = 0;
+
+    for (list<unsigned>::iterator it = activeThreads->begin();
+         it != activeThreads->end(); ++it) {
+        sum += numLoads(*it);
+    }
+
+    // The unsigned sum is converted to the int return type on the way out.
+    return sum;
+}
+
+template<class Impl>
+int
+LSQ<Impl>::numStores()
+{
+    // Accumulate the per-thread store counts over the active threads.
+    unsigned sum = 0;
+
+    for (list<unsigned>::iterator it = activeThreads->begin();
+         it != activeThreads->end(); ++it) {
+        sum += thread[*it].numStores();
+    }
+
+    // The unsigned sum is converted to the int return type on the way out.
+    return sum;
+}
+
+template<class Impl>
+int
+LSQ<Impl>::numLoadsReady()
+{
+    // Accumulate the per-thread ready-load counts over the active threads.
+    unsigned sum = 0;
+
+    for (list<unsigned>::iterator it = activeThreads->begin();
+         it != activeThreads->end(); ++it) {
+        sum += thread[*it].numLoadsReady();
+    }
+
+    // The unsigned sum is converted to the int return type on the way out.
+    return sum;
+}
+
+template<class Impl>
+unsigned
+LSQ<Impl>::numFreeEntries()
+{
+    // Sum of the free-entry counts of every active thread's LSQ unit.
+    unsigned free_total = 0;
+
+    for (list<unsigned>::iterator it = (*activeThreads).begin();
+         it != (*activeThreads).end(); ++it) {
+        const unsigned tid = *it;
+        free_total += thread[tid].numFreeEntries();
+    }
+
+    return free_total;
+}
+
+template<class Impl>
+unsigned
+LSQ<Impl>::numFreeEntries(unsigned tid)
+{
+    // Always answers for the given thread's own LSQ unit; the Dynamic
+    // policy fallback to the all-thread total is currently disabled.
+    const unsigned free_entries = thread[tid].numFreeEntries();
+    return free_entries;
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::isFull()
+{
+    // Full only if every active thread has a full LQ or a full SQ.
+    for (list<unsigned>::iterator it = (*activeThreads).begin();
+         it != (*activeThreads).end(); ++it) {
+        const unsigned tid = *it;
+        if (!thread[tid].lqFull() && !thread[tid].sqFull())
+            return false;
+    }
+
+    return true;
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::isFull(unsigned tid)
+{
+    // @todo: Change to calculate all entries for the Dynamic policy; until
+    // then Dynamic simply defers to the all-thread check.
+    if (lsqPolicy != Dynamic)
+        return thread[tid].lqFull() || thread[tid].sqFull();
+
+    return isFull();
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::lqFull()
+{
+    // Full only if the load queue of every active thread is full.
+    for (list<unsigned>::iterator it = (*activeThreads).begin();
+         it != (*activeThreads).end(); ++it) {
+        const unsigned tid = *it;
+        if (!thread[tid].lqFull())
+            return false;
+    }
+
+    return true;
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::lqFull(unsigned tid)
+{
+    // @todo: Change to calculate all entries for the Dynamic policy; until
+    // then Dynamic simply defers to the all-thread check.
+    if (lsqPolicy != Dynamic)
+        return thread[tid].lqFull();
+
+    return lqFull();
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::sqFull()
+{
+    // Full only if every active thread's SQ is full.  Ask the LSQ units
+    // directly: under Dynamic, sqFull(tid) calls back here and never ends.
+    list<unsigned>::iterator active_threads = (*activeThreads).begin();
+    while (active_threads != (*activeThreads).end()) {
+        unsigned tid = *active_threads++;
+        if (!thread[tid].sqFull())
+            return false;
+    }
+    return true;
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::sqFull(unsigned tid)
+{
+    // @todo: Change to calculate all entries for the Dynamic policy; until
+    // then Dynamic simply defers to the all-thread check.
+    if (lsqPolicy != Dynamic)
+        return thread[tid].sqFull();
+
+    return sqFull();
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::isStalled()
+{
+    // Stalled only if the LSQ unit of every active thread is stalled.
+    for (list<unsigned>::iterator it = (*activeThreads).begin();
+         it != (*activeThreads).end(); ++it) {
+        const unsigned tid = *it;
+        if (!thread[tid].isStalled())
+            return false;
+    }
+
+    return true;
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::isStalled(unsigned tid)
+{
+    // Under the Dynamic policy the answer comes from the all-thread check;
+    // otherwise only the given thread's LSQ unit is consulted.
+    return (lsqPolicy == Dynamic) ? isStalled()
+                                  : thread[tid].isStalled();
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::hasStoresToWB()
+{
+    // Answers: does any active thread have stores to write back?  The old
+    // all-threads test reported false whenever a single thread had none.
+    list<unsigned>::iterator active_threads = (*activeThreads).begin();
+    while (active_threads != (*activeThreads).end()) {
+        unsigned tid = *active_threads++;
+        if (hasStoresToWB(tid))
+            return true;
+    }
+    return false;
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::willWB()
+{
+    // Answers: will any active thread write back this cycle?  The old
+    // all-threads test reported false whenever a single thread would not.
+    list<unsigned>::iterator active_threads = (*activeThreads).begin();
+    while (active_threads != (*activeThreads).end()) {
+        unsigned tid = *active_threads++;
+        if (willWB(tid))
+            return true;
+    }
+    return false;
+}
+
+template<class Impl>
+void
+LSQ<Impl>::dumpInsts()
+{
+    // Debugging aid: dump the LSQ contents of each active thread in turn.
+    for (list<unsigned>::iterator it = (*activeThreads).begin();
+         it != (*activeThreads).end(); ++it) {
+        const unsigned tid = *it;
+        thread[tid].dumpInsts();
+    }
+}
diff --git a/src/cpu/o3/lsq_unit.cc b/src/cpu/o3/lsq_unit.cc
new file mode 100644
index 000000000..dd29007bc
--- /dev/null
+++ b/src/cpu/o3/lsq_unit.cc
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cpu/o3/alpha_dyn_inst.hh"
+#include "cpu/o3/alpha_cpu.hh"
+#include "cpu/o3/alpha_impl.hh"
+#include "cpu/o3/lsq_unit_impl.hh"
+
+// Force the instantiation of LSQUnit for all implementations we care about.
+template class LSQUnit<AlphaSimpleImpl>;
+
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
new file mode 100644
index 000000000..b339cea2c
--- /dev/null
+++ b/src/cpu/o3/lsq_unit.hh
@@ -0,0 +1,629 @@
+/*
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_O3_LSQ_UNIT_HH__
+#define __CPU_O3_LSQ_UNIT_HH__
+
+#include <algorithm>
+#include <map>
+#include <queue>
+
+#include "arch/faults.hh"
+#include "config/full_system.hh"
+#include "base/hashmap.hh"
+#include "cpu/inst_seq.hh"
+#include "mem/packet.hh"
+#include "mem/port.hh"
+//#include "mem/page_table.hh"
+//#include "sim/debug.hh"
+//#include "sim/sim_object.hh"
+
+/**
+ * Class that implements the actual LQ and SQ for each specific
+ * thread.  Both are circular queues; load entries are freed upon
+ * committing, while store entries are freed once they writeback. The
+ * LSQUnit tracks if there are memory ordering violations, and also
+ * detects partial load to store forwarding cases (a store only has
+ * part of a load's data) that requires the load to wait until the
+ * store writes back. In the former case it holds onto the instruction
+ * until the dependence unit looks at it, and in the latter it stalls
+ * the LSQ until the store writes back. At that point the load is
+ * replayed.
+ */
+template <class Impl>
+class LSQUnit {
+  protected:
+    typedef TheISA::IntReg IntReg;
+  public:
+    typedef typename Impl::Params Params;
+    typedef typename Impl::FullCPU FullCPU;
+    typedef typename Impl::DynInstPtr DynInstPtr;
+    typedef typename Impl::CPUPol::IEW IEW;
+    typedef typename Impl::CPUPol::IssueStruct IssueStruct;
+
+  public:
+    /** Constructs an LSQ unit. init() must be called prior to use. */
+    LSQUnit();
+
+    /** Initializes the LSQ unit with the specified number of entries. */
+    void init(Params *params, unsigned maxLQEntries,
+              unsigned maxSQEntries, unsigned id);
+
+    /** Returns the name of the LSQ unit. */
+    std::string name() const;
+
+    /** Sets the CPU pointer. */
+    void setCPU(FullCPU *cpu_ptr);
+
+    /** Sets the IEW stage pointer. */
+    void setIEW(IEW *iew_ptr)
+    { iewStage = iew_ptr; }
+
+    /** Sets the page table pointer. */
+//    void setPageTable(PageTable *pt_ptr);
+
+    void switchOut();
+
+    void takeOverFrom();
+    /** Returns the switchedOut flag. */
+    bool isSwitchedOut() { return switchedOut; }
+
+    /** Ticks the LSQ unit, which in this case only resets the number of
+     * used cache ports.
+     * @todo: Move the number of used ports up to the LSQ level so it can
+     * be shared by all LSQ units.
+     */
+    void tick() { usedPorts = 0; }
+
+    /** Inserts an instruction. */
+    void insert(DynInstPtr &inst);
+    /** Inserts a load instruction. */
+    void insertLoad(DynInstPtr &load_inst);
+    /** Inserts a store instruction. */
+    void insertStore(DynInstPtr &store_inst);
+
+    /** Executes a load instruction. */
+    Fault executeLoad(DynInstPtr &inst);
+    /** Index-based overload of executeLoad(); unimplemented, panics. */
+    Fault executeLoad(int lq_idx) { panic("Not implemented"); return NoFault; }
+    /** Executes a store instruction. */
+    Fault executeStore(DynInstPtr &inst);
+
+    /** Commits the head load. */
+    void commitLoad();
+    /** Commits loads older than a specific sequence number. */
+    void commitLoads(InstSeqNum &youngest_inst);
+
+    /** Commits stores older than a specific sequence number. */
+    void commitStores(InstSeqNum &youngest_inst);
+
+    /** Writes back stores. */
+    void writebackStores();
+
+    void completeDataAccess(PacketPtr pkt);
+
+    void completeStoreDataAccess(DynInstPtr &inst);
+
+    // @todo: Include stats in the LSQ unit.
+    //void regStats();
+
+    /** Clears all the entries in the LQ. */
+    void clearLQ();
+
+    /** Clears all the entries in the SQ. */
+    void clearSQ();
+
+    /** Resizes the LQ to a given size. */
+    void resizeLQ(unsigned size);
+
+    /** Resizes the SQ to a given size. */
+    void resizeSQ(unsigned size);
+
+    /** Squashes all instructions younger than a specific sequence number. */
+    void squash(const InstSeqNum &squashed_num);
+
+    /** Returns if there is a memory ordering violation. Value is reset upon
+     * call to getMemDepViolator().
+     */
+    bool violation() { return memDepViolator; }
+
+    /** Returns the memory ordering violator. */
+    DynInstPtr getMemDepViolator();
+
+    /** Returns if a load became blocked due to the memory system. */
+    bool loadBlocked()
+    { return isLoadBlocked; }
+    /** Clears the load-blocked flag. */
+    void clearLoadBlocked()
+    { isLoadBlocked = false; }
+    /** Returns whether the blocked load has been marked as handled. */
+    bool isLoadBlockedHandled()
+    { return loadBlockedHandled; }
+    /** Marks the blocked load as handled. */
+    void setLoadBlockedHandled()
+    { loadBlockedHandled = true; }
+
+    /** Returns the number of free entries (min of free LQ and SQ entries). */
+    unsigned numFreeEntries();
+
+    /** Returns the number of loads ready to execute. */
+    int numLoadsReady();
+
+    /** Returns the number of loads in the LQ. */
+    int numLoads() { return loads; }
+
+    /** Returns the number of stores in the SQ. */
+    int numStores() { return stores; }
+
+    /** Returns if either the LQ or SQ is full. */
+    bool isFull() { return lqFull() || sqFull(); }
+
+    /** Returns if the LQ is full; LQEntries counts a sentinel entry. */
+    bool lqFull() { return loads >= (LQEntries - 1); }
+
+    /** Returns if the SQ is full; SQEntries counts a sentinel entry. */
+    bool sqFull() { return stores >= (SQEntries - 1); }
+
+    /** Returns the number of instructions in the LSQ. */
+    unsigned getCount() { return loads + stores; }
+
+    /** Returns if there are any stores to writeback. */
+    bool hasStoresToWB() { return storesToWB; }
+
+    /** Returns the number of stores to writeback. */
+    int numStoresToWB() { return storesToWB; }
+
+    /** Returns if the LSQ unit will writeback on this cycle. */
+    bool willWB() { return storeQueue[storeWBIdx].canWB &&
+                        !storeQueue[storeWBIdx].completed/* &&
+                                                            !dcacheInterface->isBlocked()*/; }
+
+  private:
+    /** Completes the store at the specified index. */
+    void completeStore(int store_idx);
+
+    /** Increments the given store index (circular queue). */
+    inline void incrStIdx(int &store_idx);
+    /** Decrements the given store index (circular queue). */
+    inline void decrStIdx(int &store_idx);
+    /** Increments the given load index (circular queue). */
+    inline void incrLdIdx(int &load_idx);
+    /** Decrements the given load index (circular queue). */
+    inline void decrLdIdx(int &load_idx);
+
+  public:
+    /** Debugging function to dump instructions in the LSQ. */
+    void dumpInsts();
+
+  private:
+    /** Pointer to the CPU. */
+    FullCPU *cpu;
+
+    /** Pointer to the IEW stage. */
+    IEW *iewStage;
+
+    MemObject *mem;
+    /** Port subclass that keeps pointers to its CPU and LSQ unit. */
+    class DcachePort : public Port
+    {
+      protected:
+        FullCPU *cpu;
+        LSQUnit *lsq;
+
+      public:
+        DcachePort(FullCPU *_cpu, LSQUnit *_lsq)
+            : Port(_lsq->name() + "-dport"), cpu(_cpu), lsq(_lsq)
+        { }
+
+      protected:
+        virtual Tick recvAtomic(PacketPtr pkt);
+
+        virtual void recvFunctional(PacketPtr pkt);
+
+        virtual void recvStatusChange(Status status);
+
+        virtual void getDeviceAddressRanges(AddrRangeList &resp,
+                                            AddrRangeList &snoop)
+        { resp.clear(); snoop.clear(); }
+
+        virtual bool recvTiming(PacketPtr pkt);
+
+        virtual void recvRetry();
+    };
+
+    /** Pointer to the D-cache port. */
+    DcachePort *dcachePort;
+
+    /** Pointer to the page table. */
+//    PageTable *pTable;
+
+  public:
+    struct SQEntry {
+        /** Constructs an empty store queue entry. */
+        SQEntry()
+            : inst(NULL), req(NULL), size(0), data(0),
+              canWB(0), committed(0), completed(0)
+        { }
+
+        /** Constructs a store queue entry for a given instruction. */
+        SQEntry(DynInstPtr &_inst)
+            : inst(_inst), req(NULL), size(0), data(0),
+              canWB(0), committed(0), completed(0)
+        { }
+
+        /** The store instruction. */
+        DynInstPtr inst;
+        /** The request for the store. */
+        RequestPtr req;
+        /** The size of the store. */
+        int size;
+        /** The store data. */
+        IntReg data;
+        /** Whether or not the store can writeback. */
+        bool canWB;
+        /** Whether or not the store is committed. */
+        bool committed;
+        /** Whether or not the store is completed. */
+        bool completed;
+    };
+
+  private:
+    /** The LSQUnit thread id. */
+    unsigned lsqID;
+
+    /** The store queue. */
+    std::vector<SQEntry> storeQueue;
+
+    /** The load queue. */
+    std::vector<DynInstPtr> loadQueue;
+
+    /** The number of LQ entries, plus a sentinel entry (circular queue).
+     *  @todo: Consider having var that records the true number of LQ entries.
+     */
+    unsigned LQEntries;
+    /** The number of SQ entries, plus a sentinel entry (circular queue).
+     *  @todo: Consider having var that records the true number of SQ entries.
+     */
+    unsigned SQEntries;
+
+    /** The number of load instructions in the LQ. */
+    int loads;
+    /** The number of store instructions in the SQ. */
+    int stores;
+    /** The number of store instructions in the SQ waiting to writeback. */
+    int storesToWB;
+
+    /** The index of the head instruction in the LQ. */
+    int loadHead;
+    /** The index of the tail instruction in the LQ. */
+    int loadTail;
+
+    /** The index of the head instruction in the SQ. */
+    int storeHead;
+    /** The index of the first instruction that may be ready to be
+     * written back, and has not yet been written back.
+     */
+    int storeWBIdx;
+    /** The index of the tail instruction in the SQ. */
+    int storeTail;
+
+    /// @todo Consider moving to a more advanced model with write vs read ports
+    /** The number of cache ports available each cycle. */
+    int cachePorts;
+
+    /** The number of used cache ports in this cycle. */
+    int usedPorts;
+    /** Flag returned by isSwitchedOut(). */
+    bool switchedOut;
+
+    //list<InstSeqNum> mshrSeqNums;
+
+    /** Wire to read information from the issue stage time queue. */
+    typename TimeBuffer<IssueStruct>::wire fromIssue;
+
+    /** Whether or not the LSQ is stalled. */
+    bool stalled;
+    /** The store that causes the stall due to partial store to load
+     * forwarding.
+     */
+    InstSeqNum stallingStoreIsn;
+    /** The index of the load stalled on the above store. */
+    int stallingLoadIdx;
+
+    /** Whether or not a load is blocked due to the memory system. */
+    bool isLoadBlocked;
+    /** Whether or not the blocked load has been handled. */
+    bool loadBlockedHandled;
+    /** The sequence number of the blocked load. */
+    InstSeqNum blockedLoadSeqNum;
+
+    /** The oldest load that caused a memory ordering violation. */
+    DynInstPtr memDepViolator;
+
+    // Will also need how many read/write ports the Dcache has.  Or keep track
+    // of that in stage that is one level up, and only call executeLoad/Store
+    // the appropriate number of times.
+/*
+    // total number of loads forwarded from LSQ stores
+    Stats::Vector<> lsq_forw_loads;
+
+    // total number of loads ignored due to invalid addresses
+    Stats::Vector<> inv_addr_loads;
+
+    // total number of software prefetches ignored due to invalid addresses
+    Stats::Vector<> inv_addr_swpfs;
+
+    // total non-speculative bogus addresses seen (debug var)
+    Counter sim_invalid_addrs;
+    Stats::Vector<> fu_busy;  //cumulative fu busy
+
+    // ready loads blocked due to memory disambiguation
+    Stats::Vector<> lsq_blocked_loads;
+
+    Stats::Scalar<> lsqInversion;
+*/
+  public:
+    /** Executes the load at the given index. */
+    template <class T>
+    Fault read(Request *req, T &data, int load_idx);
+
+    /** Executes the store at the given index. */
+    template <class T>
+    Fault write(Request *req, T &data, int store_idx);
+
+    /** Returns the index of the head load instruction. */
+    int getLoadHead() { return loadHead; }
+    /** Returns the head load's sequence number, or 0 if there is none. */
+    InstSeqNum getLoadHeadSeqNum()
+    {
+        if (loadQueue[loadHead]) {
+            return loadQueue[loadHead]->seqNum;
+        } else {
+            return 0;
+        }
+
+    }
+
+    /** Returns the index of the head store instruction. */
+    int getStoreHead() { return storeHead; }
+    /** Returns the head store's sequence number, or 0 if there is none. */
+    InstSeqNum getStoreHeadSeqNum()
+    {
+        if (storeQueue[storeHead].inst) {
+            return storeQueue[storeHead].inst->seqNum;
+        } else {
+            return 0;
+        }
+
+    }
+
+    /** Returns whether or not the LSQ unit is stalled. */
+    bool isStalled()  { return stalled; }
+};
+
+template <class Impl>
+template <class T>
+Fault
+LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
+{
+    DynInstPtr load_inst = loadQueue[load_idx];
+    // Services the load at load_idx: forward from the SQ, stall, or go to memory.
+    assert(load_inst);
+
+    assert(!load_inst->isExecuted());
+
+    // Make sure this isn't an uncacheable access
+    // A bit of a hackish way to get uncached accesses to work only if they're
+    // at the head of the LSQ and are ready to commit (at the head of the ROB
+    // too).
+    if (req->getFlags() & UNCACHEABLE &&
+        (load_idx != loadHead || !load_inst->reachedCommit)) {
+        iewStage->rescheduleMemInst(load_inst);
+        return TheISA::genMachineCheckFault();
+    }
+
+    // Check the SQ for any previous stores that might lead to forwarding
+    int store_idx = load_inst->sqIdx;
+
+    int store_size = 0;
+
+    DPRINTF(LSQUnit, "Read called, load idx: %i, store idx: %i, "
+            "storeHead: %i addr: %#x\n",
+            load_idx, store_idx, storeHead, req->getPaddr());
+
+#if 0
+    if (req->getFlags() & LOCKED) {
+        cpu->lockAddr = req->getPaddr();
+        cpu->lockFlag = true;
+    }
+#endif
+
+    while (store_idx != -1) {
+        // End once we've reached storeWBIdx (nothing older is checked)
+        if (store_idx == storeWBIdx) {
+            break;
+        }
+
+        // Move the index to the next older store (circular queue)
+        if (--store_idx < 0)
+            store_idx += SQEntries;
+
+        assert(storeQueue[store_idx].inst);
+
+        store_size = storeQueue[store_idx].size;
+
+        if (store_size == 0)
+            continue;
+
+        // Check if the store data is within the lower and upper bounds of
+        // addresses that the request needs.
+        bool store_has_lower_limit =
+            req->getVaddr() >= storeQueue[store_idx].inst->effAddr;
+        bool store_has_upper_limit =
+            (req->getVaddr() + req->getSize()) <=
+            (storeQueue[store_idx].inst->effAddr + store_size);
+        bool lower_load_has_store_part =
+            req->getVaddr() < (storeQueue[store_idx].inst->effAddr +
+                           store_size);
+        bool upper_load_has_store_part =
+            (req->getVaddr() + req->getSize()) >
+            storeQueue[store_idx].inst->effAddr;
+
+        // If the store's data has all of the data needed, we can forward.
+        if (store_has_lower_limit && store_has_upper_limit) {
+            // Get shift amount for offset into the store's data.
+            int shift_amt = req->getVaddr() & (store_size - 1);
+            // @todo: Magic number, assumes byte addressing
+            shift_amt = shift_amt << 3;
+
+            // Cast this to type T?
+            data = storeQueue[store_idx].data >> shift_amt;
+
+            assert(!load_inst->memData);
+            load_inst->memData = new uint8_t[64];
+            // NOTE(review): fixed 64-byte buffer; assumes getSize() <= 64.
+            memcpy(load_inst->memData, &data, req->getSize());
+
+            DPRINTF(LSQUnit, "Forwarding from store idx %i to load to "
+                    "addr %#x, data %#x\n",
+                    store_idx, req->getVaddr(), *(load_inst->memData));
+/*
+            typename LdWritebackEvent *wb =
+                new typename LdWritebackEvent(load_inst,
+                                              iewStage);
+
+            // We'll say this has a 1 cycle load-store forwarding latency
+            // for now.
+            // @todo: Need to make this a parameter.
+            wb->schedule(curTick);
+*/
+            // Should keep track of stat for forwarded data
+            return NoFault;
+        } else if ((store_has_lower_limit && lower_load_has_store_part) ||
+                   (store_has_upper_limit && upper_load_has_store_part) ||
+                   (lower_load_has_store_part && upper_load_has_store_part)) {
+            // This is the partial store-load forwarding case where a store
+            // has only part of the load's data.
+
+            // If it's already been written back, then don't worry about
+            // stalling on it.
+            if (storeQueue[store_idx].completed) {
+                continue;
+            }
+
+            // Must stall load and force it to retry, so long as it's the oldest
+            // load that needs to do so.
+            if (!stalled ||
+                (stalled &&
+                 load_inst->seqNum <
+                 loadQueue[stallingLoadIdx]->seqNum)) {
+                stalled = true;
+                stallingStoreIsn = storeQueue[store_idx].inst->seqNum;
+                stallingLoadIdx = load_idx;
+            }
+
+            // Tell IQ/mem dep unit that this instruction will need to be
+            // rescheduled eventually
+            iewStage->rescheduleMemInst(load_inst);
+
+            // Do not generate a writeback event as this instruction is not
+            // complete.
+            DPRINTF(LSQUnit, "Load-store forwarding mis-match. "
+                    "Store idx %i to load addr %#x\n",
+                    store_idx, req->getVaddr());
+
+            return NoFault;
+        }
+    }
+
+    // If there's no forwarding case, then go access memory
+    DPRINTF(LSQUnit, "Doing functional access for inst [sn:%lli] PC %#x\n",
+            load_inst->seqNum, load_inst->readPC());
+
+    assert(!load_inst->memData);
+    load_inst->memData = new uint8_t[64];
+
+    ++usedPorts;
+
+    DPRINTF(LSQUnit, "Doing timing access for inst PC %#x\n",
+            load_inst->readPC());
+
+    PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast);
+    data_pkt->dataStatic(load_inst->memData);
+
+    // Issue the timing read; a failed send is treated as a blocked load.
+    if (!dcachePort->sendTiming(data_pkt)) {
+        // There's an older load that's already going to squash.
+        if (isLoadBlocked && blockedLoadSeqNum < load_inst->seqNum)
+            return NoFault;
+        // NOTE(review): data_pkt is not freed on this path -- confirm owner.
+        // Record that the load was blocked due to memory.  This
+        // load will squash all instructions after it, be
+        // refetched, and re-executed.
+        isLoadBlocked = true;
+        loadBlockedHandled = false;
+        blockedLoadSeqNum = load_inst->seqNum;
+        // No fault occurred, even though the interface is blocked.
+        return NoFault;
+    }
+    // Trace-only: report whether the access hit or missed.
+    if (data_pkt->result != Packet::Success) {
+        DPRINTF(LSQUnit, "LSQUnit: D-cache miss!\n");
+        DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n",
+                load_inst->seqNum);
+    } else {
+        DPRINTF(LSQUnit, "LSQUnit: D-cache hit!\n");
+        DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n",
+                load_inst->seqNum);
+    }
+
+    return NoFault;
+}
+
+template <class Impl>
+template <class T>
+Fault
+LSQUnit<Impl>::write(Request *req, T &data, int store_idx)
+{
+    // Buffer the store's request, size and data in its store queue entry.
+    SQEntry &entry = storeQueue[store_idx];
+    assert(entry.inst);
+    DPRINTF(LSQUnit, "Doing write to store idx %i, addr %#x data %#x"
+            " | storeHead:%i [sn:%i]\n",
+            store_idx, req->getPaddr(), data, storeHead,
+            entry.inst->seqNum);
+
+    entry.req = req;
+    entry.size = sizeof(T);
+    entry.data = data;
+
+    // Only the store queue is written here, so no fault can be raised.
+    return NoFault;
+}
+
+#endif // __CPU_O3_LSQ_UNIT_HH__
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
new file mode 100644
index 000000000..3f6af3d2c
--- /dev/null
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -0,0 +1,866 @@
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cpu/checker/cpu.hh"
+#include "cpu/o3/lsq_unit.hh"
+#include "base/str.hh"
+#include "mem/request.hh"
+
+template<class Impl>
+void
+LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
+{
+/*
+    DPRINTF(IEW, "Load writeback event [sn:%lli]\n", inst->seqNum);
+    DPRINTF(Activity, "Activity: Ld Writeback event [sn:%lli]\n", inst->seqNum);
+
+    //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum);
+
+    if (iewStage->isSwitchedOut()) {
+        inst = NULL;
+        return;
+    } else if (inst->isSquashed()) {
+        iewStage->wakeCPU();
+        inst = NULL;
+        return;
+    }
+
+    iewStage->wakeCPU();
+
+    if (!inst->isExecuted()) {
+        inst->setExecuted();
+
+        // Complete access to copy data to proper place.
+        inst->completeAcc();
+    }
+
+    // Need to insert instruction into queue to commit
+    iewStage->instToCommit(inst);
+
+    iewStage->activityThisCycle();
+
+    inst = NULL;
+*/
+}
+
+template<class Impl>
+void
+LSQUnit<Impl>::completeStoreDataAccess(DynInstPtr &inst)
+{
+/*
+    DPRINTF(LSQ, "Cache miss complete for store idx:%i\n", storeIdx);
+    DPRINTF(Activity, "Activity: st writeback event idx:%i\n", storeIdx);
+
+    //lsqPtr->removeMSHR(lsqPtr->storeQueue[storeIdx].inst->seqNum);
+
+    if (lsqPtr->isSwitchedOut())
+        return;
+
+    lsqPtr->cpu->wakeCPU();
+
+    if (wb)
+        lsqPtr->completeDataAccess(storeIdx);
+    lsqPtr->completeStore(storeIdx);
+*/
+}
+
+template <class Impl>
+Tick
+LSQUnit<Impl>::DcachePort::recvAtomic(PacketPtr pkt)
+{
+    panic("O3CPU model does not work with atomic mode!");
+    return curTick;
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::DcachePort::recvFunctional(PacketPtr pkt)
+{
+    panic("O3CPU doesn't expect recvFunctional callback!");
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::DcachePort::recvStatusChange(Status status)
+{
+    // RangeChange notifications are ignored; any other status is unexpected.
+    if (status != RangeChange) {
+        panic("O3CPU doesn't expect recvStatusChange callback!");
+    }
+}
+
+template <class Impl>
+bool
+LSQUnit<Impl>::DcachePort::recvTiming(PacketPtr pkt)
+{
+    lsq->completeDataAccess(pkt);
+    return true;
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::DcachePort::recvRetry()
+{
+    panic("Retry unsupported for now!");
+    // we shouldn't get a retry unless we have a packet that we're
+    // waiting to transmit
+/*
+    assert(cpu->dcache_pkt != NULL);
+    assert(cpu->_status == DcacheRetry);
+    PacketPtr tmp = cpu->dcache_pkt;
+    if (sendTiming(tmp)) {
+        cpu->_status = DcacheWaitResponse;
+        cpu->dcache_pkt = NULL;
+    }
+*/
+}
+
+template <class Impl>
+LSQUnit<Impl>::LSQUnit()
+    : loads(0), stores(0), storesToWB(0), stalled(false), isLoadBlocked(false),
+      loadBlockedHandled(false)
+{
+}
+
+template<class Impl>
+void
+LSQUnit<Impl>::init(Params *params, unsigned maxLQEntries,
+                    unsigned maxSQEntries, unsigned id)
+{
+    DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",id);
+    // Size the queues, reset all indices, and wire up the d-cache port.
+    switchedOut = false;
+
+    lsqID = id;
+
+    // Add 1 for the sentinel entry (they are circular queues).
+    LQEntries = maxLQEntries + 1;
+    SQEntries = maxSQEntries + 1;
+
+    loadQueue.resize(LQEntries);
+    storeQueue.resize(SQEntries);
+    // Both queues start empty: head and tail indices coincide.
+    loadHead = loadTail = 0;
+
+    storeHead = storeWBIdx = storeTail = 0;
+
+    usedPorts = 0;
+    cachePorts = params->cachePorts;
+    // dcachePort is created in setCPU(), which must therefore run first.
+    Port *mem_dport = params->mem->getPort("");
+    dcachePort->setPeer(mem_dport);
+    mem_dport->setPeer(dcachePort);
+
+    memDepViolator = NULL;
+
+    blockedLoadSeqNum = 0;
+}
+
+template<class Impl>
+void
+LSQUnit<Impl>::setCPU(FullCPU *cpu_ptr)
+{
+    cpu = cpu_ptr;
+    dcachePort = new DcachePort(cpu, this);
+}
+
+template<class Impl>
+std::string
+LSQUnit<Impl>::name() const
+{
+    // The ".thread.<id>" suffix is left off when Impl::MaxThreads is 1.
+    std::string lsq_name = iewStage->name() + ".lsq";
+    if (Impl::MaxThreads != 1)
+        lsq_name = lsq_name + ".thread." + to_string(lsqID);
+    return lsq_name;
+}
+
+template<class Impl>
+void
+LSQUnit<Impl>::clearLQ()
+{
+    loadQueue.clear();
+}
+
+template<class Impl>
+void
+LSQUnit<Impl>::clearSQ()
+{
+    storeQueue.clear();
+}
+
+#if 0
+template<class Impl>
+void
+LSQUnit<Impl>::setPageTable(PageTable *pt_ptr)
+{
+    DPRINTF(LSQUnit, "Setting the page table pointer.\n");
+    pTable = pt_ptr;
+}
+#endif
+
+template<class Impl>
+void
+LSQUnit<Impl>::switchOut()
+{
+    switchedOut = true;
+    // Release every load entry; unsigned index avoids a sign-compare.
+    for (unsigned i = 0; i < loadQueue.size(); ++i)
+        loadQueue[i] = NULL;
+    assert(storesToWB == 0);
+}
+
+template<class Impl>
+void
+LSQUnit<Impl>::takeOverFrom()
+{
+    switchedOut = false;
+    loads = stores = storesToWB = 0;
+
+    // Reset both circular queues to the empty state (head == tail).
+    loadHead = loadTail = 0;
+    storeHead = storeWBIdx = storeTail = 0;
+
+    usedPorts = 0;
+
+    memDepViolator = NULL;
+    blockedLoadSeqNum = 0;
+
+    // Clear the stall and blocked-load bookkeeping.
+    stalled = false;
+    isLoadBlocked = false;
+    loadBlockedHandled = false;
+}
+
+template<class Impl>
+void
+LSQUnit<Impl>::resizeLQ(unsigned size)
+{
+    // Requested capacity plus the sentinel slot; shrinking is asserted out.
+    unsigned size_plus_sentinel = size + 1;
+    assert(size_plus_sentinel >= LQEntries);
+
+    if (size_plus_sentinel <= LQEntries) {
+        LQEntries = size_plus_sentinel;
+        return;
+    }
+
+    while (size_plus_sentinel > loadQueue.size()) {
+        loadQueue.push_back(DynInstPtr());
+        LQEntries++;
+    }
+}
+
+template<class Impl>
+void
+LSQUnit<Impl>::resizeSQ(unsigned size)
+{
+    // Requested capacity plus the sentinel slot of the circular queue.
+    unsigned size_plus_sentinel = size + 1;
+
+    // Grow the backing vector only when it is too small; a smaller request
+    // just lowers the logical bound.  SQEntries is assigned directly so it
+    // stays correct even when growing again after an earlier shrink.
+    if (size_plus_sentinel > storeQueue.size())
+        storeQueue.resize(size_plus_sentinel);
+
+    SQEntries = size_plus_sentinel;
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::insert(DynInstPtr &inst)
+{
+    // Only memory references are accepted, and each is a load or a store.
+    assert(inst->isMemRef());
+    assert(inst->isLoad() || inst->isStore());
+
+    // Route to the matching queue before calling setInLSQ() on it.
+    if (inst->isLoad())
+        insertLoad(inst);
+    else
+        insertStore(inst);
+
+    inst->setInLSQ();
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::insertLoad(DynInstPtr &load_inst)
+{
+    // The queue must not be full; one slot is always kept as a sentinel.
+    assert((loadTail + 1) % LQEntries != loadHead);
+    assert(loads < LQEntries);
+
+    DPRINTF(LSQUnit, "Inserting load PC %#x, idx:%i [sn:%lli]\n",
+            load_inst->readPC(), loadTail, load_inst->seqNum);
+
+    // Record the load's own slot plus the store-queue tail at insertion
+    // time (-1 when the store count is zero).
+    load_inst->lqIdx = loadTail;
+    if (stores != 0)
+        load_inst->sqIdx = storeTail;
+    else
+        load_inst->sqIdx = -1;
+
+    loadQueue[loadTail] = load_inst;
+    incrLdIdx(loadTail);
+
+    ++loads;
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::insertStore(DynInstPtr &store_inst)
+{
+    // Make sure it is not full before inserting an instruction.
+    assert((storeTail + 1) % SQEntries != storeHead);
+    assert(stores < SQEntries);
+
+    DPRINTF(LSQUnit, "Inserting store PC %#x, idx:%i [sn:%lli]\n",
+            store_inst->readPC(), storeTail, store_inst->seqNum);
+
+    // lqIdx records the load-queue tail at insertion; executeStore() starts
+    // its ordering scan from there.
+    store_inst->sqIdx = storeTail;
+    store_inst->lqIdx = loadTail;
+
+    storeQueue[storeTail] = SQEntry(store_inst);
+    incrStIdx(storeTail);
+    ++stores;
+}
+
+template <class Impl>
+typename Impl::DynInstPtr
+LSQUnit<Impl>::getMemDepViolator()
+{
+    // Hand the recorded violator to the caller and clear the stored copy.
+    DynInstPtr violator = memDepViolator;
+    memDepViolator = NULL;
+
+    return violator;
+}
+
+template <class Impl>
+unsigned
+LSQUnit<Impl>::numFreeEntries()
+{
+    // Each queue keeps one dummy slot to tell empty from full, so that
+    // slot is subtracted from the free count that gets reported.
+    const unsigned lq_free = LQEntries - loads;
+    const unsigned sq_free = SQEntries - stores;
+
+    // Report whichever queue has fewer free entries.
+    if (sq_free <= lq_free)
+        return sq_free - 1;
+
+    return lq_free - 1;
+}
+
+template <class Impl>
+int
+LSQUnit<Impl>::numLoadsReady()
+{
+    // Count the loads between loadHead and loadTail that are ready to issue.
+    int retval = 0;
+
+    for (int load_idx = loadHead; load_idx != loadTail; incrLdIdx(load_idx)) {
+        assert(loadQueue[load_idx]);
+
+        // Advancing in the loop header guarantees the walk terminates.
+        if (loadQueue[load_idx]->readyToIssue())
+            ++retval;
+    }
+
+    return retval;
+}
+
+template <class Impl>
+Fault
+LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
+{
+    // Execute a specific load: log it, then initiate its access.
+    DPRINTF(LSQUnit, "Executing load PC %#x, [sn:%lli]\n",
+            inst->readPC(),inst->seqNum);
+
+    Fault load_fault = inst->initiateAcc();
+
+    // Nothing more to do here when no fault was raised.
+    if (load_fault == NoFault) {
+        return load_fault;
+    }
+
+    // If the instruction faulted, it has to be sent along to commit
+    // without completing.  Also make sure the IEW stage realizes there
+    // is activity.
+    iewStage->instToCommit(inst);
+    iewStage->activityThisCycle();
+
+    return load_fault;
+}
+
+template <class Impl>
+Fault
+LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
+{
+    using namespace TheISA;
+    // Make sure that a store exists.
+    assert(stores != 0);
+
+    int store_idx = store_inst->sqIdx;
+
+    DPRINTF(LSQUnit, "Executing store PC %#x [sn:%lli]\n",
+            store_inst->readPC(), store_inst->seqNum);
+
+    // The ordering check below scans loads from lqIdx (the load-queue tail
+    // recorded when this store was inserted) up to the current tail.
+    int load_idx = store_inst->lqIdx;
+
+    Fault store_fault = store_inst->initiateAcc();
+//    Fault store_fault = store_inst->execute();
+
+    if (storeQueue[store_idx].size == 0) {
+        DPRINTF(LSQUnit,"Fault on Store PC %#x, [sn:%lli],Size = 0\n",
+                store_inst->readPC(),store_inst->seqNum);
+
+        return store_fault;
+    }
+
+    assert(store_fault == NoFault);
+
+    if (store_inst->isStoreConditional()) {
+        // Store conditionals need to set themselves as able to
+        // writeback if we haven't had a fault by here.
+        storeQueue[store_idx].canWB = true;
+
+        ++storesToWB;
+    }
+    // Only look for a new violator when none is already recorded.
+    if (!memDepViolator) {
+        while (load_idx != loadTail) {
+            // Really only need to check loads that have actually executed
+            // It's safe to check all loads because effAddr is set to
+            // InvalAddr when the dyn inst is created.
+
+            // @todo: For now this is extra conservative, detecting a
+            // violation if the addresses match assuming all accesses
+            // are quad word accesses.
+
+            // @todo: Fix this magic number; >> 8 compares 256-byte blocks.
+            if ((loadQueue[load_idx]->effAddr >> 8) ==
+                (store_inst->effAddr >> 8)) {
+                // A load incorrectly passed this store.  Squash and refetch.
+                // For now return a fault to show that it was unsuccessful.
+                memDepViolator = loadQueue[load_idx];
+
+                return genMachineCheckFault();
+            }
+
+            incrLdIdx(load_idx);
+        }
+
+        // If we've reached this point, there was no violation.
+        memDepViolator = NULL;
+    }
+
+    return store_fault;
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::commitLoad()
+{
+    // Commit always retires the entry at loadHead.
+    DynInstPtr &head_load = loadQueue[loadHead];
+    assert(head_load);
+
+    DPRINTF(LSQUnit, "Committing head load instruction, PC %#x\n",
+            head_load->readPC());
+
+    // Drop the queue's reference to the load, then free up its slot.
+    head_load = NULL;
+    incrLdIdx(loadHead);
+    --loads;
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::commitLoads(InstSeqNum &youngest_inst)
+{
+    assert(loads == 0 || loadQueue[loadHead]);
+
+    // Retire head loads whose sequence number is at most youngest_inst.
+    while (loads != 0 && loadQueue[loadHead]->seqNum <= youngest_inst)
+        commitLoad();
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::commitStores(InstSeqNum &youngest_inst)
+{
+    assert(stores == 0 || storeQueue[storeHead].inst);
+
+    // Walk from the head and stop at the first store that is not yet
+    // writable and has a sequence number above youngest_inst.
+    for (int idx = storeHead; idx != storeTail; incrStIdx(idx)) {
+        SQEntry &entry = storeQueue[idx];
+        assert(entry.inst);
+
+        // Stores already marked as able to write back are skipped.
+        if (entry.canWB)
+            continue;
+
+        if (entry.inst->seqNum > youngest_inst)
+            break;
+
+        DPRINTF(LSQUnit, "Marking store as able to write back, PC "
+                "%#x [sn:%lli]\n",
+                entry.inst->readPC(),
+                entry.inst->seqNum);
+
+        // This store is now committed, so it may be written back.
+        entry.canWB = true;
+        ++storesToWB;
+    }
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::writebackStores()
+{
+    while (storesToWB > 0 &&
+           storeWBIdx != storeTail &&
+           storeQueue[storeWBIdx].inst &&
+           storeQueue[storeWBIdx].canWB &&
+           usedPorts < cachePorts) {
+
+        // Store didn't write any data so no need to write it back to
+        // memory.
+        if (storeQueue[storeWBIdx].size == 0) {
+            completeStore(storeWBIdx);
+
+            incrStIdx(storeWBIdx);
+
+            continue;
+        }
+/*
+        if (dcacheInterface && dcacheInterface->isBlocked()) {
+            DPRINTF(LSQUnit, "Unable to write back any more stores, cache"
+                    " is blocked!\n");
+            break;
+        }
+*/
+        ++usedPorts;
+
+        if (storeQueue[storeWBIdx].inst->isDataPrefetch()) {
+            incrStIdx(storeWBIdx);
+
+            continue;
+        }
+
+        assert(storeQueue[storeWBIdx].req);
+        assert(!storeQueue[storeWBIdx].committed);
+
+        DynInstPtr inst = storeQueue[storeWBIdx].inst;
+
+        Request *req = storeQueue[storeWBIdx].req;
+        storeQueue[storeWBIdx].committed = true;
+        // Stage the store data in inst->memData for the outgoing packet.
+        assert(!inst->memData);
+        inst->memData = new uint8_t[64];
+        memcpy(inst->memData, (uint8_t *)&storeQueue[storeWBIdx].data, req->getSize());
+
+        PacketPtr data_pkt = new Packet(req, Packet::WriteReq, Packet::Broadcast);
+        data_pkt->dataStatic(inst->memData);
+
+        DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%#x "
+                "to Addr:%#x, data:%#x [sn:%lli]\n",
+                storeWBIdx, storeQueue[storeWBIdx].inst->readPC(),
+                req->getPaddr(), *(inst->memData),
+                storeQueue[storeWBIdx].inst->seqNum);
+
+        if (!dcachePort->sendTiming(data_pkt)) {
+            // FIXME: blocked sends are unhandled; storeWBIdx is not advanced.
+        } else {
+            /*
+            StoreCompletionEvent *store_event = new
+                StoreCompletionEvent(storeWBIdx, NULL, this);
+            */
+            if (isStalled() &&
+                storeQueue[storeWBIdx].inst->seqNum == stallingStoreIsn) {
+                DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] "
+                        "load idx:%i\n",
+                        stallingStoreIsn, stallingLoadIdx);
+                stalled = false;
+                stallingStoreIsn = 0;
+                iewStage->replayMemInst(loadQueue[stallingLoadIdx]);
+            }
+/*
+            typename LdWritebackEvent *wb = NULL;
+            if (req->flags & LOCKED) {
+                // Stx_C should not generate a system port transaction
+                // if it misses in the cache, but that might be hard
+                // to accomplish without explicit cache support.
+                wb = new typename
+                    LdWritebackEvent(storeQueue[storeWBIdx].inst,
+                                     iewStage);
+                store_event->wbEvent = wb;
+            }
+*/
+            if (data_pkt->result != Packet::Success) {
+                DPRINTF(LSQUnit,"D-Cache Write Miss on idx:%i!\n",
+                        storeWBIdx);
+
+                DPRINTF(Activity, "Active st accessing mem miss [sn:%lli]\n",
+                        storeQueue[storeWBIdx].inst->seqNum);
+
+                //mshrSeqNums.push_back(storeQueue[storeWBIdx].inst->seqNum);
+
+                //DPRINTF(LSQUnit, "Added MSHR. count = %i\n",mshrSeqNums.size());
+
+                // @todo: Increment stat here.
+            } else {
+                DPRINTF(LSQUnit,"D-Cache: Write Hit on idx:%i !\n",
+                        storeWBIdx);
+
+                DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n",
+                        storeQueue[storeWBIdx].inst->seqNum);
+            }
+
+            incrStIdx(storeWBIdx);
+        }
+    }
+
+    // Not sure this should set it to 0.
+    usedPorts = 0;
+
+    assert(stores >= 0 && storesToWB >= 0);
+}
+
+/*template <class Impl>
+void
+LSQUnit<Impl>::removeMSHR(InstSeqNum seqNum)
+{
+    list<InstSeqNum>::iterator mshr_it = find(mshrSeqNums.begin(),
+                                              mshrSeqNums.end(),
+                                              seqNum);
+
+    if (mshr_it != mshrSeqNums.end()) {
+        mshrSeqNums.erase(mshr_it);
+        DPRINTF(LSQUnit, "Removing MSHR. count = %i\n",mshrSeqNums.size());
+    }
+}*/
+
+template <class Impl>
+void
+LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
+{
+    DPRINTF(LSQUnit, "Squashing until [sn:%lli]!"
+            "(Loads:%i Stores:%i)\n", squashed_num, loads, stores);
+    // Walk the load queue backwards, starting just before the tail.
+    int load_idx = loadTail;
+    decrLdIdx(load_idx);
+
+    while (loads != 0 && loadQueue[load_idx]->seqNum > squashed_num) {
+        DPRINTF(LSQUnit,"Load Instruction PC %#x squashed, "
+                "[sn:%lli]\n",
+                loadQueue[load_idx]->readPC(),
+                loadQueue[load_idx]->seqNum);
+
+        if (isStalled() && load_idx == stallingLoadIdx) {
+            stalled = false;
+            stallingStoreIsn = 0;
+            stallingLoadIdx = 0;
+        }
+
+        // Clear the smart pointer to make sure it is decremented.
+        loadQueue[load_idx]->squashed = true;
+        loadQueue[load_idx] = NULL;
+        --loads;
+
+        // Inefficient!
+        loadTail = load_idx;
+
+        decrLdIdx(load_idx);
+    }
+
+    if (isLoadBlocked) {
+        if (squashed_num < blockedLoadSeqNum) {
+            isLoadBlocked = false;
+            loadBlockedHandled = false;
+            blockedLoadSeqNum = 0;
+        }
+    }
+    // Same backwards walk over the store queue.
+    int store_idx = storeTail;
+    decrStIdx(store_idx);
+
+    while (stores != 0 &&
+           storeQueue[store_idx].inst->seqNum > squashed_num) {
+        // Instructions marked as can WB are already committed.
+        if (storeQueue[store_idx].canWB) {
+            break;
+        }
+
+        DPRINTF(LSQUnit,"Store Instruction PC %#x squashed, "
+                "idx:%i [sn:%lli]\n",
+                storeQueue[store_idx].inst->readPC(),
+                store_idx, storeQueue[store_idx].inst->seqNum);
+
+        // I don't think this can happen.  It should have been cleared
+        // by the stalling load.
+        if (isStalled() &&
+            storeQueue[store_idx].inst->seqNum == stallingStoreIsn) {
+            panic("Is stalled should have been cleared by stalling load!\n");
+            stalled = false;
+            stallingStoreIsn = 0;
+        }
+
+        // Clear the smart pointer to make sure it is decremented.
+        storeQueue[store_idx].inst->squashed = true;
+        storeQueue[store_idx].inst = NULL;
+        storeQueue[store_idx].canWB = 0;
+
+        storeQueue[store_idx].req = NULL;
+        --stores;
+
+        // Inefficient!
+        storeTail = store_idx;
+
+        decrStIdx(store_idx);
+    }
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::completeStore(int store_idx)
+{
+    assert(storeQueue[store_idx].inst);
+    storeQueue[store_idx].completed = true;
+    --storesToWB;
+    // A bit conservative because a store completion may not free up entries,
+    // but hopefully avoids two store completions in one cycle from making
+    // the CPU tick twice.
+    cpu->activityThisCycle();
+    // If the head completed, pop it and any completed stores behind it.
+    if (store_idx == storeHead) {
+        do {
+            incrStIdx(storeHead);
+
+            --stores;
+        } while (storeQueue[storeHead].completed &&
+                 storeHead != storeTail);
+
+        iewStage->updateLSQNextCycle = true;
+    }
+
+    DPRINTF(LSQUnit, "Completing store [sn:%lli], idx:%i, store head "
+            "idx:%i\n",
+            storeQueue[store_idx].inst->seqNum, store_idx, storeHead);
+    // Replay the load that was stalled waiting on this store, if any.
+    if (isStalled() &&
+        storeQueue[store_idx].inst->seqNum == stallingStoreIsn) {
+        DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] "
+                "load idx:%i\n",
+                stallingStoreIsn, stallingLoadIdx);
+        stalled = false;
+        stallingStoreIsn = 0;
+        iewStage->replayMemInst(loadQueue[stallingLoadIdx]);
+    }
+
+    storeQueue[store_idx].inst->setCompleted();
+
+    // Tell the checker we've completed this instruction.  Some stores
+    // may get reported twice to the checker, but the checker can
+    // handle that case.
+    if (cpu->checker) {
+        cpu->checker->tick(storeQueue[store_idx].inst);
+    }
+}
+
+template <class Impl>
+inline void
+LSQUnit<Impl>::incrStIdx(int &store_idx)
+{
+    if (++store_idx >= SQEntries)
+        store_idx = 0;
+}
+
+template <class Impl>
+inline void
+LSQUnit<Impl>::decrStIdx(int &store_idx)
+{
+    if (--store_idx < 0)
+        store_idx += SQEntries;
+}
+
+template <class Impl>
+inline void
+LSQUnit<Impl>::incrLdIdx(int &load_idx)
+{
+    if (++load_idx >= LQEntries)
+        load_idx = 0;
+}
+
+template <class Impl>
+inline void
+LSQUnit<Impl>::decrLdIdx(int &load_idx)
+{
+    if (--load_idx < 0)
+        load_idx += LQEntries;
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::dumpInsts()
+{
+    cprintf("Load store queue: Dumping instructions.\n");
+    cprintf("Load queue size: %i\n", loads);
+    cprintf("Load queue: ");
+
+    // Walk head to tail, stopping early at an empty slot.
+    int load_idx = loadHead;
+    while (load_idx != loadTail && loadQueue[load_idx]) {
+        cprintf("%#x ", loadQueue[load_idx]->readPC());
+        incrLdIdx(load_idx);
+    }
+
+    // End the load list so the store summary starts on its own line.
+    cprintf("\n");
+    cprintf("Store queue size: %i\n", stores);
+    cprintf("Store queue: ");
+
+    // Same walk over the store queue.
+    int store_idx = storeHead;
+    while (store_idx != storeTail && storeQueue[store_idx].inst) {
+        cprintf("%#x ", storeQueue[store_idx].inst->readPC());
+        incrStIdx(store_idx);
+    }
+
+    cprintf("\n");
+}
diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh
index 3350903db..45fe490d2 100644
--- a/src/cpu/o3/regfile.hh
+++ b/src/cpu/o3/regfile.hh
@@ -31,6 +31,7 @@
 
 #include "arch/isa_traits.hh"
 #include "arch/faults.hh"
+#include "arch/types.hh"
 #include "base/trace.hh"
 #include "config/full_system.hh"
 #include "cpu/o3/comm.hh"
@@ -44,9 +45,8 @@
 
 /**
  * Simple physical register file class.
- * This really only depends on the ISA, and not the Impl. Things that are
- * in the ifdef FULL_SYSTEM are pretty dependent on the ISA, and probably
- * should go in the AlphaFullCPU.
+ * Right now this is specific to Alpha until we decide if/how to make things
+ * generic enough to support other ISAs.
  */
 template <class Impl>
 class PhysRegFile
@@ -54,8 +54,15 @@ class PhysRegFile
   protected:
     typedef TheISA::IntReg IntReg;
     typedef TheISA::FloatReg FloatReg;
+    typedef TheISA::FloatRegBits FloatRegBits;
     typedef TheISA::MiscRegFile MiscRegFile;
     typedef TheISA::MiscReg MiscReg;
+
+    typedef union {
+        FloatReg d;
+        FloatRegBits q;
+    } PhysFloatReg;
+
     // Note that most of the definitions of the IntReg, FloatReg, etc. exist
     // within the Impl/ISA class and not within this PhysRegFile class.
 
@@ -97,7 +104,7 @@ class PhysRegFile
 
         assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs);
 
-        FloatReg floatReg = floatRegFile.readReg(reg_idx, width);
+        FloatReg floatReg = floatRegFile[reg_idx].d;
 
         DPRINTF(IEW, "RegFile: Access to %d byte float register %i, has "
                 "data %8.8d\n", int(reg_idx), (double)floatReg);
@@ -113,7 +120,7 @@ class PhysRegFile
 
         assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs);
 
-        FloatReg floatReg = floatRegFile.readReg(reg_idx);
+        FloatReg floatReg = floatRegFile[reg_idx].d;
 
         DPRINTF(IEW, "RegFile: Access to float register %i, has "
                 "data %8.8d\n", int(reg_idx), (double)floatReg);
@@ -129,7 +136,7 @@ class PhysRegFile
 
         assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs);
 
-        FloatRegBits floatRegBits = floatRegFile.readRegBits(reg_idx, width);
+        FloatRegBits floatRegBits = floatRegFile[reg_idx].q;
 
         DPRINTF(IEW, "RegFile: Access to %d byte float register %i as int, "
                 "has data %lli\n", int(reg_idx), (uint64_t)floatRegBits);
@@ -144,7 +151,7 @@ class PhysRegFile
 
         assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs);
 
-        FloatRegBits floatRegBits = floatRegFile.readRegBits(reg_idx);
+        FloatRegBits floatRegBits = floatRegFile[reg_idx].q;
 
         DPRINTF(IEW, "RegFile: Access to float register %i as int, "
                 "has data %lli\n", int(reg_idx), (uint64_t)floatRegBits);
@@ -176,7 +183,7 @@ class PhysRegFile
                 int(reg_idx), (double)val);
 
         if (reg_idx != TheISA::ZeroReg)
-            floatRegFile.setReg(reg_idx, val, width);
+            floatRegFile[reg_idx].d = val;
     }
 
     /** Sets a double precision floating point register to the given value. */
@@ -191,7 +198,7 @@ class PhysRegFile
                 int(reg_idx), (double)val);
 
         if (reg_idx != TheISA::ZeroReg)
-            floatRegFile.setReg(reg_idx, val);
+            floatRegFile[reg_idx].d = val;
     }
 
     /** Sets a floating point register to the given integer value. */
@@ -205,7 +212,7 @@ class PhysRegFile
         DPRINTF(IEW, "RegFile: Setting float register %i to %lli\n",
                 int(reg_idx), (uint64_t)val);
 
-        floatRegFile.setRegBits(reg_idx, val, width);
+        floatRegFile[reg_idx].q = val;
     }
 
     void setFloatRegBits(PhysRegIndex reg_idx, FloatRegBits val)
@@ -217,6 +224,13 @@ class PhysRegFile
 
         DPRINTF(IEW, "RegFile: Setting float register %i to %lli\n",
                 int(reg_idx), (uint64_t)val);
+
+        floatRegFile[reg_idx].q = val;
+    }
+
+    MiscReg readMiscReg(int misc_reg, unsigned thread_id)
+    {
+        return miscRegs[thread_id].readReg(misc_reg);
     }
 
     MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault,
@@ -249,7 +263,7 @@ class PhysRegFile
     std::vector<IntReg> intRegFile;
 
     /** Floating point register file. */
-    std::vector<FloatReg> floatRegFile;
+    std::vector<PhysFloatReg> floatRegFile;
 
     /** Miscellaneous register file. */
     MiscRegFile miscRegs[Impl::MaxThreads];
diff --git a/src/cpu/o3/sat_counter.hh b/src/cpu/o3/sat_counter.hh
index d01fd93ce..640445407 100644
--- a/src/cpu/o3/sat_counter.hh
+++ b/src/cpu/o3/sat_counter.hh
@@ -29,6 +29,7 @@
 #ifndef __CPU_O3_SAT_COUNTER_HH__
 #define __CPU_O3_SAT_COUNTER_HH__
 
+#include "base/misc.hh"
 #include "sim/host.hh"
 
 /**
diff --git a/src/cpu/o3/scoreboard.cc b/src/cpu/o3/scoreboard.cc
new file mode 100644
index 000000000..b0e433620
--- /dev/null
+++ b/src/cpu/o3/scoreboard.cc
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cpu/o3/scoreboard.hh"
+
+Scoreboard::Scoreboard(unsigned activeThreads,
+                       unsigned _numLogicalIntRegs,
+                       unsigned _numPhysicalIntRegs,
+                       unsigned _numLogicalFloatRegs,
+                       unsigned _numPhysicalFloatRegs,
+                       unsigned _numMiscRegs,
+                       unsigned _zeroRegIdx)
+    : numLogicalIntRegs(_numLogicalIntRegs),
+      numPhysicalIntRegs(_numPhysicalIntRegs),
+      numLogicalFloatRegs(_numLogicalFloatRegs),
+      numPhysicalFloatRegs(_numPhysicalFloatRegs),
+      numMiscRegs(_numMiscRegs),
+      zeroRegIdx(_zeroRegIdx)
+{
+    // Total logical and physical register counts (int + float).
+    numLogicalRegs = numLogicalIntRegs  + numLogicalFloatRegs;
+    numPhysicalRegs = numPhysicalIntRegs  + numPhysicalFloatRegs;
+
+    // One entry per physical register, then numMiscRegs entries per thread.
+    regScoreBoard.resize(numPhysicalRegs + (numMiscRegs * activeThreads));
+
+    // Mark the first numLogicalIntRegs * activeThreads int registers ready.
+    for (int i=0; i < numLogicalIntRegs * activeThreads; i++) {
+        regScoreBoard[i] = 1;
+    }
+    // Same for the float registers, which start at numPhysicalIntRegs.
+    for (int i= numPhysicalIntRegs;
+         i < numPhysicalIntRegs + (numLogicalFloatRegs * activeThreads);
+         i++) {
+        regScoreBoard[i] = 1;
+    }
+    // Misc register entries follow the physical registers; all start ready.
+    for (int i = numPhysicalRegs;
+         i < numPhysicalRegs + (numMiscRegs * activeThreads);
+         i++) {
+        regScoreBoard[i] = 1;
+    }
+}
+
+std::string
+Scoreboard::name() const
+{
+    return "cpu.scoreboard";
+}
+
+bool
+Scoreboard::getReg(PhysRegIndex phys_reg)
+{
+    // The int and fp zero registers are unconditionally reported ready.
+    const bool is_zero_reg = phys_reg == zeroRegIdx ||
+        phys_reg == (zeroRegIdx + numPhysicalIntRegs);
+    if (is_zero_reg)
+        return true;
+
+    return regScoreBoard[phys_reg];
+}
+
+void
+Scoreboard::setReg(PhysRegIndex phys_reg)
+{
+    DPRINTF(Scoreboard, "Setting reg %i as ready\n", phys_reg);
+
+    regScoreBoard[phys_reg] = 1;
+}
+
+void
+Scoreboard::unsetReg(PhysRegIndex ready_reg)
+{
+    // Leave the int and fp zero registers alone; clear any other entry.
+    const bool is_zero_reg = ready_reg == zeroRegIdx ||
+        ready_reg == (zeroRegIdx + numPhysicalIntRegs);
+
+    if (!is_zero_reg) {
+        regScoreBoard[ready_reg] = 0;
+    }
+}
diff --git a/src/cpu/o3/scoreboard.hh b/src/cpu/o3/scoreboard.hh
new file mode 100644
index 000000000..77f2cf157
--- /dev/null
+++ b/src/cpu/o3/scoreboard.hh
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_O3_SCOREBOARD_HH__
+#define __CPU_O3_SCOREBOARD_HH__
+
+#include <iostream>
+#include <utility>
+#include <vector>
+#include "arch/alpha/isa_traits.hh"
+#include "base/trace.hh"
+#include "base/traceflags.hh"
+#include "cpu/o3/comm.hh"
+
+/**
+ * Implements a simple scoreboard to track which registers are ready.
+ * This class assumes that the fp registers start, index wise, right after
+ * the integer registers. The misc. registers start, index wise, right after
+ * the fp registers.
+ * @todo: Fix up handling of the zero register in case the decoder does not
+ * automatically make insts that write the zero register into nops.
+ */
+class Scoreboard
+{
+  public:
+    /** Constructs a scoreboard.
+     *  @param activeThreads The number of active threads.
+     *  @param _numLogicalIntRegs Number of logical integer registers.
+     *  @param _numPhysicalIntRegs Number of physical integer registers.
+     *  @param _numLogicalFloatRegs Number of logical fp registers.
+     *  @param _numPhysicalFloatRegs Number of physical fp registers.
+     *  @param _numMiscRegs Number of miscellaneous registers per thread.
+     *  @param _zeroRegIdx Index of the integer zero register.
+     */
+    Scoreboard(unsigned activeThreads,
+               unsigned _numLogicalIntRegs,
+               unsigned _numPhysicalIntRegs,
+               unsigned _numLogicalFloatRegs,
+               unsigned _numPhysicalFloatRegs,
+               unsigned _numMiscRegs,
+               unsigned _zeroRegIdx);
+
+    /** Destructor. */
+    ~Scoreboard() {}
+
+    /** Returns the name of the scoreboard. */
+    std::string name() const;
+
+    /** Checks if the register is ready; the zero registers always are. */
+    bool getReg(PhysRegIndex ready_reg);
+
+    /** Sets the register as ready. */
+    void setReg(PhysRegIndex phys_reg);
+
+    /** Sets the register as not ready; no-op for the zero registers. */
+    void unsetReg(PhysRegIndex ready_reg);
+
+  private:
+    /** Ready bit for every physical integer and fp register, followed by
+     *  the misc. registers of every active thread.
+     */
+    std::vector<bool> regScoreBoard;
+
+    /** Number of logical integer registers. */
+    int numLogicalIntRegs;
+
+    /** Number of physical integer registers. */
+    int numPhysicalIntRegs;
+
+    /** Number of logical floating point registers. */
+    int numLogicalFloatRegs;
+
+    /** Number of physical floating point registers. */
+    int numPhysicalFloatRegs;
+
+    /** Number of miscellaneous registers per thread. */
+    int numMiscRegs;
+
+    /** Number of logical integer + float registers. */
+    int numLogicalRegs;
+
+    /** Number of physical integer + float registers. */
+    int numPhysicalRegs;
+
+    /** Index of the integer zero register (fp: + numPhysicalIntRegs). */
+    int zeroRegIdx;
+};
+
+#endif
diff --git a/src/cpu/o3/thread_state.hh b/src/cpu/o3/thread_state.hh
new file mode 100644
index 000000000..9101eafb9
--- /dev/null
+++ b/src/cpu/o3/thread_state.hh
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_O3_THREAD_STATE_HH__
+#define __CPU_O3_THREAD_STATE_HH__
+
+#include "arch/faults.hh"
+#include "arch/isa_traits.hh"
+#include "cpu/exec_context.hh"
+#include "cpu/thread_state.hh"
+
+class Event;
+class Process;
+
+#if FULL_SYSTEM
+class EndQuiesceEvent;
+class FunctionProfile;
+class ProfileNode;
+#else
+class FunctionalMemory;
+class Process;
+#endif
+
+/**
+ * Per-thread state that the O3 CPU keeps on top of ThreadState: the status,
+ * the current instruction, syscall and trap-pending flags, and the
+ * ExecContext proxy pointer.  Outside of full-system mode it also forwards
+ * syscalls to the thread's process.  Every constructor stores the CPU and
+ * clears the flags and xcProxy; _status and inst are left uninitialized.
+ */
+template <class Impl>
+struct O3ThreadState : public ThreadState {
+    typedef ExecContext::Status Status;
+    typedef typename Impl::FullCPU FullCPU;
+
+    /** Thread status; accessed through status() and setStatus(). */
+    Status _status;
+    /** Current instruction; updated by setInst(). */
+    TheISA::MachInst inst;
+  private:
+    /** The CPU handed to the constructor. */
+    FullCPU *cpu;
+  public:
+    /** Both start false; presumably syscall / trap state -- TODO confirm. */
+    bool inSyscall;
+    bool trapPending;
+
+#if FULL_SYSTEM
+    O3ThreadState(FullCPU *_cpu, int _thread_num, FunctionalMemory *_mem)
+        : ThreadState(-1, _thread_num, _mem),
+          cpu(_cpu), inSyscall(0), trapPending(0), xcProxy(NULL)
+    { }
+#else
+    O3ThreadState(FullCPU *_cpu, int _thread_num, Process *_process, int _asid)
+        : ThreadState(-1, _thread_num, NULL, _process, _asid),
+          cpu(_cpu), inSyscall(0), trapPending(0), xcProxy(NULL)
+    { }
+
+    O3ThreadState(FullCPU *_cpu, int _thread_num, FunctionalMemory *_mem,
+                  int _asid)
+        : ThreadState(-1, _thread_num, _mem, NULL, _asid),
+          cpu(_cpu), inSyscall(0), trapPending(0), xcProxy(NULL)
+    { }
+#endif
+    /** ExecContext proxy for this thread; NULL until it is assigned. */
+    ExecContext *xcProxy;
+
+    ExecContext *getXCProxy() { return xcProxy; }
+
+    Status status() const { return _status; }
+
+    void setStatus(Status new_status) { _status = new_status; }
+    /** Always reports false. */
+    bool misspeculating() { return false; }
+
+    void setInst(TheISA::MachInst _inst) { inst = _inst; }
+    /** Accessors for funcExeInst (presumably a ThreadState member). */
+    Counter readFuncExeInst() { return funcExeInst; }
+
+    void setFuncExeInst(Counter new_val) { funcExeInst = new_val; }
+#if !FULL_SYSTEM
+    /** Forwards the syscall to the process, passing the proxy context. */
+    void syscall(int64_t callnum) { process->syscall(callnum, xcProxy); }
+#endif
+};
+
+#endif // __CPU_O3_THREAD_STATE_HH__
-- 
cgit v1.2.3