34 files changed, 846 insertions, 935 deletions
diff --git a/SConscript b/SConscript
index 525a94818..7b5b2a970 100644
--- a/SConscript
+++ b/SConscript
@@ -52,7 +52,6 @@ base_sources = Split('''
 	arch/alpha/full_cpu_exec.cc
 	arch/alpha/faults.cc
 	arch/alpha/isa_traits.cc
-        arch/alpha/ooo_cpu_exec.cc
 
 	base/circlebuf.cc
 	base/copyright.cc
@@ -157,10 +156,6 @@ base_sources = Split('''
         cpu/full_cpu/iq/seznec/iq_seznec.cc
         cpu/full_cpu/iq/standard/iq_standard.cc
         cpu/inorder_cpu/inorder_cpu.cc
-        cpu/ooo_cpu/ea_list.cc
-        cpu/ooo_cpu/ooo_cpu.cc
-        cpu/ooo_cpu/ooo_dyn_inst.cc
-        cpu/ooo_cpu/ooo_sim_obj.cc
         cpu/sampling_cpu/sampling_cpu.cc
         cpu/simple_cpu/simple_cpu.cc
         cpu/trace/reader/mem_trace_reader.cc
@@ -402,8 +397,7 @@ env.Command(Split('''arch/alpha/decoder.cc
 		     arch/alpha/fast_cpu_exec.cc
                      arch/alpha/simple_cpu_exec.cc
                      arch/alpha/inorder_cpu_exec.cc
-                     arch/alpha/full_cpu_exec.cc
-                     arch/alpha/ooo_cpu_exec.cc'''),
+                     arch/alpha/full_cpu_exec.cc'''),
             Split('''arch/alpha/isa_desc
 		     arch/isa_parser.py'''),
             '$SRCDIR/arch/isa_parser.py $SOURCE $TARGET.dir arch/alpha')
diff --git a/arch/alpha/faults.hh b/arch/alpha/faults.hh
index 45ac122dc..a49a1c4f0 100644
--- a/arch/alpha/faults.hh
+++ b/arch/alpha/faults.hh
@@ -47,6 +47,7 @@ enum Fault {
     Fen_Fault,			// FP not-enabled fault
     Pal_Fault,			// call_pal S/W interrupt
     Integer_Overflow_Fault,
+    Fake_Mem_Fault,
     Num_Faults			// number of faults
 };
 
diff --git a/arch/alpha/isa_desc b/arch/alpha/isa_desc
index 904af3ef0..0e07400d3 100644
--- a/arch/alpha/isa_desc
+++ b/arch/alpha/isa_desc
@@ -744,9 +744,9 @@ output header {{
 	/// Memory request flags.  See mem_req_base.hh.
         unsigned memAccessFlags;
 	/// Pointer to EAComp object.
-	StaticInstPtr<AlphaISA> eaCompPtr;
+	const StaticInstPtr<AlphaISA> eaCompPtr;
 	/// Pointer to MemAcc object.
-	StaticInstPtr<AlphaISA> memAccPtr;
+	const StaticInstPtr<AlphaISA> memAccPtr;
 
 	/// Constructor
 	Memory(const char *mnem, MachInst _machInst, OpClass __opClass,
@@ -762,8 +762,8 @@ output header {{
 
       public:
 
-	StaticInstPtr<AlphaISA> &eaCompInst() { return eaCompPtr; }
-	StaticInstPtr<AlphaISA> &memAccInst() { return memAccPtr; }
+	const StaticInstPtr<AlphaISA> &eaCompInst() const { return eaCompPtr; }
+	const StaticInstPtr<AlphaISA> &memAccInst() const { return memAccPtr; }
     };
 
     /**
@@ -2539,9 +2539,9 @@ decode OPCODE default Unknown::unknown() {
 		xc->syscall();
 	    }}, IsNonSpeculative);
 	    // Read uniq reg into ABI return value register (r0)
-	    0x9e: rduniq({{ R0 = Runiq; }}, IsNonSpeculative);
+	    0x9e: rduniq({{ R0 = Runiq; }});
 	    // Write uniq reg with value from ABI arg register (r16)
-	    0x9f: wruniq({{ Runiq = R16; }}, IsNonSpeculative);
+	    0x9f: wruniq({{ Runiq = R16; }});
 	}
     }
 #endif
diff --git a/arch/isa_parser.py b/arch/isa_parser.py
index 5e0267c9e..8187cf188 100755
--- a/arch/isa_parser.py
+++ b/arch/isa_parser.py
@@ -642,9 +642,6 @@ CpuModel('FullCPU', 'full_cpu_exec.cc',
 CpuModel('AlphaFullCPU', 'alpha_full_cpu_exec.cc',
          '#include "cpu/beta_cpu/alpha_dyn_inst.hh"',
          { 'CPU_exec_context': 'AlphaDynInst<AlphaSimpleImpl>' })
-CpuModel('OoOCPU', 'ooo_cpu_exec.cc',
-         '#include "cpu/ooo_cpu/ooo_dyn_inst.hh"',
-         { 'CPU_exec_context': 'OoODynInst<OoOImpl>' })
 
 # Expand template with CPU-specific references into a dictionary with
 # an entry for each CPU model name.  The entry key is the model name
diff --git a/cpu/base_dyn_inst.cc b/cpu/base_dyn_inst.cc
index b8424f576..ecfe5a4b0 100644
--- a/cpu/base_dyn_inst.cc
+++ b/cpu/base_dyn_inst.cc
@@ -43,8 +43,6 @@
 #include "cpu/base_dyn_inst.hh"
 #include "cpu/beta_cpu/alpha_impl.hh"
 #include "cpu/beta_cpu/alpha_full_cpu.hh"
-#include "cpu/ooo_cpu/ooo_impl.hh"
-#include "cpu/ooo_cpu/ooo_cpu.hh"
 
 using namespace std;
 
@@ -384,14 +382,9 @@ BaseDynInst<Impl>::eaSrcsReady()
 
 // Forward declaration...
 template class BaseDynInst<AlphaSimpleImpl>;
-template class BaseDynInst<OoOImpl>;
 
 template <>
 int
 BaseDynInst<AlphaSimpleImpl>::instcount = 0;
 
-template <>
-int
-BaseDynInst<OoOImpl>::instcount = 0;
-
 #endif // __CPU_BASE_DYN_INST_CC__
diff --git a/cpu/base_dyn_inst.hh b/cpu/base_dyn_inst.hh
index 943293b25..509874fad 100644
--- a/cpu/base_dyn_inst.hh
+++ b/cpu/base_dyn_inst.hh
@@ -404,6 +404,10 @@ class BaseDynInst : public FastAlloc, public RefCounted
     const Addr &getEA() const { return instEffAddr; }
     bool doneEACalc() { return eaCalcDone; }
     bool eaSrcsReady();
+
+  public:
+    int16_t lqIdx;  // handed to cpu->read() as its index argument in read()
+    int16_t sqIdx;  // handed to cpu->write() as its index argument in write()
 };
 
 template<class Impl>
@@ -419,6 +423,7 @@ BaseDynInst<Impl>::read(Addr addr, T &data, unsigned flags)
     // Record key MemReq parameters so we can generate another one
     // just like it for the timing access without calling translate()
     // again (which might mess up the TLB).
+    // Do I ever really need this? -KTL 3/05
     effAddr = req->vaddr;
     physEffAddr = req->paddr;
     memReqFlags = req->flags;
@@ -433,7 +438,7 @@ BaseDynInst<Impl>::read(Addr addr, T &data, unsigned flags)
 #endif
 
     if (fault == No_Fault) {
-        fault = cpu->read(req, data);
+        fault = cpu->read(req, data, lqIdx);
     }
     else {
         // Return a fixed value to keep simulation deterministic even
@@ -459,8 +464,8 @@ BaseDynInst<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res)
         traceData->setData(data);
     }
 
-    storeSize = sizeof(T);
-    storeData = data;
+//    storeSize = sizeof(T);
+//    storeData = data;
 
     MemReqPtr req = new MemReq(addr, xc, sizeof(T), flags);
 
@@ -485,7 +490,7 @@ BaseDynInst<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res)
 #endif
 
     if (fault == No_Fault) {
-        fault = cpu->write(req, data);
+        fault = cpu->write(req, data, sqIdx);
     }
 
     if (res) {
diff --git a/cpu/beta_cpu/alpha_dyn_inst.hh b/cpu/beta_cpu/alpha_dyn_inst.hh
index b2f0d703e..d34fa071c 100644
--- a/cpu/beta_cpu/alpha_dyn_inst.hh
+++ b/cpu/beta_cpu/alpha_dyn_inst.hh
@@ -47,11 +47,10 @@ class AlphaDynInst : public BaseDynInst<Impl>
     /** BaseDynInst constructor given a static inst pointer. */
     AlphaDynInst(StaticInstPtr<AlphaISA> &_staticInst);
 
-    /** Executes the instruction.  Why the hell did I put this here? */
+    /** Executes the instruction and stores the resulting fault in this->fault. */
     Fault execute()
     {
-        this->fault = this->staticInst->execute(this, this->traceData);
-        return this->fault;
+        return this->fault = this->staticInst->execute(this, this->traceData);
     }
 
   public:
@@ -105,47 +104,47 @@ class AlphaDynInst : public BaseDynInst<Impl>
     // storage (which is pretty hard to imagine they would have reason
     // to do).
 
-    uint64_t readIntReg(StaticInst<ISA> *si, int idx)
+    uint64_t readIntReg(const StaticInst<ISA> *si, int idx)
     {
         return this->cpu->readIntReg(_srcRegIdx[idx]);
     }
 
-    float readFloatRegSingle(StaticInst<ISA> *si, int idx)
+    float readFloatRegSingle(const StaticInst<ISA> *si, int idx)
     {
         return this->cpu->readFloatRegSingle(_srcRegIdx[idx]);
     }
 
-    double readFloatRegDouble(StaticInst<ISA> *si, int idx)
+    double readFloatRegDouble(const StaticInst<ISA> *si, int idx)
     {
         return this->cpu->readFloatRegDouble(_srcRegIdx[idx]);
     }
 
-    uint64_t readFloatRegInt(StaticInst<ISA> *si, int idx)
+    uint64_t readFloatRegInt(const StaticInst<ISA> *si, int idx)
     {
         return this->cpu->readFloatRegInt(_srcRegIdx[idx]);
     }
     /** @todo: Make results into arrays so they can handle multiple dest
      *  registers.
      */
-    void setIntReg(StaticInst<ISA> *si, int idx, uint64_t val)
+    void setIntReg(const StaticInst<ISA> *si, int idx, uint64_t val)
     {
         this->cpu->setIntReg(_destRegIdx[idx], val);
         this->instResult.integer = val;
     }
 
-    void setFloatRegSingle(StaticInst<ISA> *si, int idx, float val)
+    void setFloatRegSingle(const StaticInst<ISA> *si, int idx, float val)
     {
         this->cpu->setFloatRegSingle(_destRegIdx[idx], val);
         this->instResult.fp = val;
     }
 
-    void setFloatRegDouble(StaticInst<ISA> *si, int idx, double val)
+    void setFloatRegDouble(const StaticInst<ISA> *si, int idx, double val)
     {
         this->cpu->setFloatRegDouble(_destRegIdx[idx], val);
         this->instResult.dbl = val;
     }
 
-    void setFloatRegInt(StaticInst<ISA> *si, int idx, uint64_t val)
+    void setFloatRegInt(const StaticInst<ISA> *si, int idx, uint64_t val)
     {
         this->cpu->setFloatRegInt(_destRegIdx[idx], val);
         this->instResult.integer = val;
diff --git a/cpu/beta_cpu/alpha_dyn_inst_impl.hh b/cpu/beta_cpu/alpha_dyn_inst_impl.hh
index 4a3ae99d4..3f530e182 100644
--- a/cpu/beta_cpu/alpha_dyn_inst_impl.hh
+++ b/cpu/beta_cpu/alpha_dyn_inst_impl.hh
@@ -129,7 +129,8 @@ template <class Impl>
 void
 AlphaDynInst<Impl>::syscall()
 {
-    this->cpu->syscall();
+    this->cpu->syscall(this->threadNumber);
+//    this->cpu->syscall();
 }
 #endif
 
diff --git a/cpu/beta_cpu/alpha_full_cpu.hh b/cpu/beta_cpu/alpha_full_cpu.hh
index 3c29dd277..065b2fc4e 100644
--- a/cpu/beta_cpu/alpha_full_cpu.hh
+++ b/cpu/beta_cpu/alpha_full_cpu.hh
@@ -28,8 +28,6 @@ class AlphaFullCPU : public FullBetaCPU<Impl>
     void regStats();
 
 #ifdef FULL_SYSTEM
-    bool inPalMode();
-
     //Note that the interrupt stuff from the base CPU might be somewhat
     //ISA specific (ie NumInterruptLevels).  These functions might not
     //be needed in FullCPU though.
@@ -106,13 +104,16 @@ class AlphaFullCPU : public FullBetaCPU<Impl>
     }
 
 #ifdef FULL_SYSTEM
-    uint64_t *getIPR();
+    uint64_t *getIpr();
     uint64_t readIpr(int idx, Fault &fault);
     Fault setIpr(int idx, uint64_t val);
     int readIntrFlag();
     void setIntrFlag(int val);
     Fault hwrei();
-    bool inPalMode();
+    bool inPalMode() { return AlphaISA::PcPAL(this->regFile.readPC()); }
+    bool inPalMode(uint64_t PC)
+    { return AlphaISA::PcPAL(PC); }
+
     void trap(Fault fault);
     bool simPalCheck(int palFunc);
 
@@ -153,7 +154,7 @@ class AlphaFullCPU : public FullBetaCPU<Impl>
         }
     }
 
-    void syscall();
+    void syscall(short thread_num);
     void squashStages();
 
 #endif
@@ -168,11 +169,13 @@ class AlphaFullCPU : public FullBetaCPU<Impl>
     // Not sure this is used anywhere.
     void intr_post(RegFile *regs, Fault fault, Addr pc);
     // Actually used within exec files.  Implement properly.
-    void swap_palshadow(RegFile *regs, bool use_shadow);
+    void swapPALShadow(bool use_shadow);
     // Called by CPU constructor.  Can implement as I please.
     void initCPU(RegFile *regs);
     // Called by initCPU.  Implement as I please.
     void initIPRs(RegFile *regs);
+
+    void halt() { panic("Halt not implemented!\n"); }
 #endif
 
 
@@ -193,6 +196,11 @@ class AlphaFullCPU : public FullBetaCPU<Impl>
         return error;
     }
 
+    template <class T>
+    Fault read(MemReqPtr &req, T &data, int load_idx)  // indexed overload
+    {
+        return this->iew.ldstQueue.read(req, data, load_idx);  // delegates to iew.ldstQueue
+    }
 
     template <class T>
     Fault write(MemReqPtr &req, T &data)
@@ -218,7 +226,7 @@ class AlphaFullCPU : public FullBetaCPU<Impl>
                         std::cerr << "Warning: "
                                   << req->xc->storeCondFailures
                                   << " consecutive store conditional failures "
-                                  << "on cpu " << cpu_id
+                                  << "on cpu " << this->cpu_id
                                   << std::endl;
                     }
                     return No_Fault;
@@ -232,8 +240,8 @@ class AlphaFullCPU : public FullBetaCPU<Impl>
         // and all other stores (WH64?).  Unsuccessful Store
         // Conditionals would have returned above, and wouldn't fall
         // through.
-        for (int i = 0; i < system->execContexts.size(); i++){
-            cregs = &system->execContexts[i]->regs.miscRegs;
+        for (int i = 0; i < this->system->execContexts.size(); i++){
+            cregs = &this->system->execContexts[i]->regs.miscRegs;
             if ((cregs->lock_addr & ~0xf) == (req->paddr & ~0xf)) {
                 cregs->lock_flag = false;
             }
@@ -244,6 +252,12 @@ class AlphaFullCPU : public FullBetaCPU<Impl>
         return this->mem->write(req, (T)htoa(data));
     }
 
+    template <class T>
+    Fault write(MemReqPtr &req, T &data, int store_idx)  // indexed overload
+    {
+        return this->iew.ldstQueue.write(req, data, store_idx);  // delegates to iew.ldstQueue
+    }
+
 };
 
 #endif // __CPU_BETA_CPU_ALPHA_FULL_CPU_HH__
diff --git a/cpu/beta_cpu/alpha_full_cpu_builder.cc b/cpu/beta_cpu/alpha_full_cpu_builder.cc
index cf9536cb8..dc5b1aad1 100644
--- a/cpu/beta_cpu/alpha_full_cpu_builder.cc
+++ b/cpu/beta_cpu/alpha_full_cpu_builder.cc
@@ -33,8 +33,17 @@
 #include "mem/functional_mem/functional_memory.hh"
 #endif // FULL_SYSTEM
 
-BEGIN_DECLARE_SIM_OBJECT_PARAMS(BaseFullCPU)
+class DerivAlphaFullCPU : public AlphaFullCPU<AlphaSimpleImpl>  // concrete type named in the SimObject macros below
+{
+  public:
+    DerivAlphaFullCPU(AlphaSimpleParams p)  // params are taken by value
+        : AlphaFullCPU<AlphaSimpleImpl>(p)  // and passed on to the base-class constructor
+    { }
+};
+
+BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivAlphaFullCPU)
 
+    Param<int> cycle_time;
     Param<int> numThreads;
 
 #ifdef FULL_SYSTEM
@@ -44,8 +53,6 @@ SimObjectParam<AlphaDTB *> dtb;
 Param<int> mult;
 #else
 SimObjectVectorParam<Process *> workload;
-SimObjectParam<Process *> process;
-Param<short> asid;
 #endif // FULL_SYSTEM
 SimObjectParam<FunctionalMemory *> mem;
 
@@ -120,23 +127,25 @@ Param<unsigned> numROBEntries;
 
 Param<unsigned> instShiftAmt;
 
-Param<bool> defReg;
+Param<bool> defer_registration;
 
-END_DECLARE_SIM_OBJECT_PARAMS(BaseFullCPU)
+Param<bool> function_trace;
+Param<Tick> function_trace_start;
 
-BEGIN_INIT_SIM_OBJECT_PARAMS(BaseFullCPU)
+END_DECLARE_SIM_OBJECT_PARAMS(DerivAlphaFullCPU)
 
+BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU)
+
+    INIT_PARAM(cycle_time, "cpu cycle time"),
     INIT_PARAM(numThreads, "number of HW thread contexts"),
 
 #ifdef FULL_SYSTEM
     INIT_PARAM(system, "System object"),
     INIT_PARAM(itb, "Instruction translation buffer"),
     INIT_PARAM(dtb, "Data translation buffer"),
-    INIT_PARAM_DFLT(mult, "System clock multiplier", 1),
+    INIT_PARAM(mult, "System clock multiplier"),
 #else
     INIT_PARAM(workload, "Processes to run"),
-    INIT_PARAM_DFLT(process, "Process to run", NULL),
-    INIT_PARAM(asid, "Address space ID"),
 #endif // FULL_SYSTEM
 
     INIT_PARAM_DFLT(mem, "Memory", NULL),
@@ -230,14 +239,16 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(BaseFullCPU)
     INIT_PARAM(numROBEntries, "Number of reorder buffer entries"),
 
     INIT_PARAM(instShiftAmt, "Number of bits to shift instructions by"),
+    INIT_PARAM(defer_registration, "defer system registration (for sampling)"),
 
-    INIT_PARAM(defReg, "Defer registration")
+    INIT_PARAM(function_trace, "Enable function trace"),
+    INIT_PARAM(function_trace_start, "Cycle to start function trace")
 
-END_INIT_SIM_OBJECT_PARAMS(BaseFullCPU)
+END_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU)
 
-CREATE_SIM_OBJECT(BaseFullCPU)
+CREATE_SIM_OBJECT(DerivAlphaFullCPU)
 {
-    AlphaFullCPU<AlphaSimpleImpl> *cpu;
+    DerivAlphaFullCPU *cpu;
 
 #ifdef FULL_SYSTEM
     if (mult != 1)
@@ -255,30 +266,21 @@ CREATE_SIM_OBJECT(BaseFullCPU)
         fatal("Must specify at least one workload!");
     }
 
-    Process *actual_process;
-
-    if (process == NULL) {
-        actual_process = workload[0];
-    } else {
-        actual_process = process;
-    }
-
 #endif
 
     AlphaSimpleParams params;
 
+    params.cycleTime = cycle_time;
+
     params.name = getInstanceName();
     params.numberOfThreads = actual_num_threads;
 
 #ifdef FULL_SYSTEM
-    params._system = system;
+    params.system = system;
     params.itb = itb;
     params.dtb = dtb;
-    params.freq = ticksPerSecond * mult;
 #else
     params.workload = workload;
-    params.process = actual_process;
-    params.asid = asid;
 #endif // FULL_SYSTEM
 
     params.mem = mem;
@@ -356,12 +358,15 @@ CREATE_SIM_OBJECT(BaseFullCPU)
 
     params.instShiftAmt = 2;
 
-    params.defReg = defReg;
+    params.defReg = defer_registration;
+
+    params.functionTrace = function_trace;
+    params.functionTraceStart = function_trace_start;
 
-    cpu = new AlphaFullCPU<AlphaSimpleImpl>(params);
+    cpu = new DerivAlphaFullCPU(params);
 
     return cpu;
 }
 
-REGISTER_SIM_OBJECT("AlphaFullCPU", BaseFullCPU)
+REGISTER_SIM_OBJECT("DerivAlphaFullCPU", DerivAlphaFullCPU)
 
diff --git a/cpu/beta_cpu/alpha_full_cpu_impl.hh b/cpu/beta_cpu/alpha_full_cpu_impl.hh
index fccded193..c42e9e362 100644
--- a/cpu/beta_cpu/alpha_full_cpu_impl.hh
+++ b/cpu/beta_cpu/alpha_full_cpu_impl.hh
@@ -12,6 +12,14 @@
 #include "cpu/beta_cpu/alpha_params.hh"
 #include "cpu/beta_cpu/comm.hh"
 
+#ifdef FULL_SYSTEM
+#include "arch/alpha/osfpal.hh"
+#include "arch/alpha/isa_traits.hh"
+//#include "arch/alpha/ev5.hh"
+
+//using namespace EV5;
+#endif
+
 template <class Impl>
 AlphaFullCPU<Impl>::AlphaFullCPU(Params &params)
     : FullBetaCPU<Impl>(params)
@@ -42,9 +50,12 @@ AlphaFullCPU<Impl>::regStats()
 
 #ifndef FULL_SYSTEM
 
+// Takes the number of the thread making the syscall; the DynInst passes its
+// threadNumber.  NOTE: the body below still dispatches to thread[0]
+// unconditionally; the thread_num-indexed call is left commented out there.
 template <class Impl>
 void
-AlphaFullCPU<Impl>::syscall()
+AlphaFullCPU<Impl>::syscall(short thread_num)
 {
     DPRINTF(FullCPU, "AlphaFullCPU: Syscall() called.\n\n");
 
@@ -60,7 +71,8 @@ AlphaFullCPU<Impl>::syscall()
     // Copy over all important state to xc once all the unrolling is done.
     copyToXC();
 
-    this->process->syscall(this->xc);
+    this->thread[0]->syscall();
+//    this->thread[thread_num]->syscall();
 
     // Copy over all important state back to CPU.
     copyFromXC();
@@ -102,6 +114,8 @@ AlphaFullCPU<Impl>::squashStages()
     this->iew.squash();
     this->iewQueue.advance();
     this->iewQueue.advance();
+    // Needs to tell the LSQ to write back all of its data
+    this->iew.lsqWriteback();
 
     this->rob.squash(rob_head);
     this->commit.setSquashing();
@@ -203,390 +217,35 @@ template <class Impl>
 uint64_t *
 AlphaFullCPU<Impl>::getIpr()
 {
-    return regFile.getIpr();
+    return this->regFile.getIpr();
 }
 
 template <class Impl>
 uint64_t
 AlphaFullCPU<Impl>::readIpr(int idx, Fault &fault)
 {
-    uint64_t *ipr = getIpr();
-    uint64_t retval = 0;	// return value, default 0
-
-    switch (idx) {
-      case AlphaISA::IPR_PALtemp0:
-      case AlphaISA::IPR_PALtemp1:
-      case AlphaISA::IPR_PALtemp2:
-      case AlphaISA::IPR_PALtemp3:
-      case AlphaISA::IPR_PALtemp4:
-      case AlphaISA::IPR_PALtemp5:
-      case AlphaISA::IPR_PALtemp6:
-      case AlphaISA::IPR_PALtemp7:
-      case AlphaISA::IPR_PALtemp8:
-      case AlphaISA::IPR_PALtemp9:
-      case AlphaISA::IPR_PALtemp10:
-      case AlphaISA::IPR_PALtemp11:
-      case AlphaISA::IPR_PALtemp12:
-      case AlphaISA::IPR_PALtemp13:
-      case AlphaISA::IPR_PALtemp14:
-      case AlphaISA::IPR_PALtemp15:
-      case AlphaISA::IPR_PALtemp16:
-      case AlphaISA::IPR_PALtemp17:
-      case AlphaISA::IPR_PALtemp18:
-      case AlphaISA::IPR_PALtemp19:
-      case AlphaISA::IPR_PALtemp20:
-      case AlphaISA::IPR_PALtemp21:
-      case AlphaISA::IPR_PALtemp22:
-      case AlphaISA::IPR_PALtemp23:
-      case AlphaISA::IPR_PAL_BASE:
-
-      case AlphaISA::IPR_IVPTBR:
-      case AlphaISA::IPR_DC_MODE:
-      case AlphaISA::IPR_MAF_MODE:
-      case AlphaISA::IPR_ISR:
-      case AlphaISA::IPR_EXC_ADDR:
-      case AlphaISA::IPR_IC_PERR_STAT:
-      case AlphaISA::IPR_DC_PERR_STAT:
-      case AlphaISA::IPR_MCSR:
-      case AlphaISA::IPR_ASTRR:
-      case AlphaISA::IPR_ASTER:
-      case AlphaISA::IPR_SIRR:
-      case AlphaISA::IPR_ICSR:
-      case AlphaISA::IPR_ICM:
-      case AlphaISA::IPR_DTB_CM:
-      case AlphaISA::IPR_IPLR:
-      case AlphaISA::IPR_INTID:
-      case AlphaISA::IPR_PMCTR:
-        // no side-effect
-        retval = ipr[idx];
-        break;
-
-      case AlphaISA::IPR_CC:
-        retval |= ipr[idx] & ULL(0xffffffff00000000);
-        retval |= curTick  & ULL(0x00000000ffffffff);
-        break;
-
-      case AlphaISA::IPR_VA:
-        retval = ipr[idx];
-        break;
-
-      case AlphaISA::IPR_VA_FORM:
-      case AlphaISA::IPR_MM_STAT:
-      case AlphaISA::IPR_IFAULT_VA_FORM:
-      case AlphaISA::IPR_EXC_MASK:
-      case AlphaISA::IPR_EXC_SUM:
-        retval = ipr[idx];
-        break;
-
-      case AlphaISA::IPR_DTB_PTE:
-        {
-            AlphaISA::PTE &pte = dtb->index(!misspeculating());
-
-            retval |= ((u_int64_t)pte.ppn & ULL(0x7ffffff)) << 32;
-            retval |= ((u_int64_t)pte.xre & ULL(0xf)) << 8;
-            retval |= ((u_int64_t)pte.xwe & ULL(0xf)) << 12;
-            retval |= ((u_int64_t)pte.fonr & ULL(0x1)) << 1;
-            retval |= ((u_int64_t)pte.fonw & ULL(0x1))<< 2;
-            retval |= ((u_int64_t)pte.asma & ULL(0x1)) << 4;
-            retval |= ((u_int64_t)pte.asn & ULL(0x7f)) << 57;
-        }
-        break;
-
-        // write only registers
-      case AlphaISA::IPR_HWINT_CLR:
-      case AlphaISA::IPR_SL_XMIT:
-      case AlphaISA::IPR_DC_FLUSH:
-      case AlphaISA::IPR_IC_FLUSH:
-      case AlphaISA::IPR_ALT_MODE:
-      case AlphaISA::IPR_DTB_IA:
-      case AlphaISA::IPR_DTB_IAP:
-      case AlphaISA::IPR_ITB_IA:
-      case AlphaISA::IPR_ITB_IAP:
-        fault = Unimplemented_Opcode_Fault;
-        break;
-
-      default:
-        // invalid IPR
-        fault = Unimplemented_Opcode_Fault;
-        break;
-    }
-
-    return retval;
+    return this->regFile.readIpr(idx, fault);
 }
 
 template <class Impl>
 Fault
 AlphaFullCPU<Impl>::setIpr(int idx, uint64_t val)
 {
-    uint64_t *ipr = getIpr();
-    uint64_t old;
-
-    if (misspeculating())
-        return No_Fault;
-
-    switch (idx) {
-      case AlphaISA::IPR_PALtemp0:
-      case AlphaISA::IPR_PALtemp1:
-      case AlphaISA::IPR_PALtemp2:
-      case AlphaISA::IPR_PALtemp3:
-      case AlphaISA::IPR_PALtemp4:
-      case AlphaISA::IPR_PALtemp5:
-      case AlphaISA::IPR_PALtemp6:
-      case AlphaISA::IPR_PALtemp7:
-      case AlphaISA::IPR_PALtemp8:
-      case AlphaISA::IPR_PALtemp9:
-      case AlphaISA::IPR_PALtemp10:
-      case AlphaISA::IPR_PALtemp11:
-      case AlphaISA::IPR_PALtemp12:
-      case AlphaISA::IPR_PALtemp13:
-      case AlphaISA::IPR_PALtemp14:
-      case AlphaISA::IPR_PALtemp15:
-      case AlphaISA::IPR_PALtemp16:
-      case AlphaISA::IPR_PALtemp17:
-      case AlphaISA::IPR_PALtemp18:
-      case AlphaISA::IPR_PALtemp19:
-      case AlphaISA::IPR_PALtemp20:
-      case AlphaISA::IPR_PALtemp21:
-      case AlphaISA::IPR_PALtemp22:
-      case AlphaISA::IPR_PAL_BASE:
-      case AlphaISA::IPR_IC_PERR_STAT:
-      case AlphaISA::IPR_DC_PERR_STAT:
-      case AlphaISA::IPR_PMCTR:
-        // write entire quad w/ no side-effect
-        ipr[idx] = val;
-        break;
-
-      case AlphaISA::IPR_CC_CTL:
-        // This IPR resets the cycle counter.  We assume this only
-        // happens once... let's verify that.
-        assert(ipr[idx] == 0);
-        ipr[idx] = 1;
-        break;
-
-      case AlphaISA::IPR_CC:
-        // This IPR only writes the upper 64 bits.  It's ok to write
-        // all 64 here since we mask out the lower 32 in rpcc (see
-        // isa_desc).
-        ipr[idx] = val;
-        break;
-
-      case AlphaISA::IPR_PALtemp23:
-        // write entire quad w/ no side-effect
-        old = ipr[idx];
-        ipr[idx] = val;
-        kernelStats.context(old, val);
-        break;
-
-      case AlphaISA::IPR_DTB_PTE:
-        // write entire quad w/ no side-effect, tag is forthcoming
-        ipr[idx] = val;
-        break;
-
-      case AlphaISA::IPR_EXC_ADDR:
-        // second least significant bit in PC is always zero
-        ipr[idx] = val & ~2;
-        break;
-
-      case AlphaISA::IPR_ASTRR:
-      case AlphaISA::IPR_ASTER:
-        // only write least significant four bits - privilege mask
-        ipr[idx] = val & 0xf;
-        break;
-
-      case AlphaISA::IPR_IPLR:
-#ifdef DEBUG
-        if (break_ipl != -1 && break_ipl == (val & 0x1f))
-            debug_break();
-#endif
-
-        // only write least significant five bits - interrupt level
-        ipr[idx] = val & 0x1f;
-        kernelStats.swpipl(ipr[idx]);
-        break;
-
-      case AlphaISA::IPR_DTB_CM:
-        kernelStats.mode((val & 0x18) != 0);
-
-      case AlphaISA::IPR_ICM:
-        // only write two mode bits - processor mode
-        ipr[idx] = val & 0x18;
-        break;
-
-      case AlphaISA::IPR_ALT_MODE:
-        // only write two mode bits - processor mode
-        ipr[idx] = val & 0x18;
-        break;
-
-      case AlphaISA::IPR_MCSR:
-        // more here after optimization...
-        ipr[idx] = val;
-        break;
-
-      case AlphaISA::IPR_SIRR:
-        // only write software interrupt mask
-        ipr[idx] = val & 0x7fff0;
-        break;
-
-      case AlphaISA::IPR_ICSR:
-        ipr[idx] = val & ULL(0xffffff0300);
-        break;
-
-      case AlphaISA::IPR_IVPTBR:
-      case AlphaISA::IPR_MVPTBR:
-        ipr[idx] = val & ULL(0xffffffffc0000000);
-        break;
-
-      case AlphaISA::IPR_DC_TEST_CTL:
-        ipr[idx] = val & 0x1ffb;
-        break;
-
-      case AlphaISA::IPR_DC_MODE:
-      case AlphaISA::IPR_MAF_MODE:
-        ipr[idx] = val & 0x3f;
-        break;
-
-      case AlphaISA::IPR_ITB_ASN:
-        ipr[idx] = val & 0x7f0;
-        break;
-
-      case AlphaISA::IPR_DTB_ASN:
-        ipr[idx] = val & ULL(0xfe00000000000000);
-        break;
-
-      case AlphaISA::IPR_EXC_SUM:
-      case AlphaISA::IPR_EXC_MASK:
-        // any write to this register clears it
-        ipr[idx] = 0;
-        break;
-
-      case AlphaISA::IPR_INTID:
-      case AlphaISA::IPR_SL_RCV:
-      case AlphaISA::IPR_MM_STAT:
-      case AlphaISA::IPR_ITB_PTE_TEMP:
-      case AlphaISA::IPR_DTB_PTE_TEMP:
-        // read-only registers
-        return Unimplemented_Opcode_Fault;
-
-      case AlphaISA::IPR_HWINT_CLR:
-      case AlphaISA::IPR_SL_XMIT:
-      case AlphaISA::IPR_DC_FLUSH:
-      case AlphaISA::IPR_IC_FLUSH:
-        // the following are write only
-        ipr[idx] = val;
-        break;
-
-      case AlphaISA::IPR_DTB_IA:
-        // really a control write
-        ipr[idx] = 0;
-
-        dtb->flushAll();
-        break;
-
-      case AlphaISA::IPR_DTB_IAP:
-        // really a control write
-        ipr[idx] = 0;
-
-        dtb->flushProcesses();
-        break;
-
-      case AlphaISA::IPR_DTB_IS:
-        // really a control write
-        ipr[idx] = val;
-
-        dtb->flushAddr(val, DTB_ASN_ASN(ipr[AlphaISA::IPR_DTB_ASN]));
-        break;
-
-      case AlphaISA::IPR_DTB_TAG: {
-          struct AlphaISA::PTE pte;
-
-          // FIXME: granularity hints NYI...
-          if (DTB_PTE_GH(ipr[AlphaISA::IPR_DTB_PTE]) != 0)
-              panic("PTE GH field != 0");
-
-          // write entire quad
-          ipr[idx] = val;
-
-          // construct PTE for new entry
-          pte.ppn = DTB_PTE_PPN(ipr[AlphaISA::IPR_DTB_PTE]);
-          pte.xre = DTB_PTE_XRE(ipr[AlphaISA::IPR_DTB_PTE]);
-          pte.xwe = DTB_PTE_XWE(ipr[AlphaISA::IPR_DTB_PTE]);
-          pte.fonr = DTB_PTE_FONR(ipr[AlphaISA::IPR_DTB_PTE]);
-          pte.fonw = DTB_PTE_FONW(ipr[AlphaISA::IPR_DTB_PTE]);
-          pte.asma = DTB_PTE_ASMA(ipr[AlphaISA::IPR_DTB_PTE]);
-          pte.asn = DTB_ASN_ASN(ipr[AlphaISA::IPR_DTB_ASN]);
-
-          // insert new TAG/PTE value into data TLB
-          dtb->insert(val, pte);
-      }
-        break;
-
-      case AlphaISA::IPR_ITB_PTE: {
-          struct AlphaISA::PTE pte;
-
-          // FIXME: granularity hints NYI...
-          if (ITB_PTE_GH(val) != 0)
-              panic("PTE GH field != 0");
-
-          // write entire quad
-          ipr[idx] = val;
-
-          // construct PTE for new entry
-          pte.ppn = ITB_PTE_PPN(val);
-          pte.xre = ITB_PTE_XRE(val);
-          pte.xwe = 0;
-          pte.fonr = ITB_PTE_FONR(val);
-          pte.fonw = ITB_PTE_FONW(val);
-          pte.asma = ITB_PTE_ASMA(val);
-          pte.asn = ITB_ASN_ASN(ipr[AlphaISA::IPR_ITB_ASN]);
-
-          // insert new TAG/PTE value into data TLB
-          itb->insert(ipr[AlphaISA::IPR_ITB_TAG], pte);
-      }
-        break;
-
-      case AlphaISA::IPR_ITB_IA:
-        // really a control write
-        ipr[idx] = 0;
-
-        itb->flushAll();
-        break;
-
-      case AlphaISA::IPR_ITB_IAP:
-        // really a control write
-        ipr[idx] = 0;
-
-        itb->flushProcesses();
-        break;
-
-      case AlphaISA::IPR_ITB_IS:
-        // really a control write
-        ipr[idx] = val;
-
-        itb->flushAddr(val, ITB_ASN_ASN(ipr[AlphaISA::IPR_ITB_ASN]));
-        break;
-
-      default:
-        // invalid IPR
-        return Unimplemented_Opcode_Fault;
-    }
-
-    // no error...
-    return No_Fault;
-
+    return this->regFile.setIpr(idx, val);
 }
 
 template <class Impl>
 int
 AlphaFullCPU<Impl>::readIntrFlag()
 {
-    return regs.intrflag;
+    return this->regFile.readIntrFlag();
 }
 
 template <class Impl>
 void
 AlphaFullCPU<Impl>::setIntrFlag(int val)
 {
-    regs.intrflag = val;
+    this->regFile.setIntrFlag(val);
 }
 
 // Can force commit stage to squash and stuff.
@@ -596,19 +255,17 @@ AlphaFullCPU<Impl>::hwrei()
 {
     uint64_t *ipr = getIpr();
 
-    if (!PC_PAL(regs.pc))
+    if (!inPalMode())
         return Unimplemented_Opcode_Fault;
 
     setNextPC(ipr[AlphaISA::IPR_EXC_ADDR]);
 
-    if (!misspeculating()) {
-        kernelStats.hwrei();
+//    kernelStats.hwrei();
 
-        if ((ipr[AlphaISA::IPR_EXC_ADDR] & 1) == 0)
-            AlphaISA::swap_palshadow(&regs, false);
+//    if ((ipr[AlphaISA::IPR_EXC_ADDR] & 1) == 0)  // guard disabled along with its
+//        AlphaISA::swap_palshadow(&regs, false);   // body; else it gates the line below
 
-        AlphaISA::check_interrupts = true;
-    }
+    this->checkInterrupts = true;
 
     // FIXME: XXX check for interrupts? XXX
     return No_Fault;
@@ -616,16 +273,9 @@ AlphaFullCPU<Impl>::hwrei()
 
 template <class Impl>
 bool
-AlphaFullCPU<Impl>::inPalMode()
-{
-    return PC_PAL(readPC());
-}
-
-template <class Impl>
-bool
 AlphaFullCPU<Impl>::simPalCheck(int palFunc)
 {
-    kernelStats.callpal(palFunc);
+//    kernelStats.callpal(palFunc);
 
     switch (palFunc) {
       case PAL::halt:
@@ -636,7 +286,7 @@ AlphaFullCPU<Impl>::simPalCheck(int palFunc)
 
       case PAL::bpt:
       case PAL::bugchk:
-        if (system->breakpoint())
+        if (this->system->breakpoint())
             return false;
         break;
     }
@@ -651,21 +301,22 @@ template <class Impl>
 void
 AlphaFullCPU<Impl>::trap(Fault fault)
 {
-    uint64_t PC = commit.readPC();
+    // Keep in mind that a trap may be initiated by fetch if there's a TLB
+    // miss
+    uint64_t PC = this->commit.readCommitPC();
 
     DPRINTF(Fault, "Fault %s\n", FaultName(fault));
-    Stats::recordEvent(csprintf("Fault %s", FaultName(fault)));
+    this->recordEvent(csprintf("Fault %s", FaultName(fault)));
 
-    assert(!misspeculating());
-    kernelStats.fault(fault);
+//    kernelStats.fault(fault);
 
     if (fault == Arithmetic_Fault)
         panic("Arithmetic traps are unimplemented!");
 
-    AlphaISA::InternalProcReg *ipr = getIpr();
+    typename AlphaISA::InternalProcReg *ipr = getIpr();
 
     // exception restart address - Get the commit PC
-    if (fault != Interrupt_Fault || !PC_PAL(PC))
+    if (fault != Interrupt_Fault || !inPalMode(PC))
         ipr[AlphaISA::IPR_EXC_ADDR] = PC;
 
     if (fault == Pal_Fault || fault == Arithmetic_Fault /* ||
@@ -674,11 +325,12 @@ AlphaFullCPU<Impl>::trap(Fault fault)
         ipr[AlphaISA::IPR_EXC_ADDR] += 4;
     }
 
-    if (!PC_PAL(PC))
-        AlphaISA::swap_palshadow(&regs, true);
+    if (!inPalMode(PC))
+        swapPALShadow(true);
 
-    setPC( ipr[AlphaISA::IPR_PAL_BASE] + AlphaISA::fault_addr[fault] );
-    setNextPC(PC + sizeof(MachInst));
+    this->regFile.setPC( ipr[AlphaISA::IPR_PAL_BASE] +
+                         AlphaISA::fault_addr[fault] );
+    this->regFile.setNextPC(PC + sizeof(MachInst));
 }
 
 template <class Impl>
@@ -694,7 +346,7 @@ AlphaFullCPU<Impl>::processInterrupts()
 // same logical index.
 template <class Impl>
 void
-AlphaFullCPU<Impl>::swap_palshadow(RegFile *regs, bool use_shadow)
+AlphaFullCPU<Impl>::swapPALShadow(bool use_shadow)
 {
     if (palShadowEnabled == use_shadow)
         panic("swap_palshadow: wrong PAL shadow state");
@@ -703,6 +355,7 @@ AlphaFullCPU<Impl>::swap_palshadow(RegFile *regs, bool use_shadow)
 
     // Will have to lookup in rename map to get physical registers, then
     // swap.
+/*
     for (int i = 0; i < AlphaISA::NumIntRegs; i++) {
         if (reg_redir[i]) {
             AlphaISA::IntReg temp = regs->intRegFile[i];
@@ -710,6 +363,7 @@ AlphaFullCPU<Impl>::swap_palshadow(RegFile *regs, bool use_shadow)
             regs->palregs[i] = temp;
         }
     }
+*/
 }
 
 #endif // FULL_SYSTEM
diff --git a/cpu/beta_cpu/alpha_params.hh b/cpu/beta_cpu/alpha_params.hh
index ecde4b016..fb3468098 100644
--- a/cpu/beta_cpu/alpha_params.hh
+++ b/cpu/beta_cpu/alpha_params.hh
@@ -20,12 +20,12 @@ class MemInterface;
 class AlphaSimpleParams : public BaseFullCPU::Params
 {
   public:
+
 #ifdef FULL_SYSTEM
     AlphaITB *itb; AlphaDTB *dtb;
 #else
     std::vector<Process *> workload;
     Process *process;
-    short asid;
 #endif // FULL_SYSTEM
 
     FunctionalMemory *mem;
diff --git a/cpu/beta_cpu/comm.hh b/cpu/beta_cpu/comm.hh
index c0afe3d1b..18f76d921 100644
--- a/cpu/beta_cpu/comm.hh
+++ b/cpu/beta_cpu/comm.hh
@@ -50,7 +50,6 @@ struct SimpleIEWSimpleCommit {
     bool branchTaken;
     uint64_t mispredPC;
     uint64_t nextPC;
-    unsigned globalHist;
     InstSeqNum squashedSeqNum;
 };
 
@@ -78,7 +77,6 @@ struct TimeBufStruct {
         bool branchTaken;
         uint64_t mispredPC;
         uint64_t nextPC;
-        unsigned globalHist;
     };
 
     decodeComm decodeInfo;
@@ -113,12 +111,11 @@ struct TimeBufStruct {
         bool branchTaken;
         uint64_t mispredPC;
         uint64_t nextPC;
-        unsigned globalHist;
 
         // Think of better names here.
         // Will need to be a variety of sizes...
         // Maybe make it a vector, that way only need one object.
-        std::vector<PhysRegIndex> freeRegs;
+//        std::vector<PhysRegIndex> freeRegs;
 
         bool robSquashing;
 
@@ -129,7 +126,7 @@ struct TimeBufStruct {
 
         // Extra bits of information so that the LDSTQ only updates when it
         // needs to.
-        bool commitIsStore;
+//        bool commitIsStore;
         bool commitIsLoad;
 
         // Communication specifically to the IQ to tell the IQ that it can
diff --git a/cpu/beta_cpu/commit.hh b/cpu/beta_cpu/commit.hh
index 731307bf7..c04dc8085 100644
--- a/cpu/beta_cpu/commit.hh
+++ b/cpu/beta_cpu/commit.hh
@@ -113,9 +113,6 @@ class SimpleCommit
     /** Pointer to FullCPU. */
     FullCPU *cpu;
 
-    /** Pointer to the rename map.  DO NOT USE if possible. */
-//    typename Impl::CPUPol::RenameMap *renameMap;
-
     //Store buffer interface?  Will need to move committed stores to the
     //store buffer
 
diff --git a/cpu/beta_cpu/commit_impl.hh b/cpu/beta_cpu/commit_impl.hh
index 3e97b980c..17ede9694 100644
--- a/cpu/beta_cpu/commit_impl.hh
+++ b/cpu/beta_cpu/commit_impl.hh
@@ -166,9 +166,9 @@ SimpleCommit<Impl>::commit()
     // hwrei() is what resets the PC to the place where instruction execution
     // beings again.
 #ifdef FULL_SYSTEM
-    if (ISA::check_interrupts &&
+    if (//checkInterrupts &&
         cpu->check_interrupts() &&
-        !xc->inPalMode()) {
+        !cpu->inPalMode(readCommitPC())) {
         // Will need to squash all instructions currently in flight and have
         // the interrupt handler restart at the last non-committed inst.
         // Most of that can be handled through the trap() function.  The
@@ -215,8 +215,6 @@ SimpleCommit<Impl>::commit()
 
         toIEW->commitInfo.mispredPC = fromIEW->mispredPC;
 
-        toIEW->commitInfo.globalHist = fromIEW->globalHist;
-
         if (toIEW->commitInfo.branchMispredict) {
             ++branchMispredicts;
         }
@@ -257,6 +255,9 @@ SimpleCommit<Impl>::commitInsts()
     // Can't commit and squash things at the same time...
     ////////////////////////////////////
 
+    if (rob->isEmpty())
+        return;
+
     DynInstPtr head_inst = rob->readHeadInst();
 
     unsigned num_committed = 0;
@@ -275,9 +276,11 @@ SimpleCommit<Impl>::commitInsts()
         if (head_inst->isSquashed()) {
             // Hack to avoid the instruction being retired (and deleted) if
             // it hasn't been through the IEW stage yet.
+/*
             if (!head_inst->isExecuted()) {
                 break;
             }
+*/
 
             DPRINTF(Commit, "Commit: Retiring squashed instruction from "
                     "ROB.\n");
@@ -341,7 +344,7 @@ SimpleCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
         // and committed this instruction.
         cpu->funcExeInst--;
 
-        if (head_inst->isStore() || head_inst->isNonSpeculative()) {
+        if (head_inst->isNonSpeculative()) {
             DPRINTF(Commit, "Commit: Encountered a store or non-speculative "
                     "instruction at the head of the ROB, PC %#x.\n",
                     head_inst->readPC());
@@ -376,12 +379,14 @@ SimpleCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
     }
 
     // Check if the instruction caused a fault.  If so, trap.
-    if (head_inst->getFault() != No_Fault) {
+    Fault inst_fault = head_inst->getFault();
+
+    if (inst_fault != No_Fault && inst_fault != Fake_Mem_Fault) {
         if (!head_inst->isNop()) {
 #ifdef FULL_SYSTEM
-            cpu->trap(fault);
+            cpu->trap(inst_fault);
 #else // !FULL_SYSTEM
-            panic("fault (%d) detected @ PC %08p", head_inst->getFault(),
+            panic("fault (%d) detected @ PC %08p", inst_fault,
                   head_inst->PC);
 #endif // FULL_SYSTEM
         }
@@ -390,7 +395,7 @@ SimpleCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
     // Check if we're really ready to commit.  If not then return false.
     // I'm pretty sure all instructions should be able to commit if they've
     // reached this far.  For now leave this in as a check.
-    if(!rob->isHeadReady()) {
+    if (!rob->isHeadReady()) {
         panic("Commit: Unable to commit head instruction!\n");
         return false;
     }
@@ -413,17 +418,7 @@ SimpleCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
         ++commitCommittedBranches;
     }
 
-
 #if 0
-    // Check if the instruction has a destination register.
-    // If so add the previous physical register of its logical register's
-    // destination to the free list through the time buffer.
-    for (int i = 0; i < head_inst->numDestRegs(); i++)
-    {
-        toIEW->commitInfo.freeRegs.push_back(head_inst->prevDestRegIdx(i));
-    }
-#endif
-
     // Explicit communication back to the LDSTQ that a load has been committed
     // and can be removed from the LDSTQ.  Stores don't need this because
     // the LDSTQ will already have been told that a store has reached the head
@@ -436,6 +431,7 @@ SimpleCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
             ++commitCommittedLoads;
         }
     }
+#endif
 
     // Now that the instruction is going to be committed, finalize its
     // trace data.
@@ -487,7 +483,7 @@ SimpleCommit<Impl>::markCompletedInsts()
     // Grab completed insts out of the IEW instruction queue, and mark
     // instructions completed within the ROB.
     for (int inst_num = 0;
-         inst_num < iewWidth && fromIEW->insts[inst_num];
+         inst_num < fromIEW->size && fromIEW->insts[inst_num];
          ++inst_num)
     {
         DPRINTF(Commit, "Commit: Marking PC %#x, SN %i ready within ROB.\n",
diff --git a/cpu/beta_cpu/cpu_policy.hh b/cpu/beta_cpu/cpu_policy.hh
index 6606aba29..50d231609 100644
--- a/cpu/beta_cpu/cpu_policy.hh
+++ b/cpu/beta_cpu/cpu_policy.hh
@@ -34,7 +34,7 @@ struct SimpleCPUPolicy
     typedef SimpleFetch<Impl> Fetch;
     typedef SimpleDecode<Impl> Decode;
     typedef SimpleRename<Impl> Rename;
-    typedef SimpleIEW<Impl, IQ> IEW;
+    typedef SimpleIEW<Impl> IEW;
     typedef SimpleCommit<Impl> Commit;
 
     /** The struct for communication between fetch and decode. */
diff --git a/cpu/beta_cpu/decode.hh b/cpu/beta_cpu/decode.hh
index dd18cf176..af2a5ee54 100644
--- a/cpu/beta_cpu/decode.hh
+++ b/cpu/beta_cpu/decode.hh
@@ -68,12 +68,16 @@ class SimpleDecode
     void squash();
 
   private:
+    inline bool fetchInstsValid();
+
     void block();
 
     inline void unblock();
 
     void squash(DynInstPtr &inst);
 
+    void dumpFetchQueue();
+
     // Interfaces to objects outside of decode.
     /** CPU interface. */
     FullCPU *cpu;
diff --git a/cpu/beta_cpu/decode_impl.hh b/cpu/beta_cpu/decode_impl.hh
index 9d88f94ac..43a4e8e95 100644
--- a/cpu/beta_cpu/decode_impl.hh
+++ b/cpu/beta_cpu/decode_impl.hh
@@ -99,6 +99,13 @@ SimpleDecode<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
 }
 
 template<class Impl>
+inline bool
+SimpleDecode<Impl>::fetchInstsValid()
+{
+    return fromFetch->size > 0;
+}
+
+template<class Impl>
 void
 SimpleDecode<Impl>::block()
 {
@@ -156,14 +163,14 @@ SimpleDecode<Impl>::squash(DynInstPtr &inst)
     // Set status to squashing.
     _status = Squashing;
 
-    // Maybe advance the time buffer?  Not sure what to do in the normal
-    // case.
-
     // Clear the skid buffer in case it has any data in it.
-    while (!skidBuffer.empty())
-    {
+    while (!skidBuffer.empty()) {
         skidBuffer.pop();
     }
+
+    // Squash instructions up until this one
+    // Slightly unrealistic!
+    cpu->removeInstsUntil(inst->seqNum);
 }
 
 template<class Impl>
@@ -205,7 +212,7 @@ SimpleDecode<Impl>::tick()
         if (_status == Unblocking) {
             ++decodeUnblockCycles;
 
-            if (fromFetch->size > 0) {
+            if (fetchInstsValid()) {
                 // Add the current inputs to the skid buffer so they can be
                 // reprocessed when this stage unblocks.
                 skidBuffer.push(*fromFetch);
@@ -216,7 +223,7 @@ SimpleDecode<Impl>::tick()
     } else if (_status == Blocked) {
         ++decodeBlockedCycles;
 
-        if (fromFetch->size > 0) {
+        if (fetchInstsValid()) {
             block();
         }
 
@@ -240,12 +247,12 @@ SimpleDecode<Impl>::tick()
             squash();
         }
     } else if (_status == Squashing) {
-        ++decodeSquashCycles;
-
         if (!fromCommit->commitInfo.squash &&
             !fromCommit->commitInfo.robSquashing) {
             _status = Running;
         } else if (fromCommit->commitInfo.squash) {
+            ++decodeSquashCycles;
+
             squash();
         }
     }
@@ -264,8 +271,7 @@ SimpleDecode<Impl>::decode()
     // Check time buffer if being told to stall.
     if (fromRename->renameInfo.stall ||
         fromIEW->iewInfo.stall ||
-        fromCommit->commitInfo.stall)
-    {
+        fromCommit->commitInfo.stall) {
         block();
         return;
     }
@@ -273,7 +279,7 @@ SimpleDecode<Impl>::decode()
     // Check fetch queue to see if instructions are available.
     // If no available instructions, do nothing, unless this stage is
     // currently unblocking.
-    if (fromFetch->size == 0 && _status != Unblocking) {
+    if (!fetchInstsValid() && _status != Unblocking) {
         DPRINTF(Decode, "Decode: Nothing to do, breaking out early.\n");
         // Should I change the status to idle?
         ++decodeIdleCycles;
@@ -286,7 +292,7 @@ SimpleDecode<Impl>::decode()
     unsigned to_rename_index = 0;
 
     int insts_available = _status == Unblocking ?
-        skidBuffer.front().size :
+        skidBuffer.front().size - numInst :
         fromFetch->size;
 
     // Debug block...
@@ -308,8 +314,8 @@ SimpleDecode<Impl>::decode()
     }
 #endif
 
-     while (insts_available > 0)
-     {
+    while (insts_available > 0)
+    {
         DPRINTF(Decode, "Decode: Sending instruction to rename.\n");
 
         inst = _status == Unblocking ? skidBuffer.front().insts[numInst] :
@@ -331,6 +337,16 @@ SimpleDecode<Impl>::decode()
             continue;
         }
 
+
+        // Also check if instructions have no source registers.  Mark
+        // them as ready to issue at any time.  Not sure if this check
+        // should exist here or at a later stage; however it doesn't matter
+        // too much for function correctness.
+        // Isn't this handled by the inst queue?
+        if (inst->numSrcRegs() == 0) {
+            inst->setCanIssue();
+        }
+
         // This current instruction is valid, so add it into the decode
         // queue.  The next instruction may not be valid, so check to
         // see if branches were predicted correctly.
@@ -369,16 +385,6 @@ SimpleDecode<Impl>::decode()
         // addr (either the immediate, or the branch PC + 4) and redirect
         // fetch if it's incorrect.
 
-
-        // Also check if instructions have no source registers.  Mark
-        // them as ready to issue at any time.  Not sure if this check
-        // should exist here or at a later stage; however it doesn't matter
-        // too much for function correctness.
-        // Isn't this handled by the inst queue?
-        if (inst->numSrcRegs() == 0) {
-            inst->setCanIssue();
-        }
-
         // Increment which instruction we're looking at.
         ++numInst;
         ++to_rename_index;
diff --git a/cpu/beta_cpu/fetch.hh b/cpu/beta_cpu/fetch.hh
index 7a3893708..da22baa9b 100644
--- a/cpu/beta_cpu/fetch.hh
+++ b/cpu/beta_cpu/fetch.hh
@@ -74,7 +74,6 @@ class SimpleFetch
 
     void processCacheCompletion();
 
-//  private:
     // Figure out PC vs next PC and how it should be updated
     void squash(const Addr &new_PC);
 
@@ -93,9 +92,6 @@ class SimpleFetch
      */
     bool lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC);
 
-    // Might not want this function...
-//    inline void recordGlobalHist(DynInstPtr &inst);
-
     /**
      * Fetches the cache line that contains fetch_PC.  Returns any
      * fault that happened.  Puts the data into the class variable
@@ -184,9 +180,6 @@ class SimpleFetch
     /** Mask to get a cache block's address. */
     Addr cacheBlkMask;
 
-    /** The instruction being fetched. */
-//    MachInst inst;
-
     /** The cache line being fetched. */
     uint8_t *cacheData;
 
diff --git a/cpu/beta_cpu/fetch_impl.hh b/cpu/beta_cpu/fetch_impl.hh
index 90caf9ffe..0ec4c63a3 100644
--- a/cpu/beta_cpu/fetch_impl.hh
+++ b/cpu/beta_cpu/fetch_impl.hh
@@ -44,6 +44,8 @@ SimpleFetch<Impl>::SimpleFetch(Params &params)
       commitToFetchDelay(params.commitToFetchDelay),
       fetchWidth(params.fetchWidth)
 {
+    DPRINTF(Fetch, "Fetch: Fetch constructor called\n");
+
     // Set status to idle.
     _status = Idle;
 
@@ -52,7 +54,7 @@ SimpleFetch<Impl>::SimpleFetch(Params &params)
     // Not sure of this parameter.  I think it should be based on the
     // thread number.
 #ifndef FULL_SYSTEM
-    memReq->asid = params.asid;
+    memReq->asid = 0;
 #else
     memReq->asid = 0;
 #endif // FULL_SYSTEM
@@ -163,21 +165,10 @@ SimpleFetch<Impl>::processCacheCompletion()
     // to return.
     // Can keep track of how many cache accesses go unused due to
     // misspeculation here.
-    // How to handle an outstanding miss which gets cancelled due to squash,
-    // then a new icache miss gets scheduled?
     if (_status == IcacheMissStall)
         _status = IcacheMissComplete;
 }
 
-#if 0
-template <class Impl>
-inline void
-SimpleFetch<Impl>::recordGlobalHist(DynInstPtr &inst)
-{
-    inst->setGlobalHist(branchPred.BPReadGlobalHist());
-}
-#endif
-
 template <class Impl>
 bool
 SimpleFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC)
@@ -311,7 +302,6 @@ SimpleFetch<Impl>::squashFromDecode(const Addr &new_PC,
     // Tell the CPU to remove any instructions that are in flight between
     // fetch and decode.
     cpu->removeInstsUntil(seq_num);
-
 }
 
 template <class Impl>
@@ -428,7 +418,9 @@ SimpleFetch<Impl>::tick()
         // Switch status to running
         _status = Running;
 
-        ++fetchSquashCycles;
+        ++fetchCycles;
+
+        fetch();
     } else if (_status != IcacheMissStall) {
         DPRINTF(Fetch, "Fetch: Running stage.\n");
 
diff --git a/cpu/beta_cpu/full_cpu.cc b/cpu/beta_cpu/full_cpu.cc
index 04c74393b..3cf5d4aaa 100644
--- a/cpu/beta_cpu/full_cpu.cc
+++ b/cpu/beta_cpu/full_cpu.cc
@@ -16,7 +16,7 @@
 using namespace std;
 
 BaseFullCPU::BaseFullCPU(Params &params)
-    : BaseCPU(&params)
+    : BaseCPU(&params), cpu_id(0)
 {
 }
 
@@ -82,15 +82,14 @@ FullBetaCPU<Impl>::FullBetaCPU(Params &params)
 
 #ifdef FULL_SYSTEM
       system(params.system),
-      memCtrl(system->memCtrl),
+      memCtrl(system->memctrl),
       physmem(system->physmem),
       itb(params.itb),
       dtb(params.dtb),
       mem(params.mem),
 #else
-      process(params.process),
-      asid(params.asid),
-      mem(process->getMemory()),
+      // Hardcoded for a single thread!!
+      mem(params.workload[0]->getMemory()),
 #endif // FULL_SYSTEM
 
       icacheInterface(params.icacheInterface),
@@ -100,20 +99,40 @@ FullBetaCPU<Impl>::FullBetaCPU(Params &params)
       funcExeInst(0)
 {
     _status = Idle;
+
+#ifndef FULL_SYSTEM
+    thread.resize(this->number_of_threads);
+#endif
+
+    for (int i = 0; i < this->number_of_threads; ++i) {
 #ifdef FULL_SYSTEM
-    xc = new ExecContext(this, 0, system, itb, dtb, mem);
+        assert(i == 0);
+        system->execContexts[i] =
+            new ExecContext(this, i, system, itb, dtb, mem);
 
-    // initialize CPU, including PC
-    TheISA::initCPU(&xc->regs);
+        // initialize CPU, including PC
+        TheISA::initCPU(&system->execContexts[i]->regs);
+        execContexts.push_back(system->execContexts[i]);
 #else
-    DPRINTF(FullCPU, "FullCPU: Process's starting PC is %#x, process is %#x",
-            process->prog_entry, process);
-    xc = new ExecContext(this, /* thread_num */ 0, process, /* asid */ 0);
-
-    assert(process->getMemory() != NULL);
-    assert(mem != NULL);
+        if (i < params.workload.size()) {
+            DPRINTF(FullCPU, "FullCPU: Workload[%i]'s starting PC is %#x, "
+                    "process is %#x\n",
+                    i, params.workload[i]->prog_entry, params.workload[i]);
+            thread[i] = new ExecContext(this, i, params.workload[i], i);
+        }
+        assert(params.workload[i]->getMemory() != NULL);
+        assert(mem != NULL);
+        execContexts.push_back(thread[i]);
 #endif // !FULL_SYSTEM
-    execContexts.push_back(xc);
+    }
+
+    // Note that this is a hack so that my code which still uses xc-> will
+    // still work.  I should remove this eventually
+#ifdef FULL_SYSTEM
+    xc = system->execContexts[0];
+#else
+    xc = thread[0];
+#endif
 
     // The stages also need their CPU pointer setup.  However this must be
     // done at the upper level CPU because they have pointers to the upper
@@ -202,29 +221,33 @@ FullBetaCPU<Impl>::init()
 
         // Need to do a copy of the xc->regs into the CPU's regfile so
         // that it can start properly.
-
+#ifdef FULL_SYSTEM
+        ExecContext *src_xc = system->execContexts[0];
+#else
+        ExecContext *src_xc = thread[0];
+#endif
         // First loop through the integer registers.
         for (int i = 0; i < Impl::ISA::NumIntRegs; ++i)
         {
-            regFile.intRegFile[i] = xc->regs.intRegFile[i];
+            regFile.intRegFile[i] = src_xc->regs.intRegFile[i];
         }
 
         // Then loop through the floating point registers.
         for (int i = 0; i < Impl::ISA::NumFloatRegs; ++i)
         {
-            regFile.floatRegFile[i].d = xc->regs.floatRegFile.d[i];
-            regFile.floatRegFile[i].q = xc->regs.floatRegFile.q[i];
+            regFile.floatRegFile[i].d = src_xc->regs.floatRegFile.d[i];
+            regFile.floatRegFile[i].q = src_xc->regs.floatRegFile.q[i];
         }
 
         // Then loop through the misc registers.
-        regFile.miscRegs.fpcr = xc->regs.miscRegs.fpcr;
-        regFile.miscRegs.uniq = xc->regs.miscRegs.uniq;
-        regFile.miscRegs.lock_flag = xc->regs.miscRegs.lock_flag;
-        regFile.miscRegs.lock_addr = xc->regs.miscRegs.lock_addr;
+        regFile.miscRegs.fpcr = src_xc->regs.miscRegs.fpcr;
+        regFile.miscRegs.uniq = src_xc->regs.miscRegs.uniq;
+        regFile.miscRegs.lock_flag = src_xc->regs.miscRegs.lock_flag;
+        regFile.miscRegs.lock_addr = src_xc->regs.miscRegs.lock_addr;
 
         // Then finally set the PC and the next PC.
-        regFile.pc = xc->regs.pc;
-        regFile.npc = xc->regs.npc;
+        regFile.pc = src_xc->regs.pc;
+        regFile.npc = src_xc->regs.npc;
     }
 }
 
@@ -277,13 +300,13 @@ FullBetaCPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
 
     // Set all status's to active, schedule the
     // CPU's tick event.
-    tickEvent.schedule(curTick);
     for (int i = 0; i < execContexts.size(); ++i) {
-        execContexts[i]->activate();
+        ExecContext *xc = execContexts[i];
+        if (xc->status() == ExecContext::Active && _status != Running) {
+            _status = Running;
+            tickEvent.schedule(curTick);
+        }
     }
-
-    // Switch out the other CPU.
-    oldCPU->switchOut();
 }
 
 template <class Impl>
@@ -463,6 +486,7 @@ FullBetaCPU<Impl>::removeInstsUntil(const InstSeqNum &seq_num)
                 inst_to_delete->seqNum, inst_to_delete->readPC());
 
         // Remove the instruction from the list.
+        instList.back() = NULL;
         instList.pop_back();
 
         // Mark it as squashed.
diff --git a/cpu/beta_cpu/full_cpu.hh b/cpu/beta_cpu/full_cpu.hh
index 8ce32b7c7..85fc49371 100644
--- a/cpu/beta_cpu/full_cpu.hh
+++ b/cpu/beta_cpu/full_cpu.hh
@@ -5,11 +5,12 @@
 //itself properly.  Constructor.  Derived alpha class.  Threads!
 // Avoid running stages and advancing queues if idle/stalled.
 
-#ifndef __SIMPLE_FULL_CPU_HH__
-#define __SIMPLE_FULL_CPU_HH__
+#ifndef __CPU_BETA_CPU_FULL_CPU_HH__
+#define __CPU_BETA_CPU_FULL_CPU_HH__
 
 #include <iostream>
 #include <list>
+#include <vector>
 
 #include "cpu/beta_cpu/comm.hh"
 
@@ -20,6 +21,11 @@
 #include "cpu/beta_cpu/cpu_policy.hh"
 #include "sim/process.hh"
 
+#ifdef FULL_SYSTEM
+#include "arch/alpha/ev5.hh"
+using namespace EV5;
+#endif
+
 class FunctionalMemory;
 class Process;
 
@@ -34,6 +40,9 @@ class BaseFullCPU : public BaseCPU
 #else
     BaseFullCPU(Params &params);
 #endif // FULL_SYSTEM
+
+  private:
+    int cpu_id;
 };
 
 template <class Impl>
@@ -41,6 +50,7 @@ class FullBetaCPU : public BaseFullCPU
 {
   public:
     //Put typedefs from the Impl here.
+    typedef typename Impl::ISA ISA;
     typedef typename Impl::CPUPol CPUPolicy;
     typedef typename Impl::Params Params;
     typedef typename Impl::DynInstPtr DynInstPtr;
@@ -114,19 +124,21 @@ class FullBetaCPU : public BaseFullCPU
     bool validDataAddr(Addr addr) { return true; }
 
     /** Get instruction asid. */
-    int getInstAsid() { return ITB_ASN_ASN(regs.ipr[ISA::IPR_ITB_ASN]); }
+    int getInstAsid()
+    { return ITB_ASN_ASN(regFile.getIpr()[ISA::IPR_ITB_ASN]); }
 
     /** Get data asid. */
-    int getDataAsid() { return DTB_ASN_ASN(regs.ipr[ISA::IPR_DTB_ASN]); }
+    int getDataAsid()
+    { return DTB_ASN_ASN(regFile.getIpr()[ISA::IPR_DTB_ASN]); }
 #else
     bool validInstAddr(Addr addr)
-    { return process->validInstAddr(addr); }
+    { return thread[0]->validInstAddr(addr); }
 
     bool validDataAddr(Addr addr)
-    { return process->validDataAddr(addr); }
+    { return thread[0]->validDataAddr(addr); }
 
-    int getInstAsid() { return asid; }
-    int getDataAsid() { return asid; }
+    int getInstAsid() { return thread[0]->asid; }
+    int getDataAsid() { return thread[0]->asid; }
 
 #endif
 
@@ -284,7 +296,14 @@ class FullBetaCPU : public BaseFullCPU
     ExecContext *xc;
 
     /** Temporary function to get pointer to exec context. */
-    ExecContext *xcBase() { return xc; }
+    ExecContext *xcBase()
+    {
+#ifdef FULL_SYSTEM
+        return system->execContexts[0];
+#else
+        return thread[0];
+#endif
+    }
 
     InstSeqNum globalSeqNum;
 
@@ -299,12 +318,7 @@ class FullBetaCPU : public BaseFullCPU
 
 //    SWContext *swCtx;
 #else
-    Process *process;
-
-    // Address space ID.  Note that this is used for TIMING cache
-    // simulation only; all functional memory accesses should use
-    // one of the FunctionalMemory pointers above.
-    short asid;
+    std::vector<ExecContext *> thread;
 #endif
 
     FunctionalMemory *mem;
diff --git a/cpu/beta_cpu/iew.cc b/cpu/beta_cpu/iew.cc
index a90d64434..626c4a90f 100644
--- a/cpu/beta_cpu/iew.cc
+++ b/cpu/beta_cpu/iew.cc
@@ -4,4 +4,4 @@
 #include "cpu/beta_cpu/iew_impl.hh"
 #include "cpu/beta_cpu/inst_queue.hh"
 
-template class SimpleIEW<AlphaSimpleImpl, AlphaSimpleImpl::CPUPol::IQ>;
+template class SimpleIEW<AlphaSimpleImpl>;
diff --git a/cpu/beta_cpu/iew.hh b/cpu/beta_cpu/iew.hh
index e3e7c6db5..1e5eb2244 100644
--- a/cpu/beta_cpu/iew.hh
+++ b/cpu/beta_cpu/iew.hh
@@ -14,7 +14,7 @@
 //Can IEW even stall?  Space should be available/allocated already...maybe
 //if there's not enough write ports on the ROB or waiting for CDB
 //arbitration.
-template<class Impl, class IQ>
+template<class Impl>
 class SimpleIEW
 {
   private:
@@ -25,6 +25,7 @@ class SimpleIEW
     typedef typename Impl::FullCPU FullCPU;
     typedef typename Impl::Params Params;
 
+    typedef typename CPUPol::IQ IQ;
     typedef typename CPUPol::RenameMap RenameMap;
     typedef typename CPUPol::LDSTQ LDSTQ;
 
@@ -33,6 +34,7 @@ class SimpleIEW
     typedef typename CPUPol::RenameStruct RenameStruct;
     typedef typename CPUPol::IssueStruct IssueStruct;
 
+    friend class Impl::FullCPU;
   public:
     enum Status {
         Running,
@@ -49,15 +51,17 @@ class SimpleIEW
     Status _wbStatus;
 
   public:
-    void squash();
-
-    void squashDueToBranch(DynInstPtr &inst);
-
-    void squashDueToMem(DynInstPtr &inst);
+    class WritebackEvent : public Event {
+      private:
+        DynInstPtr inst;
+        SimpleIEW<Impl> *iewStage;
 
-    void block();
+      public:
+        WritebackEvent(DynInstPtr &_inst, SimpleIEW<Impl> *_iew);
 
-    inline void unblock();
+        virtual void process();
+        virtual const char *description();
+    };
 
   public:
     SimpleIEW(Params &params);
@@ -74,17 +78,30 @@ class SimpleIEW
 
     void setRenameMap(RenameMap *rm_ptr);
 
-    void wakeDependents(DynInstPtr &inst);
+    void squash();
 
-    void tick();
+    void squashDueToBranch(DynInstPtr &inst);
 
-    void iew();
+    void squashDueToMem(DynInstPtr &inst);
+
+    void block();
+
+    inline void unblock();
+
+    void wakeDependents(DynInstPtr &inst);
+
+    void instToCommit(DynInstPtr &inst);
 
   private:
     void dispatchInsts();
 
     void executeInsts();
 
+  public:
+    void tick();
+
+    void iew();
+
     //Interfaces to objects inside and outside of IEW.
     /** Time buffer interface. */
     TimeBuffer<TimeStruct> *timeBuffer;
@@ -121,11 +138,18 @@ class SimpleIEW
     /** Skid buffer between rename and IEW. */
     std::queue<RenameStruct> skidBuffer;
 
+  protected:
     /** Instruction queue. */
     IQ instQueue;
 
     LDSTQ ldstQueue;
 
+#ifndef FULL_SYSTEM
+  public:
+    void lsqWriteback();
+#endif
+
+  private:
     /** Pointer to rename map.  Might not want this stage to directly
      *  access this though...
      */
diff --git a/cpu/beta_cpu/iew_impl.hh b/cpu/beta_cpu/iew_impl.hh
index 1d072ab33..086d39320 100644
--- a/cpu/beta_cpu/iew_impl.hh
+++ b/cpu/beta_cpu/iew_impl.hh
@@ -12,8 +12,36 @@
 #include "base/timebuf.hh"
 #include "cpu/beta_cpu/iew.hh"
 
-template<class Impl, class IQ>
-SimpleIEW<Impl, IQ>::SimpleIEW(Params &params)
+template<class Impl>
+SimpleIEW<Impl>::WritebackEvent::WritebackEvent(DynInstPtr &_inst,
+                                                SimpleIEW<Impl> *_iew)
+    : Event(&mainEventQueue, CPU_Tick_Pri), inst(_inst), iewStage(_iew)
+{
+    this->setFlags(Event::AutoDelete);
+}
+
+template<class Impl>
+void
+SimpleIEW<Impl>::WritebackEvent::process()
+{
+    DPRINTF(IEW, "IEW: WRITEBACK EVENT!!!!\n");
+
+    // Need to insert instruction into queue to commit
+    iewStage->instToCommit(inst);
+    // Need to execute second half of the instruction, do actual writing to
+    // registers and such
+    inst->execute();
+}
+
+template<class Impl>
+const char *
+SimpleIEW<Impl>::WritebackEvent::description()
+{
+    return "LSQ writeback event";
+}
+
+template<class Impl>
+SimpleIEW<Impl>::SimpleIEW(Params &params)
     : // Just make this time buffer really big for now
       issueToExecQueue(5, 5),
       instQueue(params),
@@ -36,11 +64,13 @@ SimpleIEW<Impl, IQ>::SimpleIEW(Params &params)
 
     // Instruction queue needs the queue between issue and execute.
     instQueue.setIssueToExecuteQueue(&issueToExecQueue);
+
+    ldstQueue.setIEW(this);
 }
 
-template <class Impl, class IQ>
+template <class Impl>
 void
-SimpleIEW<Impl, IQ>::regStats()
+SimpleIEW<Impl>::regStats()
 {
     instQueue.regStats();
 
@@ -111,9 +141,9 @@ SimpleIEW<Impl, IQ>::regStats()
         .desc("Number of branches that were predicted taken incorrectly");
 }
 
-template<class Impl, class IQ>
+template<class Impl>
 void
-SimpleIEW<Impl, IQ>::setCPU(FullCPU *cpu_ptr)
+SimpleIEW<Impl>::setCPU(FullCPU *cpu_ptr)
 {
     DPRINTF(IEW, "IEW: Setting CPU pointer.\n");
     cpu = cpu_ptr;
@@ -122,9 +152,9 @@ SimpleIEW<Impl, IQ>::setCPU(FullCPU *cpu_ptr)
     ldstQueue.setCPU(cpu_ptr);
 }
 
-template<class Impl, class IQ>
+template<class Impl>
 void
-SimpleIEW<Impl, IQ>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
+SimpleIEW<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
 {
     DPRINTF(IEW, "IEW: Setting time buffer pointer.\n");
     timeBuffer = tb_ptr;
@@ -139,9 +169,9 @@ SimpleIEW<Impl, IQ>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
     instQueue.setTimeBuffer(tb_ptr);
 }
 
-template<class Impl, class IQ>
+template<class Impl>
 void
-SimpleIEW<Impl, IQ>::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr)
+SimpleIEW<Impl>::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr)
 {
     DPRINTF(IEW, "IEW: Setting rename queue pointer.\n");
     renameQueue = rq_ptr;
@@ -150,9 +180,9 @@ SimpleIEW<Impl, IQ>::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr)
     fromRename = renameQueue->getWire(-renameToIEWDelay);
 }
 
-template<class Impl, class IQ>
+template<class Impl>
 void
-SimpleIEW<Impl, IQ>::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr)
+SimpleIEW<Impl>::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr)
 {
     DPRINTF(IEW, "IEW: Setting IEW queue pointer.\n");
     iewQueue = iq_ptr;
@@ -161,24 +191,70 @@ SimpleIEW<Impl, IQ>::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr)
     toCommit = iewQueue->getWire(0);
 }
 
-template<class Impl, class IQ>
+template<class Impl>
 void
-SimpleIEW<Impl, IQ>::setRenameMap(RenameMap *rm_ptr)
+SimpleIEW<Impl>::setRenameMap(RenameMap *rm_ptr)
 {
     DPRINTF(IEW, "IEW: Setting rename map pointer.\n");
     renameMap = rm_ptr;
 }
 
-template<class Impl, class IQ>
+template<class Impl>
 void
-SimpleIEW<Impl, IQ>::wakeDependents(DynInstPtr &inst)
+SimpleIEW<Impl>::squash()
 {
-    instQueue.wakeDependents(inst);
+    DPRINTF(IEW, "IEW: Squashing all instructions.\n");
+    _status = Squashing;
+
+    // Tell the IQ to start squashing.
+    instQueue.squash();
+
+    // Tell the LDSTQ to squash, passing the doneSeqNum reported by commit.
+    ldstQueue.squash(fromCommit->commitInfo.doneSeqNum);
+}
+
+template<class Impl>
+void
+SimpleIEW<Impl>::squashDueToBranch(DynInstPtr &inst)
+{
+    DPRINTF(IEW, "IEW: Squashing from a specific instruction, PC: %#x.\n",
+            inst->PC);
+    // @todo: Open question - should the ROB stage be the one to tell this
+    // stage when to squash, rather than changing status here?
+    _status = Squashing;
+
+    // Tell rename to squash through the time buffer (toCommit wire).
+    toCommit->squash = true;
+    // Also send the squashing inst's seqNum and PC info to prior stages.
+    toCommit->squashedSeqNum = inst->seqNum;
+    toCommit->mispredPC = inst->readPC();
+    toCommit->nextPC = inst->readNextPC();
+    toCommit->branchMispredict = true;
+    // Actual direction: taken iff next PC is not PC + sizeof(MachInst).
+    toCommit->branchTaken = inst->readNextPC() !=
+        (inst->readPC() + sizeof(MachInst));
+}
+
+template<class Impl>
+void
+SimpleIEW<Impl>::squashDueToMem(DynInstPtr &inst)
+{
+    DPRINTF(IEW, "IEW: Squashing from a specific instruction, PC: %#x.\n",
+            inst->PC);
+    // @todo: Open question - should the ROB stage be the one to tell this
+    // stage when to squash, rather than changing status here?
+    _status = Squashing;
+
+    // Tell rename to squash through the time buffer (toCommit wire).
+    toCommit->squash = true;
+    // Send seqNum and next PC only; no branch-mispredict fields are set.
+    toCommit->squashedSeqNum = inst->seqNum;
+    toCommit->nextPC = inst->readNextPC();
 }
 
-template<class Impl, class IQ>
+template<class Impl>
 void
-SimpleIEW<Impl, IQ>::block()
+SimpleIEW<Impl>::block()
 {
     DPRINTF(IEW, "IEW: Blocking.\n");
     // Set the status to Blocked.
@@ -193,9 +269,9 @@ SimpleIEW<Impl, IQ>::block()
     // the previous stages are expected to check all possible stall signals.
 }
 
-template<class Impl, class IQ>
+template<class Impl>
 inline void
-SimpleIEW<Impl, IQ>::unblock()
+SimpleIEW<Impl>::unblock()
 {
     // Check if there's information in the skid buffer.  If there is, then
     // set status to unblocking, otherwise set it directly to running.
@@ -215,62 +291,24 @@ SimpleIEW<Impl, IQ>::unblock()
     }
 }
 
-template<class Impl, class IQ>
+template<class Impl>
 void
-SimpleIEW<Impl, IQ>::squash()
+SimpleIEW<Impl>::wakeDependents(DynInstPtr &inst)
 {
-    DPRINTF(IEW, "IEW: Squashing all instructions.\n");
-    _status = Squashing;
-
-    // Tell the IQ to start squashing.
-    instQueue.squash();
-
-    // Tell the LDSTQ to start squashing.
-    ldstQueue.squash(fromCommit->commitInfo.doneSeqNum);
+    instQueue.wakeDependents(inst);
 }
 
-template<class Impl, class IQ>
-void
-SimpleIEW<Impl, IQ>::squashDueToBranch(DynInstPtr &inst)
-{
-    DPRINTF(IEW, "IEW: Squashing from a specific instruction, PC: %#x.\n",
-            inst->PC);
-    // Perhaps leave the squashing up to the ROB stage to tell it when to
-    // squash?
-    _status = Squashing;
-
-    // Tell rename to squash through the time buffer.
-    toCommit->squash = true;
-    // Also send PC update information back to prior stages.
-    toCommit->squashedSeqNum = inst->seqNum;
-    toCommit->mispredPC = inst->readPC();
-    toCommit->nextPC = inst->readNextPC();
-    toCommit->branchMispredict = true;
-    // Prediction was incorrect, so send back inverse.
-    toCommit->branchTaken = inst->readNextPC() !=
-        (inst->readPC() + sizeof(MachInst));
-}
 
-template<class Impl, class IQ>
+template<class Impl>
 void
-SimpleIEW<Impl, IQ>::squashDueToMem(DynInstPtr &inst)
+SimpleIEW<Impl>::instToCommit(DynInstPtr &inst)
 {
-    DPRINTF(IEW, "IEW: Squashing from a specific instruction, PC: %#x.\n",
-            inst->PC);
-    // Perhaps leave the squashing up to the ROB stage to tell it when to
-    // squash?
-    _status = Squashing;
 
-    // Tell rename to squash through the time buffer.
-    toCommit->squash = true;
-    // Also send PC update information back to prior stages.
-    toCommit->squashedSeqNum = inst->seqNum;
-    toCommit->nextPC = inst->readNextPC();
 }
 
-template <class Impl, class IQ>
+template <class Impl>
 void
-SimpleIEW<Impl, IQ>::dispatchInsts()
+SimpleIEW<Impl>::dispatchInsts()
 {
     ////////////////////////////////////////
     // DISPATCH/ISSUE stage
@@ -329,14 +367,14 @@ SimpleIEW<Impl, IQ>::dispatchInsts()
                 // a signal to this stage to issue and execute that
                 // store.  Change to be a bit that says the instruction
                 // has extra work to do at commit.
-                inst->setCanCommit();
+//                inst->setCanCommit();
 
-                instQueue.insertNonSpec(inst);
+//                instQueue.insertNonSpec(inst);
 
                 ++iewDispStoreInsts;
-                ++iewDispNonSpecInsts;
+//                ++iewDispNonSpecInsts;
 
-                continue;
+//                continue;
             } else if (inst->isNonSpeculative()) {
                 DPRINTF(IEW, "IEW: Issue: Nonspeculative instruction "
                         "encountered, skipping.\n");
@@ -385,9 +423,9 @@ SimpleIEW<Impl, IQ>::dispatchInsts()
     }
 }
 
-template <class Impl, class IQ>
+template <class Impl>
 void
-SimpleIEW<Impl, IQ>::executeInsts()
+SimpleIEW<Impl>::executeInsts()
 {
     ////////////////////////////////////////
     //EXECUTE/WRITEBACK stage
@@ -403,6 +441,8 @@ SimpleIEW<Impl, IQ>::executeInsts()
 
     int fu_usage = 0;
     bool fetch_redirect = false;
+    int inst_slot = 0;
+    int time_slot = 0;
 
     // Execute/writeback any instructions that are available.
     for (int inst_num = 0;
@@ -452,7 +492,7 @@ SimpleIEW<Impl, IQ>::executeInsts()
 
                 ++iewExecLoadInsts;
             } else if (inst->isStore()) {
-                ldstQueue.executeStore();
+                ldstQueue.executeStore(inst);
 
                 ++iewExecStoreInsts;
             } else {
@@ -473,9 +513,23 @@ SimpleIEW<Impl, IQ>::executeInsts()
         // For now naively assume that all instructions take one cycle.
         // Otherwise would have to look into the time buffer based on the
         // latency of the instruction.
+        (*iewQueue)[time_slot].insts[inst_slot];
+        while ((*iewQueue)[time_slot].insts[inst_slot]) {
+            if (inst_slot < issueWidth) {
+                ++inst_slot;
+            } else {
+                ++time_slot;
+                inst_slot = 0;
+            }
+
+            assert(time_slot < 5);
+        }
+
+        // May actually have to work this out, especially with loads and stores
 
         // Add finished instruction to queue to commit.
-        toCommit->insts[inst_num] = inst;
+        (*iewQueue)[time_slot].insts[inst_slot] = inst;
+        (*iewQueue)[time_slot].size++;
 
         // Check if branch was correct.  This check happens after the
         // instruction is added to the queue because even if the branch
@@ -518,9 +572,9 @@ SimpleIEW<Impl, IQ>::executeInsts()
     }
 }
 
-template<class Impl, class IQ>
+template<class Impl>
 void
-SimpleIEW<Impl, IQ>::tick()
+SimpleIEW<Impl>::tick()
 {
     // Considering putting all the state-determining stuff in this section.
 
@@ -594,14 +648,20 @@ SimpleIEW<Impl, IQ>::tick()
     // Write back number of free IQ entries here.
     toRename->iewInfo.freeIQEntries = instQueue.numFreeEntries();
 
+    ldstQueue.writebackStores();
+
     // Check the committed load/store signals to see if there's a load
     // or store to commit.  Also check if it's being told to execute a
     // nonspeculative instruction.
-    if (fromCommit->commitInfo.commitIsStore) {
+    // This is pretty inefficient...
+//    if (0/*fromCommit->commitInfo.commitIsStore*/) {
+    if (!fromCommit->commitInfo.squash &&
+        !fromCommit->commitInfo.robSquashing) {
         ldstQueue.commitStores(fromCommit->commitInfo.doneSeqNum);
-    } else if (fromCommit->commitInfo.commitIsLoad) {
+//    } else if (fromCommit->commitInfo.commitIsLoad) {
         ldstQueue.commitLoads(fromCommit->commitInfo.doneSeqNum);
     }
+//    }
 
     if (fromCommit->commitInfo.nonSpecSeqNum != 0) {
         instQueue.scheduleNonSpec(fromCommit->commitInfo.nonSpecSeqNum);
@@ -611,9 +671,9 @@ SimpleIEW<Impl, IQ>::tick()
             instQueue.numFreeEntries());
 }
 
-template<class Impl, class IQ>
+template<class Impl>
 void
-SimpleIEW<Impl, IQ>::iew()
+SimpleIEW<Impl>::iew()
 {
     // Might want to put all state checks in the tick() function.
     // Check if being told to stall from commit.
@@ -663,3 +723,12 @@ SimpleIEW<Impl, IQ>::iew()
     // Not the best place for it, but this works (hopefully).
     issueToExecQueue.advance();
 }
+
+#ifndef FULL_SYSTEM
+template<class Impl>
+void
+SimpleIEW<Impl>::lsqWriteback()
+{
+    ldstQueue.writebackAllInsts(); // Pure forward to the load/store queue.
+}
+#endif // #ifndef FULL_SYSTEM
diff --git a/cpu/beta_cpu/inst_queue.hh b/cpu/beta_cpu/inst_queue.hh
index 6fcce70a4..120e6b940 100644
--- a/cpu/beta_cpu/inst_queue.hh
+++ b/cpu/beta_cpu/inst_queue.hh
@@ -174,7 +174,7 @@ class InstructionQueue
      *  once the IQ gets a signal from commit.  While it's redundant to
      *  have the key be a part of the value (the sequence number is stored
      *  inside of DynInst), when these instructions are woken up only
-     *  the sequence number will be available.  Thus it is necessary to be
+     *  the sequence number will be available.  Thus it is most efficient to be
      *  able to search by the sequence number alone.
      */
     std::map<InstSeqNum, DynInstPtr> nonSpecInsts;
diff --git a/cpu/beta_cpu/inst_queue_impl.hh b/cpu/beta_cpu/inst_queue_impl.hh
index c688181ed..d4e3939cf 100644
--- a/cpu/beta_cpu/inst_queue_impl.hh
+++ b/cpu/beta_cpu/inst_queue_impl.hh
@@ -31,8 +31,6 @@ InstructionQueue<Impl>::InstructionQueue(Params &params)
       numPhysFloatRegs(params.numPhysFloatRegs),
       commitToIEWDelay(params.commitToIEWDelay)
 {
-    DPRINTF(IQ, "IQ: Int width is %i.\n", params.executeIntWidth);
-
     // Initialize the number of free IQ entries.
     freeEntries = numEntries;
 
@@ -291,10 +289,6 @@ InstructionQueue<Impl>::insertNonSpec(DynInstPtr &inst)
     // Decrease the number of free entries.
     --freeEntries;
 
-    // Look through its source registers (physical regs), and mark any
-    // dependencies.
-//    addToDependents(inst);
-
     // Have this instruction set itself as the producer of its destination
     // register(s).
     createDependency(inst);
@@ -568,15 +562,20 @@ InstructionQueue<Impl>::scheduleReadyInsts()
             break;
 
           case Squashed:
-            issuing_inst = squashed_head_inst;
+//            issuing_inst = squashed_head_inst;
+            assert(0 && "Squashed insts should not issue any more!");
             squashedInsts.pop();
+            // Set the squashed instruction as able to commit so that commit
+            // can just drop it from the ROB.  This is a bit faked.
             ++squashed_issued;
+            ++freeEntries;
+
             DPRINTF(IQ, "IQ: Issuing squashed instruction PC %#x.\n",
-                    issuing_inst->readPC());
+                    squashed_head_inst->readPC());
             break;
         }
 
-        if (list_with_oldest != None) {
+        if (list_with_oldest != None && list_with_oldest != Squashed) {
             i2e_info->insts[total_issued] = issuing_inst;
             i2e_info->size++;
 
@@ -641,8 +640,10 @@ InstructionQueue<Impl>::squash()
     // Setup the squash iterator to point to the tail.
     squashIt = tail;
 
-    // Call doSquash.
-    doSquash();
+    // Call doSquash if there are insts in the IQ
+    if (freeEntries != numEntries) {
+        doSquash();
+    }
 
     // Also tell the memory dependence unit to squash.
     memDepUnit.squash(squashedSeqNum);
@@ -672,12 +673,12 @@ InstructionQueue<Impl>::doSquash()
             // Remove the instruction from the dependency list.
             // Hack for now: These below don't add themselves to the
             // dependency list, so don't try to remove them.
-            if (!squashed_inst->isNonSpeculative() &&
-                !squashed_inst->isStore()) {
-                int8_t total_src_regs = squashed_inst->numSrcRegs();
+            if (!squashed_inst->isNonSpeculative()/* &&
+                                                     !squashed_inst->isStore()*/
+                ) {
 
                 for (int src_reg_idx = 0;
-                     src_reg_idx < total_src_regs;
+                     src_reg_idx < squashed_inst->numSrcRegs();
                      src_reg_idx++)
                 {
                     PhysRegIndex src_reg =
@@ -699,6 +700,8 @@ InstructionQueue<Impl>::doSquash()
 
                 // Might want to remove producers as well.
             } else {
+                nonSpecInsts[squashed_inst->seqNum] = NULL;
+
                 nonSpecInsts.erase(squashed_inst->seqNum);
 
                 ++iqSquashedNonSpecRemoved;
@@ -709,7 +712,11 @@ InstructionQueue<Impl>::doSquash()
             // Mark it as squashed within the IQ.
             squashed_inst->setSquashedInIQ();
 
-            squashedInsts.push(squashed_inst);
+//            squashedInsts.push(squashed_inst);
+            squashed_inst->setIssued();
+            squashed_inst->setCanCommit();
+
+            ++freeEntries;
 
             DPRINTF(IQ, "IQ: Instruction PC %#x squashed.\n",
                     squashed_inst->readPC());
@@ -718,6 +725,13 @@ InstructionQueue<Impl>::doSquash()
         --squashIt;
         ++iqSquashedInstsExamined;
     }
+
+    assert(freeEntries <= numEntries);
+
+    if (freeEntries == numEntries) {
+        tail = cpu->instList.end();
+    }
+
 }
 
 template <class Impl>
@@ -739,8 +753,6 @@ InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst)
     //Look at the physical destination register of the DynInst
     //and look it up on the dependency graph.  Then mark as ready
     //any instructions within the instruction queue.
-    int8_t total_dest_regs = completed_inst->numDestRegs();
-
     DependencyEntry *curr;
 
     // Tell the memory dependence unit to wake any dependents on this
@@ -751,7 +763,7 @@ InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst)
     }
 
     for (int dest_reg_idx = 0;
-         dest_reg_idx < total_dest_regs;
+         dest_reg_idx < completed_inst->numDestRegs();
          dest_reg_idx++)
     {
         PhysRegIndex dest_reg =
@@ -759,7 +771,7 @@ InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst)
 
         // Special case of uniq or control registers.  They are not
         // handled by the IQ and thus have no dependency graph entry.
-        // @todo Figure out a cleaner way to handle thie.
+        // @todo Figure out a cleaner way to handle this.
         if (dest_reg >= numPhysRegs) {
             continue;
         }
@@ -789,6 +801,8 @@ InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst)
 
             DependencyEntry::mem_alloc_counter--;
 
+            curr->inst = NULL;
+
             delete curr;
         }
 
@@ -874,7 +888,10 @@ InstructionQueue<Impl>::createDependency(DynInstPtr &new_inst)
 
         dependGraph[dest_reg].inst = new_inst;
 
-        assert(!dependGraph[dest_reg].next);
+        if (dependGraph[dest_reg].next) {
+            dumpDependGraph();
+            panic("IQ: Dependency graph not empty!");
+        }
 
         // Mark the scoreboard to say it's not yet ready.
         regScoreboard[dest_reg] = false;
@@ -929,34 +946,10 @@ InstructionQueue<Impl>::DependencyEntry::remove(DynInstPtr &inst_to_remove)
 
     --mem_alloc_counter;
 
-    delete curr;
-}
-
-template <class Impl>
-void
-InstructionQueue<Impl>::dumpDependGraph()
-{
-    DependencyEntry *curr;
+    // Could push this off to the destructor of DependencyEntry
+    curr->inst = NULL;
 
-    for (int i = 0; i < numPhysRegs; ++i)
-    {
-        curr = &dependGraph[i];
-
-        if (curr->inst) {
-            cprintf("dependGraph[%i]: producer: %#x consumer: ", i,
-                    curr->inst->readPC());
-        } else {
-            cprintf("dependGraph[%i]: No producer. consumer: ", i);
-        }
-
-        while (curr->next != NULL) {
-            curr = curr->next;
-
-            cprintf("%#x ", curr->inst->readPC());
-        }
-
-        cprintf("\n");
-    }
+    delete curr;
 }
 
 template <class Impl>
@@ -1024,6 +1017,12 @@ InstructionQueue<Impl>::addIfReady(DynInstPtr &inst)
     }
 }
 
+/*
+ * Caution, this function must not be called prior to tail being updated at
+ * least once, otherwise it will fail the assertion.  This is because
+ * instList.begin() actually changes upon the insertion of an element into the
+ * list when the list is empty.
+ */
 template <class Impl>
 int
 InstructionQueue<Impl>::countInsts()
@@ -1031,6 +1030,9 @@ InstructionQueue<Impl>::countInsts()
     ListIt count_it = cpu->instList.begin();
     int total_insts = 0;
 
+    if (tail == cpu->instList.end())
+        return 0;
+
     while (count_it != tail) {
         if (!(*count_it)->isIssued()) {
             ++total_insts;
@@ -1053,6 +1055,33 @@ InstructionQueue<Impl>::countInsts()
 
 template <class Impl>
 void
+InstructionQueue<Impl>::dumpDependGraph()
+{
+    DependencyEntry *curr;
+    // Debug dump: print one line per physical register via cprintf.
+    for (int i = 0; i < numPhysRegs; ++i)
+    {
+        curr = &dependGraph[i];
+        // The head entry's inst, when set, is printed as the producer.
+        if (curr->inst) {
+            cprintf("dependGraph[%i]: producer: %#x consumer: ", i,
+                    curr->inst->readPC());
+        } else {
+            cprintf("dependGraph[%i]: No producer. consumer: ", i);
+        }
+        // Entries linked after the head are printed as consumer PCs.
+        while (curr->next != NULL) {
+            curr = curr->next;
+
+            cprintf("%#x ", curr->inst->readPC());
+        }
+
+        cprintf("\n");
+    }
+}
+
+template <class Impl>
+void
 InstructionQueue<Impl>::dumpLists()
 {
     cprintf("Ready integer list size: %i\n", readyIntInsts.size());
diff --git a/cpu/beta_cpu/regfile.hh b/cpu/beta_cpu/regfile.hh
index a81ed63bc..c9d1b092f 100644
--- a/cpu/beta_cpu/regfile.hh
+++ b/cpu/beta_cpu/regfile.hh
@@ -1,18 +1,26 @@
-#ifndef __REGFILE_HH__
-#define __REGFILE_HH__
+#ifndef __CPU_BETA_CPU_REGFILE_HH__
+#define __CPU_BETA_CPU_REGFILE_HH__
 
 // @todo: Destructor
 
 #include "arch/alpha/isa_traits.hh"
+#include "base/trace.hh"
 #include "cpu/beta_cpu/comm.hh"
 
-#include "base/trace.hh"
+#ifdef FULL_SYSTEM
+#include "kern/kernel_stats.hh"
+#include "arch/alpha/ev5.hh"
+
+using namespace EV5;
+#endif
 
 // This really only depends on the ISA, and not the Impl.  It might be nicer
 // to see if I can make it depend on nothing...
 // Things that are in the ifdef FULL_SYSTEM are pretty dependent on the ISA,
 // and should go in the AlphaFullCPU.
 
+extern void debug_break();
+
 template <class Impl>
 class PhysRegFile
 {
@@ -27,6 +35,7 @@ class PhysRegFile
     //be private eventually with some accessor functions.
   public:
     typedef typename Impl::ISA ISA;
+    typedef typename Impl::FullCPU FullCPU;
 
     PhysRegFile(unsigned _numPhysicalIntRegs,
                 unsigned _numPhysicalFloatRegs);
@@ -177,6 +186,7 @@ class PhysRegFile
 #ifdef FULL_SYSTEM
     uint64_t readIpr(int idx, Fault &fault);
     Fault setIpr(int idx, uint64_t val);
+    InternalProcReg *getIpr() { return ipr; }
     int readIntrFlag() { return intrflag; }
     void setIntrFlag(int val) { intrflag = val; }
 #endif
@@ -196,7 +206,21 @@ class PhysRegFile
     Addr pc;            // program counter
     Addr npc;            // next-cycle program counter
 
+#ifdef FULL_SYSTEM
   private:
+    // This is ISA specific stuff; remove it eventually once ISAImpl is used
+    IntReg palregs[NumIntRegs];	// PAL shadow registers
+    InternalProcReg ipr[NumInternalProcRegs]; // internal processor regs
+    int intrflag;			// interrupt flag
+    bool pal_shadow;		// using pal_shadow registers
+#endif
+
+  private:
+    FullCPU *cpu;
+
+  public:
+    void setCPU(FullCPU *cpu_ptr) { cpu = cpu_ptr; }
+
     unsigned numPhysicalIntRegs;
     unsigned numPhysicalFloatRegs;
 };
@@ -269,46 +293,42 @@ PhysRegFile<Impl>::readIpr(int idx, Fault &fault)
       case ISA::IPR_IPLR:
       case ISA::IPR_INTID:
       case ISA::IPR_PMCTR:
-    // no side-effect
-    retval = ipr[idx];
-    break;
+        // no side-effect
+        retval = ipr[idx];
+        break;
 
       case ISA::IPR_CC:
-    retval |= ipr[idx] & ULL(0xffffffff00000000);
-    retval |= curTick  & ULL(0x00000000ffffffff);
-    break;
+        retval |= ipr[idx] & ULL(0xffffffff00000000);
+        retval |= curTick  & ULL(0x00000000ffffffff);
+        break;
 
       case ISA::IPR_VA:
-    // SFX: unlocks interrupt status registers
-    retval = ipr[idx];
-
-        if (!misspeculating())
-            regs.intrlock = false;
-    break;
+        retval = ipr[idx];
+        break;
 
       case ISA::IPR_VA_FORM:
       case ISA::IPR_MM_STAT:
       case ISA::IPR_IFAULT_VA_FORM:
       case ISA::IPR_EXC_MASK:
       case ISA::IPR_EXC_SUM:
-    retval = ipr[idx];
-    break;
+        retval = ipr[idx];
+        break;
 
       case ISA::IPR_DTB_PTE:
-    {
-        ISA::PTE &pte = dtb->index(!misspeculating());
-
-        retval |= ((u_int64_t)pte.ppn & ULL(0x7ffffff)) << 32;
-        retval |= ((u_int64_t)pte.xre & ULL(0xf)) << 8;
-        retval |= ((u_int64_t)pte.xwe & ULL(0xf)) << 12;
-        retval |= ((u_int64_t)pte.fonr & ULL(0x1)) << 1;
-        retval |= ((u_int64_t)pte.fonw & ULL(0x1))<< 2;
-        retval |= ((u_int64_t)pte.asma & ULL(0x1)) << 4;
-        retval |= ((u_int64_t)pte.asn & ULL(0x7f)) << 57;
-    }
-    break;
-
-    // write only registers
+        {
+            typename ISA::PTE &pte = cpu->dtb->index(1);
+
+            retval |= ((u_int64_t)pte.ppn & ULL(0x7ffffff)) << 32;
+            retval |= ((u_int64_t)pte.xre & ULL(0xf)) << 8;
+            retval |= ((u_int64_t)pte.xwe & ULL(0xf)) << 12;
+            retval |= ((u_int64_t)pte.fonr & ULL(0x1)) << 1;
+            retval |= ((u_int64_t)pte.fonw & ULL(0x1))<< 2;
+            retval |= ((u_int64_t)pte.asma & ULL(0x1)) << 4;
+            retval |= ((u_int64_t)pte.asn & ULL(0x7f)) << 57;
+        }
+        break;
+
+        // write only registers
       case ISA::IPR_HWINT_CLR:
       case ISA::IPR_SL_XMIT:
       case ISA::IPR_DC_FLUSH:
@@ -318,22 +338,19 @@ PhysRegFile<Impl>::readIpr(int idx, Fault &fault)
       case ISA::IPR_DTB_IAP:
       case ISA::IPR_ITB_IA:
       case ISA::IPR_ITB_IAP:
-    fault = Unimplemented_Opcode_Fault;
-    break;
+        fault = Unimplemented_Opcode_Fault;
+        break;
 
       default:
-    // invalid IPR
-    fault = Unimplemented_Opcode_Fault;
-    break;
+        // invalid IPR
+        fault = Unimplemented_Opcode_Fault;
+        break;
     }
 
     return retval;
 }
 
-#ifdef DEBUG
-// Cause the simulator to break when changing to the following IPL
-int break_ipl = -1;
-#endif
+extern int break_ipl;
 
 template <class Impl>
 Fault
@@ -341,9 +358,6 @@ PhysRegFile<Impl>::setIpr(int idx, uint64_t val)
 {
     uint64_t old;
 
-    if (misspeculating())
-    return No_Fault;
-
     switch (idx) {
       case ISA::IPR_PALtemp0:
       case ISA::IPR_PALtemp1:
@@ -372,222 +386,225 @@ PhysRegFile<Impl>::setIpr(int idx, uint64_t val)
       case ISA::IPR_IC_PERR_STAT:
       case ISA::IPR_DC_PERR_STAT:
       case ISA::IPR_PMCTR:
-    // write entire quad w/ no side-effect
-    ipr[idx] = val;
-    break;
+        // write entire quad w/ no side-effect
+        ipr[idx] = val;
+        break;
 
       case ISA::IPR_CC_CTL:
-    // This IPR resets the cycle counter.  We assume this only
-    // happens once... let's verify that.
-    assert(ipr[idx] == 0);
-    ipr[idx] = 1;
-    break;
+        // This IPR resets the cycle counter.  We assume this only
+        // happens once... let's verify that.
+        assert(ipr[idx] == 0);
+        ipr[idx] = 1;
+        break;
 
       case ISA::IPR_CC:
-    // This IPR only writes the upper 64 bits.  It's ok to write
-    // all 64 here since we mask out the lower 32 in rpcc (see
-    // isa_desc).
-    ipr[idx] = val;
-    break;
+        // This IPR only writes the upper 32 bits.  It's ok to write
+        // all 64 here since we mask out the lower 32 in rpcc (see
+        // isa_desc).
+        ipr[idx] = val;
+        break;
 
       case ISA::IPR_PALtemp23:
-    // write entire quad w/ no side-effect
-    old = ipr[idx];
-    ipr[idx] = val;
-    kernelStats.context(old, val);
-    break;
+        // write entire quad w/ no side-effect
+        old = ipr[idx];
+        ipr[idx] = val;
+//        kernelStats.context(old, val);
+        break;
 
       case ISA::IPR_DTB_PTE:
-    // write entire quad w/ no side-effect, tag is forthcoming
-    ipr[idx] = val;
-    break;
+        // write entire quad w/ no side-effect, tag is forthcoming
+        ipr[idx] = val;
+        break;
 
       case ISA::IPR_EXC_ADDR:
-    // second least significant bit in PC is always zero
-    ipr[idx] = val & ~2;
-    break;
+        // second least significant bit in PC is always zero
+        ipr[idx] = val & ~2;
+        break;
 
       case ISA::IPR_ASTRR:
       case ISA::IPR_ASTER:
-    // only write least significant four bits - privilege mask
-    ipr[idx] = val & 0xf;
-    break;
+        // only write least significant four bits - privilege mask
+        ipr[idx] = val & 0xf;
+        break;
 
       case ISA::IPR_IPLR:
 #ifdef DEBUG
-    if (break_ipl != -1 && break_ipl == (val & 0x1f))
-        debug_break();
+        if (break_ipl != -1 && break_ipl == (val & 0x1f))
+            debug_break();
 #endif
 
-    // only write least significant five bits - interrupt level
-    ipr[idx] = val & 0x1f;
-    kernelStats.swpipl(ipr[idx]);
-    break;
+        // only write least significant five bits - interrupt level
+        ipr[idx] = val & 0x1f;
+//        kernelStats.swpipl(ipr[idx]);
+        break;
 
       case ISA::IPR_DTB_CM:
-    kernelStats.mode((val & 0x18) != 0);
+//        if (val & 0x18)
+//            kernelStats->mode(Kernel::user);
+//        else
+//            kernelStats->mode(Kernel::kernel);
 
       case ISA::IPR_ICM:
-    // only write two mode bits - processor mode
-    ipr[idx] = val & 0x18;
-    break;
+        // only write two mode bits - processor mode
+        ipr[idx] = val & 0x18;
+        break;
 
       case ISA::IPR_ALT_MODE:
-    // only write two mode bits - processor mode
-    ipr[idx] = val & 0x18;
-    break;
+        // only write two mode bits - processor mode
+        ipr[idx] = val & 0x18;
+        break;
 
       case ISA::IPR_MCSR:
-    // more here after optimization...
-    ipr[idx] = val;
-    break;
+        // more here after optimization...
+        ipr[idx] = val;
+        break;
 
       case ISA::IPR_SIRR:
-    // only write software interrupt mask
-    ipr[idx] = val & 0x7fff0;
-    break;
+        // only write software interrupt mask
+        ipr[idx] = val & 0x7fff0;
+        break;
 
       case ISA::IPR_ICSR:
-    ipr[idx] = val & ULL(0xffffff0300);
-    break;
+        ipr[idx] = val & ULL(0xffffff0300);
+        break;
 
       case ISA::IPR_IVPTBR:
       case ISA::IPR_MVPTBR:
-    ipr[idx] = val & ULL(0xffffffffc0000000);
-    break;
+        ipr[idx] = val & ULL(0xffffffffc0000000);
+        break;
 
       case ISA::IPR_DC_TEST_CTL:
-    ipr[idx] = val & 0x1ffb;
-    break;
+        ipr[idx] = val & 0x1ffb;
+        break;
 
       case ISA::IPR_DC_MODE:
       case ISA::IPR_MAF_MODE:
-    ipr[idx] = val & 0x3f;
-    break;
+        ipr[idx] = val & 0x3f;
+        break;
 
       case ISA::IPR_ITB_ASN:
-    ipr[idx] = val & 0x7f0;
-    break;
+        ipr[idx] = val & 0x7f0;
+        break;
 
       case ISA::IPR_DTB_ASN:
-    ipr[idx] = val & ULL(0xfe00000000000000);
-    break;
+        ipr[idx] = val & ULL(0xfe00000000000000);
+        break;
 
       case ISA::IPR_EXC_SUM:
       case ISA::IPR_EXC_MASK:
-    // any write to this register clears it
-    ipr[idx] = 0;
-    break;
+        // any write to this register clears it
+        ipr[idx] = 0;
+        break;
 
       case ISA::IPR_INTID:
       case ISA::IPR_SL_RCV:
       case ISA::IPR_MM_STAT:
       case ISA::IPR_ITB_PTE_TEMP:
       case ISA::IPR_DTB_PTE_TEMP:
-    // read-only registers
-    return Unimplemented_Opcode_Fault;
+        // read-only registers
+        return Unimplemented_Opcode_Fault;
 
       case ISA::IPR_HWINT_CLR:
       case ISA::IPR_SL_XMIT:
       case ISA::IPR_DC_FLUSH:
       case ISA::IPR_IC_FLUSH:
-    // the following are write only
-    ipr[idx] = val;
-    break;
+        // the following are write only
+        ipr[idx] = val;
+        break;
 
       case ISA::IPR_DTB_IA:
-    // really a control write
-    ipr[idx] = 0;
+        // really a control write
+        ipr[idx] = 0;
 
-    dtb->flushAll();
-    break;
+        cpu->dtb->flushAll();
+        break;
 
       case ISA::IPR_DTB_IAP:
-    // really a control write
-    ipr[idx] = 0;
+        // really a control write
+        ipr[idx] = 0;
 
-    dtb->flushProcesses();
-    break;
+        cpu->dtb->flushProcesses();
+        break;
 
       case ISA::IPR_DTB_IS:
-    // really a control write
-    ipr[idx] = val;
+        // really a control write
+        ipr[idx] = val;
 
-    dtb->flushAddr(val, DTB_ASN_ASN(ipr[ISA::IPR_DTB_ASN]));
-    break;
+        cpu->dtb->flushAddr(val, DTB_ASN_ASN(ipr[ISA::IPR_DTB_ASN]));
+        break;
 
       case ISA::IPR_DTB_TAG: {
-      struct ISA::PTE pte;
-
-      // FIXME: granularity hints NYI...
-      if (DTB_PTE_GH(ipr[ISA::IPR_DTB_PTE]) != 0)
-          panic("PTE GH field != 0");
-
-      // write entire quad
-      ipr[idx] = val;
-
-      // construct PTE for new entry
-      pte.ppn = DTB_PTE_PPN(ipr[ISA::IPR_DTB_PTE]);
-      pte.xre = DTB_PTE_XRE(ipr[ISA::IPR_DTB_PTE]);
-      pte.xwe = DTB_PTE_XWE(ipr[ISA::IPR_DTB_PTE]);
-      pte.fonr = DTB_PTE_FONR(ipr[ISA::IPR_DTB_PTE]);
-      pte.fonw = DTB_PTE_FONW(ipr[ISA::IPR_DTB_PTE]);
-      pte.asma = DTB_PTE_ASMA(ipr[ISA::IPR_DTB_PTE]);
-      pte.asn = DTB_ASN_ASN(ipr[ISA::IPR_DTB_ASN]);
-
-      // insert new TAG/PTE value into data TLB
-      dtb->insert(val, pte);
+          struct ISA::PTE pte;
+
+          // FIXME: granularity hints NYI...
+          if (DTB_PTE_GH(ipr[ISA::IPR_DTB_PTE]) != 0)
+              panic("PTE GH field != 0");
+
+          // write entire quad
+          ipr[idx] = val;
+
+          // construct PTE for new entry
+          pte.ppn = DTB_PTE_PPN(ipr[ISA::IPR_DTB_PTE]);
+          pte.xre = DTB_PTE_XRE(ipr[ISA::IPR_DTB_PTE]);
+          pte.xwe = DTB_PTE_XWE(ipr[ISA::IPR_DTB_PTE]);
+          pte.fonr = DTB_PTE_FONR(ipr[ISA::IPR_DTB_PTE]);
+          pte.fonw = DTB_PTE_FONW(ipr[ISA::IPR_DTB_PTE]);
+          pte.asma = DTB_PTE_ASMA(ipr[ISA::IPR_DTB_PTE]);
+          pte.asn = DTB_ASN_ASN(ipr[ISA::IPR_DTB_ASN]);
+
+          // insert new TAG/PTE value into data TLB
+          cpu->dtb->insert(val, pte);
       }
-    break;
+        break;
 
       case ISA::IPR_ITB_PTE: {
-      struct ISA::PTE pte;
-
-      // FIXME: granularity hints NYI...
-      if (ITB_PTE_GH(val) != 0)
-          panic("PTE GH field != 0");
-
-      // write entire quad
-      ipr[idx] = val;
-
-      // construct PTE for new entry
-      pte.ppn = ITB_PTE_PPN(val);
-      pte.xre = ITB_PTE_XRE(val);
-      pte.xwe = 0;
-      pte.fonr = ITB_PTE_FONR(val);
-      pte.fonw = ITB_PTE_FONW(val);
-      pte.asma = ITB_PTE_ASMA(val);
-      pte.asn = ITB_ASN_ASN(ipr[ISA::IPR_ITB_ASN]);
-
-      // insert new TAG/PTE value into data TLB
-      itb->insert(ipr[ISA::IPR_ITB_TAG], pte);
+          struct ISA::PTE pte;
+
+          // FIXME: granularity hints NYI...
+          if (ITB_PTE_GH(val) != 0)
+              panic("PTE GH field != 0");
+
+          // write entire quad
+          ipr[idx] = val;
+
+          // construct PTE for new entry
+          pte.ppn = ITB_PTE_PPN(val);
+          pte.xre = ITB_PTE_XRE(val);
+          pte.xwe = 0;
+          pte.fonr = ITB_PTE_FONR(val);
+          pte.fonw = ITB_PTE_FONW(val);
+          pte.asma = ITB_PTE_ASMA(val);
+          pte.asn = ITB_ASN_ASN(ipr[ISA::IPR_ITB_ASN]);
+
+          // insert new TAG/PTE value into instruction TLB
+          cpu->itb->insert(ipr[ISA::IPR_ITB_TAG], pte);
       }
-    break;
+        break;
 
       case ISA::IPR_ITB_IA:
-    // really a control write
-    ipr[idx] = 0;
+        // really a control write
+        ipr[idx] = 0;
 
-    itb->flushAll();
-    break;
+        cpu->itb->flushAll();
+        break;
 
       case ISA::IPR_ITB_IAP:
-    // really a control write
-    ipr[idx] = 0;
+        // really a control write
+        ipr[idx] = 0;
 
-    itb->flushProcesses();
-    break;
+        cpu->itb->flushProcesses();
+        break;
 
       case ISA::IPR_ITB_IS:
-    // really a control write
-    ipr[idx] = val;
+        // really a control write
+        ipr[idx] = val;
 
-    itb->flushAddr(val, ITB_ASN_ASN(ipr[ISA::IPR_ITB_ASN]));
-    break;
+        cpu->itb->flushAddr(val, ITB_ASN_ASN(ipr[ISA::IPR_ITB_ASN]));
+        break;
 
       default:
-    // invalid IPR
-    return Unimplemented_Opcode_Fault;
+        // invalid IPR
+        return Unimplemented_Opcode_Fault;
     }
 
     // no error...
@@ -596,4 +613,4 @@ PhysRegFile<Impl>::setIpr(int idx, uint64_t val)
 
 #endif // #ifdef FULL_SYSTEM
 
-#endif // __REGFILE_HH__
+#endif // __CPU_BETA_CPU_REGFILE_HH__
diff --git a/cpu/beta_cpu/rob.hh b/cpu/beta_cpu/rob.hh
index da6b5232a..3e08def74 100644
--- a/cpu/beta_cpu/rob.hh
+++ b/cpu/beta_cpu/rob.hh
@@ -10,8 +10,6 @@
 #include <utility>
 #include <vector>
 
-//#include "arch/alpha/isa_traits.hh"
-
 /**
  * ROB class.  Uses the instruction list that exists within the CPU to
  * represent the ROB.  This class doesn't contain that list, but instead
diff --git a/cpu/beta_cpu/rob_impl.hh b/cpu/beta_cpu/rob_impl.hh
index 86c4e2db1..52d51028e 100644
--- a/cpu/beta_cpu/rob_impl.hh
+++ b/cpu/beta_cpu/rob_impl.hh
@@ -1,5 +1,5 @@
-#ifndef __ROB_IMPL_HH__
-#define __ROB_IMPL_HH__
+#ifndef __CPU_BETA_CPU_ROB_IMPL_HH__
+#define __CPU_BETA_CPU_ROB_IMPL_HH__
 
 #include "cpu/beta_cpu/rob.hh"
 
@@ -107,10 +107,8 @@ ROB<Impl>::retireHead()
     assert(numInstsInROB == countInsts());
     assert(numInstsInROB > 0);
 
-    DynInstPtr head_inst;
-
     // Get the head ROB instruction.
-    head_inst = cpu->instList.front();
+    DynInstPtr head_inst = cpu->instList.front();
 
     // Make certain this can retire.
     assert(head_inst->readyToCommit());
@@ -126,11 +124,10 @@ ROB<Impl>::retireHead()
     // A special case is needed if the instruction being retired is the
     // only instruction in the ROB; otherwise the tail iterator will become
     // invalidated.
-    if (tail == cpu->instList.begin()) {
-        cpu->removeFrontInst(head_inst);
+    cpu->removeFrontInst(head_inst);
+
+    if (numInstsInROB == 0) {
         tail = cpu->instList.end();
-    } else {
-        cpu->removeFrontInst(head_inst);
     }
 }
 
@@ -283,4 +280,4 @@ ROB<Impl>::readTailSeqNum()
     return (*tail)->seqNum;
 }
 
-#endif // __ROB_IMPL_HH__
+#endif // __CPU_BETA_CPU_ROB_IMPL_HH__
diff --git a/cpu/ooo_cpu/ooo_cpu.hh b/cpu/ooo_cpu/ooo_cpu.hh
index 25fdb39b6..ddbc3b061 100644
--- a/cpu/ooo_cpu/ooo_cpu.hh
+++ b/cpu/ooo_cpu/ooo_cpu.hh
@@ -122,7 +122,7 @@ class OoOCPU : public BaseCPU
     enum Status {
         Running,
         Idle,
-        IcacheMissStall,
+        IcacheMiss,
         IcacheMissComplete,
         DcacheMissStall,
         SwitchedOut
@@ -161,6 +161,8 @@ class OoOCPU : public BaseCPU
 
     virtual ~OoOCPU();
 
+    void init();
+
   private:
     void copyFromXC();
 
@@ -203,14 +205,21 @@ class OoOCPU : public BaseCPU
     // Will need to create a cache completion event upon any memory miss.
     ICacheCompletionEvent iCacheCompletionEvent;
 
+    class DCacheCompletionEvent;
+
+    typedef typename
+    std::list<DCacheCompletionEvent>::iterator DCacheCompEventIt;
+
     class DCacheCompletionEvent : public Event
     {
       private:
         OoOCPU *cpu;
         DynInstPtr inst;
+        DCacheCompEventIt dcceIt;
 
       public:
-        DCacheCompletionEvent(OoOCPU *_cpu, DynInstPtr &_inst);
+        DCacheCompletionEvent(OoOCPU *_cpu, DynInstPtr &_inst,
+                              DCacheCompEventIt &_dcceIt);
 
         virtual void process();
         virtual const char *description();
@@ -218,6 +227,11 @@ class OoOCPU : public BaseCPU
 
     friend class DCacheCompletionEvent;
 
+  protected:
+    std::list<DCacheCompletionEvent> dCacheCompList;
+    DCacheCompEventIt dcceIt;
+
+  private:
     Status status() const { return _status; }
 
     virtual void activateContext(int thread_num, int delay);
@@ -260,6 +274,8 @@ class OoOCPU : public BaseCPU
 
     void processICacheCompletion();
 
+  public:
+
     virtual void serialize(std::ostream &os);
     virtual void unserialize(Checkpoint *cp, const std::string &section);
 
@@ -350,7 +366,7 @@ class OoOCPU : public BaseCPU
 
     void commitHeadInst();
 
-    bool grabInst();
+    bool getOneInst();
 
     Fault fetchCacheLine();
 
@@ -471,6 +487,7 @@ class OoOCPU : public BaseCPU
     // ROB tracking stuff.
     DynInstPtr robHeadPtr;
     DynInstPtr robTailPtr;
+    unsigned robSize;
     unsigned robInsts;
 
     // List of outstanding EA instructions.
@@ -545,10 +562,8 @@ OoOCPU<Impl>::read(Addr addr, T &data, unsigned flags, DynInstPtr inst)
         /*MemAccessResult result = */dcacheInterface->access(readReq);
 
         if (dcacheInterface->doEvents()) {
-            readReq->completionEvent = new DCacheCompletionEvent(this, inst);
-            lastDcacheStall = curTick;
-            unscheduleTickEvent();
-            _status = DcacheMissStall;
+            readReq->completionEvent = new DCacheCompletionEvent(this, inst,
+                                                                 dcceIt);
         }
     }
 
@@ -579,7 +594,7 @@ OoOCPU<Impl>::write(T data, Addr addr, unsigned flags,
     writeReq->reset(addr, sizeof(T), flags);
 
     // translate to physical address
-    Fault fault = xc->translateDataWriteReq(writeReq);
+    Fault fault = translateDataWriteReq(writeReq);
 
     // do functional access
     if (fault == No_Fault)
@@ -593,10 +608,8 @@ OoOCPU<Impl>::write(T data, Addr addr, unsigned flags,
         /*MemAccessResult result = */dcacheInterface->access(writeReq);
 
         if (dcacheInterface->doEvents()) {
-            writeReq->completionEvent = new DCacheCompletionEvent(this, inst);
-            lastDcacheStall = curTick;
-            unscheduleTickEvent();
-            _status = DcacheMissStall;
+            writeReq->completionEvent = new DCacheCompletionEvent(this, inst,
+                                                                  dcceIt);
         }
     }
 
diff --git a/cpu/static_inst.hh b/cpu/static_inst.hh
index 3ac88fd3d..4bbe8b636 100644
--- a/cpu/static_inst.hh
+++ b/cpu/static_inst.hh
@@ -41,16 +41,12 @@
 
 // forward declarations
 struct AlphaSimpleImpl;
-struct OoOImpl;
 class ExecContext;
 class DynInst;
 
 template <class Impl>
 class AlphaDynInst;
 
-template <class Impl>
-class OoODynInst;
-
 class FastCPU;
 class SimpleCPU;
 class InorderCPU;
@@ -260,7 +256,7 @@ class StaticInst : public StaticInstBase
      * obtain the dependence info (numSrcRegs and srcRegIdx[]) for
      * just the EA computation.
      */
-    virtual
+    virtual const
     StaticInstPtr<ISA> &eaCompInst() const { return nullStaticInstPtr; }
 
     /**
@@ -269,7 +265,7 @@ class StaticInst : public StaticInstBase
      * obtain the dependence info (numSrcRegs and srcRegIdx[]) for
      * just the memory access (not the EA computation).
      */
-    virtual
+    virtual const
     StaticInstPtr<ISA> &memAccInst() const { return nullStaticInstPtr; }
 
     /// The binary machine instruction.
diff --git a/kern/kernel_stats.hh b/kern/kernel_stats.hh
index af93eb95c..66e9911b5 100644
--- a/kern/kernel_stats.hh
+++ b/kern/kernel_stats.hh
@@ -41,6 +41,9 @@
 class BaseCPU;
 class ExecContext;
 class FnEvent;
+// FIXME: which headers does kernel stats expect to be included already?
+class StaticInstBase;
+class System;
 enum Fault;
 
 namespace Kernel {
diff --git a/python/m5/objects/AlphaFullCPU.mpy b/python/m5/objects/AlphaFullCPU.mpy
new file mode 100644
index 000000000..bf3f2d718
--- /dev/null
+++ b/python/m5/objects/AlphaFullCPU.mpy
@@ -0,0 +1,79 @@
+from BaseCPU import BaseCPU
+
+simobj DerivAlphaFullCPU(BaseCPU):
+    type = 'DerivAlphaFullCPU'
+    # Parameter declarations for the DerivAlphaFullCPU sim object follow.
+    numThreads = Param.Unsigned("number of HW thread contexts")
+    # The functional-memory parameter exists only in non-FULL_SYSTEM builds.
+    if not build_env['FULL_SYSTEM']:
+        mem = Param.FunctionalMemory(NULL, "memory")
+    # Fetch stage: delays from later stages back to fetch, and fetch width.
+    decodeToFetchDelay = Param.Unsigned("Decode to fetch delay")
+    renameToFetchDelay = Param.Unsigned("Rename to fetch delay")
+    iewToFetchDelay = Param.Unsigned("Issue/Execute/Writeback to fetch "
+               "delay")
+    commitToFetchDelay = Param.Unsigned("Commit to fetch delay")
+    fetchWidth = Param.Unsigned("Fetch width")
+    # Decode stage: delays into decode from other stages, and decode width.
+    renameToDecodeDelay = Param.Unsigned("Rename to decode delay")
+    iewToDecodeDelay = Param.Unsigned("Issue/Execute/Writeback to decode "
+               "delay")
+    commitToDecodeDelay = Param.Unsigned("Commit to decode delay")
+    fetchToDecodeDelay = Param.Unsigned("Fetch to decode delay")
+    decodeWidth = Param.Unsigned("Decode width")
+    # Rename stage: delays into rename from other stages, and rename width.
+    iewToRenameDelay = Param.Unsigned("Issue/Execute/Writeback to rename "
+               "delay")
+    commitToRenameDelay = Param.Unsigned("Commit to rename delay")
+    decodeToRenameDelay = Param.Unsigned("Decode to rename delay")
+    renameWidth = Param.Unsigned("Rename width")
+    # Issue/Execute/Writeback (IEW) stage: delays and per-type widths.
+    commitToIEWDelay = Param.Unsigned("Commit to "
+               "Issue/Execute/Writeback delay")
+    renameToIEWDelay = Param.Unsigned("Rename to "
+               "Issue/Execute/Writeback delay")
+    issueToExecuteDelay = Param.Unsigned("Issue to execute delay (internal "
+              "to the IEW stage)")
+    issueWidth = Param.Unsigned("Issue width")
+    executeWidth = Param.Unsigned("Execute width")
+    executeIntWidth = Param.Unsigned("Integer execute width")
+    executeFloatWidth = Param.Unsigned("Floating point execute width")
+    executeBranchWidth = Param.Unsigned("Branch execute width")
+    executeMemoryWidth = Param.Unsigned("Memory execute width")
+    # Commit stage: delays into commit/ROB, plus commit and squash widths.
+    iewToCommitDelay = Param.Unsigned("Issue/Execute/Writeback to commit "
+               "delay")
+    renameToROBDelay = Param.Unsigned("Rename to reorder buffer delay")
+    commitWidth = Param.Unsigned("Commit width")
+    squashWidth = Param.Unsigned("Squash width")
+    # Branch predictor: local, global, and choice table sizes and bit widths.
+    local_predictor_size = Param.Unsigned("Size of local predictor")
+    local_ctr_bits = Param.Unsigned("Bits per counter")
+    local_history_table_size = Param.Unsigned("Size of local history table")
+    local_history_bits = Param.Unsigned("Bits for the local history")
+    global_predictor_size = Param.Unsigned("Size of global predictor")
+    global_ctr_bits = Param.Unsigned("Bits per counter")
+    global_history_bits = Param.Unsigned("Bits of history")
+    choice_predictor_size = Param.Unsigned("Size of choice predictor")
+    choice_ctr_bits = Param.Unsigned("Bits of choice counters")
+    # Branch target buffer (BTB): entry count and tag width.
+    BTBEntries = Param.Unsigned("Number of BTB entries")
+    BTBTagSize = Param.Unsigned("Size of the BTB tags, in bits")
+    # RAS size (NOTE(review): presumably the return address stack; confirm).
+    RASSize = Param.Unsigned("RAS size")
+    # Load/store queue sizes and the LFST/SSIT table sizes.
+    LQEntries = Param.Unsigned("Number of load queue entries")
+    SQEntries = Param.Unsigned("Number of store queue entries")
+    LFSTSize = Param.Unsigned("Last fetched store table size")
+    SSITSize = Param.Unsigned("Store set ID table size")
+    # Physical register file, instruction queue, and reorder buffer sizes.
+    numPhysIntRegs = Param.Unsigned("Number of physical integer registers")
+    numPhysFloatRegs = Param.Unsigned("Number of physical floating point "
+               "registers")
+    numIQEntries = Param.Unsigned("Number of instruction queue entries")
+    numROBEntries = Param.Unsigned("Number of reorder buffer entries")
+    # Instruction shift amount; NOTE(review): exact use is not shown here.
+    instShiftAmt = Param.Unsigned("Number of bits to shift instructions by")
+    # Function tracing (first argument is presumably the default; confirm).
+    function_trace = Param.Bool(False, "Enable function trace")
+    function_trace_start = Param.Tick(0, "Cycle to start function trace")