42 files changed, 906 insertions, 409 deletions
diff --git a/src/cpu/SConscript b/src/cpu/SConscript
index 4d4b7574c..1c2278f6f 100644
--- a/src/cpu/SConscript
+++ b/src/cpu/SConscript
@@ -28,11 +28,7 @@
 #
 # Authors: Steve Reinhardt
 
-import os
-import os.path
-
-# Import build environment variable from SConstruct.
-Import('env')
+Import('*')
 
 #################################################################
 #
@@ -107,89 +103,24 @@ env.Depends('static_inst_exec_sigs.hh', Value(env['CPU_MODELS']))
 # and one of these are not being used.
 CheckerSupportedCPUList = ['O3CPU', 'OzoneCPU']
 
-#################################################################
-#
-# Include CPU-model-specific files based on set of models
-# specified in CPU_MODELS build option.
-#
-#################################################################
-
-# Keep a list of CPU models that support SMT
-env['SMT_CPU_MODELS'] = []
-
-sources = []
-
-need_simple_base = False
-if 'AtomicSimpleCPU' in env['CPU_MODELS']:
-    need_simple_base = True
-    sources += Split('simple/atomic.cc')
-
-if 'TimingSimpleCPU' in env['CPU_MODELS']:
-    need_simple_base = True
-    sources += Split('simple/timing.cc')
-
-if need_simple_base:
-    sources += Split('simple/base.cc')
-
-if 'FastCPU' in env['CPU_MODELS']:
-    sources += Split('fast/cpu.cc')
-
-need_bp_unit = False
-if 'O3CPU' in env['CPU_MODELS']:
-    need_bp_unit = True
-    sources += SConscript('o3/SConscript', exports = 'env')
-    sources += Split('''
-        o3/base_dyn_inst.cc
-        o3/bpred_unit.cc
-        o3/commit.cc
-        o3/decode.cc
-        o3/fetch.cc
-        o3/free_list.cc
-        o3/fu_pool.cc
-        o3/cpu.cc
-        o3/iew.cc
-        o3/inst_queue.cc
-        o3/lsq_unit.cc
-        o3/lsq.cc
-        o3/mem_dep_unit.cc
-        o3/rename.cc
-        o3/rename_map.cc
-        o3/rob.cc
-        o3/scoreboard.cc
-        o3/store_set.cc
-        ''')
-    sources += Split('memtest/memtest.cc')
-    if env['USE_CHECKER']:
-        sources += Split('o3/checker_builder.cc')
-    else:
-        env['SMT_CPU_MODELS'].append('O3CPU') # Checker doesn't support SMT right now
-
-if 'OzoneCPU' in env['CPU_MODELS']:
-    need_bp_unit = True
-    sources += Split('''
-        ozone/base_dyn_inst.cc
-        ozone/bpred_unit.cc
-        ozone/cpu.cc
-        ozone/cpu_builder.cc
-        ozone/dyn_inst.cc
-        ozone/front_end.cc
-        ozone/lw_back_end.cc
-        ozone/lw_lsq.cc
-        ozone/rename_table.cc
-        ''')
-    if env['USE_CHECKER']:
-        sources += Split('ozone/checker_builder.cc')
-
-if need_bp_unit:
-    sources += Split('''
-        o3/2bit_local_pred.cc
-        o3/btb.cc
-        o3/ras.cc
-        o3/tournament_pred.cc
-        ''')
+Source('activity.cc')
+Source('base.cc')
+Source('cpuevent.cc')
+Source('exetrace.cc')
+Source('func_unit.cc')
+Source('op_class.cc')
+Source('pc_event.cc')
+Source('quiesce_event.cc')
+Source('static_inst.cc')
+Source('simple_thread.cc')
+Source('thread_state.cc')
+
+if env['FULL_SYSTEM']:
+    Source('intr_control.cc')
+    Source('profile.cc')
 
 if env['USE_CHECKER']:
-    sources += Split('checker/cpu.cc')
+    Source('checker/cpu.cc')
     checker_supports = False
     for i in CheckerSupportedCPUList:
         if i in env['CPU_MODELS']:
@@ -198,16 +129,5 @@ if env['USE_CHECKER']:
         print "Checker only supports CPU models",
         for i in CheckerSupportedCPUList:
             print i,
-        print ", please set USE_CHECKER=False or use one of those CPU models"              
+        print ", please set USE_CHECKER=False or use one of those CPU models"
         Exit(1)
-
-
-# FullCPU sources are included from src/SConscript since they're not
-# below this point in the file hierarchy.
-
-# Convert file names to SCons File objects.  This takes care of the
-# path relative to the top of the directory tree.
-sources = [File(s) for s in sources]
-
-Return('sources')
-
diff --git a/src/cpu/base.cc b/src/cpu/base.cc
index 3e0be6ad8..4dccee0d3 100644
--- a/src/cpu/base.cc
+++ b/src/cpu/base.cc
@@ -226,7 +226,8 @@ BaseCPU::startup()
 #endif
 
     if (params->progress_interval) {
-        new CPUProgressEvent(&mainEventQueue, params->progress_interval,
+        new CPUProgressEvent(&mainEventQueue,
+                             cycles(params->progress_interval),
                              this);
     }
 }
diff --git a/src/cpu/base.hh b/src/cpu/base.hh
index 7167bfde0..4d8300186 100644
--- a/src/cpu/base.hh
+++ b/src/cpu/base.hh
@@ -34,11 +34,11 @@
 
 #include <vector>
 
+#include "arch/isa_traits.hh"
 #include "base/statistics.hh"
 #include "config/full_system.hh"
 #include "sim/eventq.hh"
 #include "mem/mem_object.hh"
-#include "arch/isa_traits.hh"
 
 #if FULL_SYSTEM
 #include "arch/interrupts.hh"
@@ -50,6 +50,11 @@ class ThreadContext;
 class System;
 class Port;
 
+namespace TheISA
+{
+    class Predecoder;
+}
+
 class CPUProgressEvent : public Event
 {
   protected:
@@ -125,6 +130,7 @@ class BaseCPU : public MemObject
 
   protected:
     std::vector<ThreadContext *> threadContexts;
+    std::vector<TheISA::Predecoder *> predecoders;
 
   public:
 
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh
index 9ccdcdccc..6c6d90076 100644
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -171,15 +171,15 @@ class BaseDynInst : public FastAlloc, public RefCounted
     /** The kind of fault this instruction has generated. */
     Fault fault;
 
-    /** The memory request. */
-    Request *req;
-
     /** Pointer to the data for the memory access. */
     uint8_t *memData;
 
     /** The effective virtual address (lds & stores only). */
     Addr effAddr;
 
+    /** Whether the effective virtual address (effAddr) is valid. */
+    bool effAddrValid;
+
     /** The effective physical address. */
     Addr physEffAddr;
 
@@ -601,12 +601,18 @@ class BaseDynInst : public FastAlloc, public RefCounted
     /** Returns whether or not this instruction is ready to issue. */
     bool readyToIssue() const { return status[CanIssue]; }
 
+    /** Marks this instruction as no longer able to issue. */
+    void clearCanIssue() { status.reset(CanIssue); }
+
     /** Sets this instruction as issued from the IQ. */
     void setIssued() { status.set(Issued); }
 
     /** Returns whether or not this instruction has issued. */
     bool isIssued() const { return status[Issued]; }
 
+    /** Clears this instruction as being issued. */
+    void clearIssued() { status.reset(Issued); }
+
     /** Sets this instruction as executed. */
     void setExecuted() { status.set(Executed); }
 
@@ -729,6 +735,12 @@ class BaseDynInst : public FastAlloc, public RefCounted
      */
     bool eaCalcDone;
 
+    /** Is this instruction's memory access uncacheable. */
+    bool isUncacheable;
+
+    /** Has this instruction generated a memory request. */
+    bool reqMade;
+
   public:
     /** Sets the effective address. */
     void setEA(Addr &ea) { instEffAddr = ea; eaCalcDone = true; }
@@ -745,6 +757,12 @@ class BaseDynInst : public FastAlloc, public RefCounted
     /** Whether or not the memory operation is done. */
     bool memOpDone;
 
+    /** Returns whether this instruction's memory access is uncacheable. */
+    bool uncacheable() const { return isUncacheable; }
+
+    /** Returns whether this instruction has generated a memory request. */
+    bool hasRequest() const { return reqMade; }
+
   public:
     /** Load queue index. */
     int16_t lqIdx;
@@ -776,25 +794,25 @@ template<class T>
 inline Fault
 BaseDynInst<Impl>::read(Addr addr, T &data, unsigned flags)
 {
-    // Sometimes reads will get retried, so they may come through here
-    // twice.
-    if (!req) {
-        req = new Request();
-        req->setVirt(asid, addr, sizeof(T), flags, this->PC);
-        req->setThreadContext(thread->readCpuId(), threadNumber);
-    } else {
-        assert(addr == req->getVaddr());
-    }
+    reqMade = true;
+    Request *req = new Request();
+    req->setVirt(asid, addr, sizeof(T), flags, this->PC);
+    req->setThreadContext(thread->readCpuId(), threadNumber);
 
     if ((req->getVaddr() & (TheISA::VMPageSize - 1)) + req->getSize() >
         TheISA::VMPageSize) {
+        delete req;
         return TheISA::genAlignmentFault();
     }
 
     fault = cpu->translateDataReadReq(req, thread);
 
+    if (req->isUncacheable())
+        isUncacheable = true;
+
     if (fault == NoFault) {
         effAddr = req->getVaddr();
+        effAddrValid = true;
         physEffAddr = req->getPaddr();
         memReqFlags = req->getFlags();
 
@@ -817,6 +835,7 @@ BaseDynInst<Impl>::read(Addr addr, T &data, unsigned flags)
         // Commit will have to clean up whatever happened.  Set this
         // instruction as executed.
         this->setExecuted();
+        delete req;
     }
 
     if (traceData) {
@@ -837,21 +856,25 @@ BaseDynInst<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res)
         traceData->setData(data);
     }
 
-    assert(req == NULL);
-
-    req = new Request();
+    reqMade = true;
+    Request *req = new Request();
     req->setVirt(asid, addr, sizeof(T), flags, this->PC);
     req->setThreadContext(thread->readCpuId(), threadNumber);
 
     if ((req->getVaddr() & (TheISA::VMPageSize - 1)) + req->getSize() >
         TheISA::VMPageSize) {
+        delete req;
         return TheISA::genAlignmentFault();
     }
 
     fault = cpu->translateDataWriteReq(req, thread);
 
+    if (req->isUncacheable())
+        isUncacheable = true;
+
     if (fault == NoFault) {
         effAddr = req->getVaddr();
+        effAddrValid = true;
         physEffAddr = req->getPaddr();
         memReqFlags = req->getFlags();
 #if 0
@@ -863,12 +886,8 @@ BaseDynInst<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res)
 #else
         fault = cpu->write(req, data, sqIdx);
 #endif
-    }
-
-    if (res) {
-        // always return some result to keep misspeculated paths
-        // (which will ignore faults) deterministic
-        *res = (fault == NoFault) ? req->getExtraData() : 0;
+    } else {
+        delete req;
     }
 
     return fault;
diff --git a/src/cpu/base_dyn_inst_impl.hh b/src/cpu/base_dyn_inst_impl.hh
index c3d71e428..a1c866336 100644
--- a/src/cpu/base_dyn_inst_impl.hh
+++ b/src/cpu/base_dyn_inst_impl.hh
@@ -92,11 +92,13 @@ template <class Impl>
 void
 BaseDynInst<Impl>::initVars()
 {
-    req = NULL;
     memData = NULL;
     effAddr = 0;
+    effAddrValid = false;
     physEffAddr = 0;
 
+    isUncacheable = false;
+    reqMade = false;
     readyRegs = 0;
 
     instResult.integer = 0;
@@ -140,10 +142,6 @@ BaseDynInst<Impl>::initVars()
 template <class Impl>
 BaseDynInst<Impl>::~BaseDynInst()
 {
-    if (req) {
-        delete req;
-    }
-
     if (memData) {
         delete [] memData;
     }
@@ -271,7 +269,7 @@ void
 BaseDynInst<Impl>::markSrcRegReady()
 {
     if (++readyRegs == numSrcRegs()) {
-        status.set(CanIssue);
+        setCanIssue();
     }
 }
 
diff --git a/src/cpu/exetrace.cc b/src/cpu/exetrace.cc
index 54d8c68fa..c568b1439 100644
--- a/src/cpu/exetrace.cc
+++ b/src/cpu/exetrace.cc
@@ -31,14 +31,17 @@
  *          Steve Raasch
  */
 
+#include <errno.h>
 #include <fstream>
 #include <iomanip>
 #include <sys/ipc.h>
 #include <sys/shm.h>
 
+#include "arch/predecoder.hh"
 #include "arch/regfile.hh"
 #include "arch/utility.hh"
 #include "base/loader/symtab.hh"
+#include "base/socket.hh"
 #include "config/full_system.hh"
 #include "cpu/base.hh"
 #include "cpu/exetrace.hh"
@@ -64,6 +67,7 @@ static bool wasMicro = false;
 
 namespace Trace {
 SharedData *shared_data = NULL;
+ListenSocket *cosim_listener = NULL;
 
 void
 setupSharedData()
@@ -149,9 +153,96 @@ Trace::InstRecord::dump()
     ostream &outs = Trace::output();
 
     DPRINTF(Sparc, "Instruction: %#X\n", staticInst->machInst);
+    bool diff = true;
     if (IsOn(ExecRegDelta))
     {
+        diff = false;
+#ifndef NDEBUG
 #if THE_ISA == SPARC_ISA
+        static int fd = 0; //Cosimulator connection, opened on first use.
+        //Check state against the cosimulator only once per instruction:
+        //skip every micro-op except the last one of a macro-op.
+        if(!staticInst->isMicroOp() || staticInst->isLastMicroOp())
+        {
+            if(!cosim_listener)
+            {
+                int port = 8000;
+                cosim_listener = new ListenSocket();
+                while(!cosim_listener->listen(port, true))
+                {
+                    DPRINTF(GDBMisc, "Can't bind port %d\n", port);
+                    port++;
+                }
+                ccprintf(cerr, "Listening for cosimulator on port %d\n", port);
+                fd = cosim_listener->accept();
+            }
+            char prefix[] = "goli";
+            for(int p = 0; p < 4; p++)
+            {
+                for(int i = 0; i < 8; i++)
+                {
+                    uint64_t regVal;
+                    int res = read(fd, &regVal, sizeof(regVal));
+                    if(res != static_cast<int>(sizeof(regVal))) //error, EOF or short read
+                        panic("Reading %s%d from cosimulator failed (read returned %d): %s\n", prefix[p], i, res, strerror(errno));
+                    regVal = TheISA::gtoh(regVal);
+                    uint64_t realRegVal = thread->readIntReg(p * 8 + i);
+                    if((regVal & 0xffffffffULL) != (realRegVal & 0xffffffffULL))
+                    {
+                        DPRINTF(ExecRegDelta, "Register %s%d should be %#x but is %#x.\n", prefix[p], i, regVal, realRegVal);
+                        diff = true;
+                    }
+                    //ccprintf(outs, "%s%d m5 = %#x statetrace = %#x\n", prefix[p], i, realRegVal, regVal);
+                }
+            }
+            /*for(int f = 0; f <= 62; f+=2)
+            {
+                uint64_t regVal;
+                int res = read(fd, &regVal, sizeof(regVal));
+                if(res < 0)
+                    panic("First read call failed! %s\n", strerror(errno));
+                regVal = TheISA::gtoh(regVal);
+                uint64_t realRegVal = thread->readFloatRegBits(f, 64);
+                if(regVal != realRegVal)
+                {
+                    DPRINTF(ExecRegDelta, "Register f%d should be %#x but is %#x.\n", f, regVal, realRegVal);
+                }
+            }*/
+            uint64_t regVal;
+            int res = read(fd, &regVal, sizeof(regVal));
+            if(res != static_cast<int>(sizeof(regVal))) //error, EOF or short read
+                panic("Reading pc from cosimulator failed (read returned %d): %s\n", res, strerror(errno));
+            regVal = TheISA::gtoh(regVal);
+            uint64_t realRegVal = thread->readNextPC();
+            if(regVal != realRegVal)
+            {
+                DPRINTF(ExecRegDelta, "Register pc should be %#x but is %#x.\n", regVal, realRegVal);
+                diff = true;
+            }
+            res = read(fd, &regVal, sizeof(regVal));
+            if(res != static_cast<int>(sizeof(regVal))) //error, EOF or short read
+                panic("Reading npc from cosimulator failed (read returned %d): %s\n", res, strerror(errno));
+            regVal = TheISA::gtoh(regVal);
+            realRegVal = thread->readNextNPC();
+            if(regVal != realRegVal)
+            {
+                DPRINTF(ExecRegDelta, "Register npc should be %#x but is %#x.\n", regVal, realRegVal);
+                diff = true;
+            }
+            res = read(fd, &regVal, sizeof(regVal));
+            if(res != static_cast<int>(sizeof(regVal))) //error, EOF or short read
+                panic("Reading ccr from cosimulator failed (read returned %d): %s\n", res, strerror(errno));
+            regVal = TheISA::gtoh(regVal);
+            realRegVal = thread->readIntReg(SparcISA::NumIntArchRegs + 2);
+            if((regVal & 0xF) != (realRegVal & 0xF))
+            {
+                DPRINTF(ExecRegDelta, "Register ccr should be %#x but is %#x.\n", regVal, realRegVal);
+                diff = true;
+            }
+        }
+#endif
+#endif
+#if 0 //THE_ISA == SPARC_ISA
         //Don't print what happens for each micro-op, just print out
         //once at the last op, and for regular instructions.
         if(!staticInst->isMicroOp() || staticInst->isLastMicroOp())
@@ -210,7 +301,8 @@ Trace::InstRecord::dump()
         }
 #endif
     }
-    else if (IsOn(ExecIntel)) {
+    if(!diff) {
+    } else if (IsOn(ExecIntel)) {
         ccprintf(outs, "%7d ) ", when);
         outs << "0x" << hex << PC << ":\t";
         if (staticInst->isLoad()) {
@@ -302,6 +394,7 @@ Trace::InstRecord::dump()
         outs << endl;
     }
 #if THE_ISA == SPARC_ISA && FULL_SYSTEM
+    static TheISA::Predecoder predecoder(NULL);
     // Compare
     if (IsOn(ExecLegion))
     {
@@ -556,9 +649,13 @@ Trace::InstRecord::dump()
                              << staticInst->disassemble(m5Pc, debugSymbolTable)
                              << endl;
 
+                        predecoder.setTC(thread);
+                        predecoder.moreBytes(m5Pc, 0, shared_data->instruction);
+
+                        assert(predecoder.extMachInstRead());
+
                         StaticInstPtr legionInst =
-                            StaticInst::decode(makeExtMI(shared_data->instruction,
-                                        thread));
+                            StaticInst::decode(predecoder.getExtMachInst());
                         outs << setfill(' ') << setw(15)
                              << " Legion Inst: "
                              << "0x" << setw(8) << setfill('0') << hex
diff --git a/src/cpu/memtest/SConscript b/src/cpu/memtest/SConscript
new file mode 100644
index 000000000..7b4d6d2c5
--- /dev/null
+++ b/src/cpu/memtest/SConscript
@@ -0,0 +1,34 @@
+# -*- mode:python -*-
+
+# Copyright (c) 2006 The Regents of The University of Michigan
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Nathan Binkert
+
+Import('*')
+
+if 'O3CPU' in env['CPU_MODELS']:
+    Source('memtest.cc')
diff --git a/src/cpu/memtest/memtest.cc b/src/cpu/memtest/memtest.cc
index 8b3e9a11e..607cf1066 100644
--- a/src/cpu/memtest/memtest.cc
+++ b/src/cpu/memtest/memtest.cc
@@ -369,7 +369,7 @@ MemTest::tick()
         //This means we assume CPU does write forwarding to reads that alias something
         //in the cpu store buffer.
         if (outstandingAddrs.find(paddr) != outstandingAddrs.end()) {
-            delete result;
+            delete [] result;
             delete req;
             return;
         }
diff --git a/src/cpu/o3/SConscript b/src/cpu/o3/SConscript
index afbd4c533..bb1dfb613 100755
--- a/src/cpu/o3/SConscript
+++ b/src/cpu/o3/SConscript
@@ -26,52 +26,56 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #
-# Authors: Korey Sewell
+# Authors: Nathan Binkert
 
-import os
-import os.path
 import sys
 
-# Import build environment variable from SConstruct.
-Import('env')
+Import('*')
 
+if 'O3CPU' in env['CPU_MODELS']:
+    Source('base_dyn_inst.cc')
+    Source('bpred_unit.cc')
+    Source('commit.cc')
+    Source('cpu.cc')
+    Source('decode.cc')
+    Source('fetch.cc')
+    Source('free_list.cc')
+    Source('fu_pool.cc')
+    Source('iew.cc')
+    Source('inst_queue.cc')
+    Source('lsq.cc')
+    Source('lsq_unit.cc')
+    Source('mem_dep_unit.cc')
+    Source('rename.cc')
+    Source('rename_map.cc')
+    Source('rob.cc')
+    Source('scoreboard.cc')
+    Source('store_set.cc')
 
-#################################################################
-#
-# Include ISA-specific files for the O3 CPU-model
-#
-#################################################################
-
-sources = []
-
-if env['TARGET_ISA'] == 'alpha':
-    sources += Split('''
-        alpha/dyn_inst.cc
-        alpha/cpu.cc
-        alpha/thread_context.cc
-        alpha/cpu_builder.cc
-        ''')
-elif env['TARGET_ISA'] == 'mips':
-    sources += Split('''
-        mips/dyn_inst.cc
-        mips/cpu.cc
-        mips/thread_context.cc
-        mips/cpu_builder.cc
-        ''')
-elif env['TARGET_ISA'] == 'sparc':
-    sources += Split('''
-        sparc/dyn_inst.cc
-        sparc/cpu.cc
-        sparc/thread_context.cc
-        sparc/cpu_builder.cc
-        ''')
-else:
-    sys.exit('O3 CPU does not support the \'%s\' ISA' % env['TARGET_ISA'])
-
+    if env['TARGET_ISA'] == 'alpha':
+        Source('alpha/cpu.cc')
+        Source('alpha/cpu_builder.cc')
+        Source('alpha/dyn_inst.cc')
+        Source('alpha/thread_context.cc')
+    elif env['TARGET_ISA'] == 'mips':
+        Source('mips/cpu.cc')
+        Source('mips/cpu_builder.cc')
+        Source('mips/dyn_inst.cc')
+        Source('mips/thread_context.cc')
+    elif env['TARGET_ISA'] == 'sparc':
+        Source('sparc/cpu.cc')
+        Source('sparc/cpu_builder.cc')
+        Source('sparc/dyn_inst.cc')
+        Source('sparc/thread_context.cc')
+    else:
+        sys.exit('O3 CPU does not support the \'%s\' ISA' % env['TARGET_ISA'])
 
-# Convert file names to SCons File objects.  This takes care of the
-# path relative to the top of the directory tree.
-sources = [File(s) for s in sources]
+    if env['USE_CHECKER']:
+        Source('checker_builder.cc')
 
-Return('sources')
+if 'O3CPU' in env['CPU_MODELS'] or 'OzoneCPU' in env['CPU_MODELS']:
+    Source('2bit_local_pred.cc')
+    Source('btb.cc')
+    Source('ras.cc')
+    Source('tournament_pred.cc')
 
diff --git a/src/cpu/o3/SConsopts b/src/cpu/o3/SConsopts
new file mode 100644
index 000000000..040352e6a
--- /dev/null
+++ b/src/cpu/o3/SConsopts
@@ -0,0 +1,34 @@
+# -*- mode:python -*-
+
+# Copyright (c) 2006 The Regents of The University of Michigan
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Nathan Binkert
+
+Import('*')
+
+all_cpu_list.append('O3CPU')
+default_cpus.append('O3CPU')
diff --git a/src/cpu/o3/alpha/cpu_builder.cc b/src/cpu/o3/alpha/cpu_builder.cc
index 5a375a4b8..34754d3c5 100644
--- a/src/cpu/o3/alpha/cpu_builder.cc
+++ b/src/cpu/o3/alpha/cpu_builder.cc
@@ -50,11 +50,11 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivO3CPU)
     Param<int> clock;
     Param<int> phase;
     Param<int> numThreads;
+Param<int> cpu_id;
 Param<int> activity;
 
 #if FULL_SYSTEM
 SimObjectParam<System *> system;
-Param<int> cpu_id;
 SimObjectParam<AlphaISA::ITB *> itb;
 SimObjectParam<AlphaISA::DTB *> dtb;
 Param<Tick> profile;
@@ -161,11 +161,11 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU)
     INIT_PARAM(clock, "clock speed"),
     INIT_PARAM_DFLT(phase, "clock phase", 0),
     INIT_PARAM(numThreads, "number of HW thread contexts"),
+    INIT_PARAM(cpu_id, "processor ID"),
     INIT_PARAM_DFLT(activity, "Initial activity count", 0),
 
 #if FULL_SYSTEM
     INIT_PARAM(system, "System object"),
-    INIT_PARAM(cpu_id, "processor ID"),
     INIT_PARAM(itb, "Instruction translation buffer"),
     INIT_PARAM(dtb, "Data translation buffer"),
     INIT_PARAM(profile, ""),
@@ -305,14 +305,15 @@ CREATE_SIM_OBJECT(DerivO3CPU)
     AlphaSimpleParams *params = new AlphaSimpleParams;
 
     params->clock = clock;
+    params->phase = phase;
 
     params->name = getInstanceName();
     params->numberOfThreads = actual_num_threads;
+    params->cpu_id = cpu_id;
     params->activity = activity;
 
 #if FULL_SYSTEM
     params->system = system;
-    params->cpu_id = cpu_id;
     params->itb = itb;
     params->dtb = dtb;
     params->profile = profile;
diff --git a/src/cpu/o3/alpha/cpu_impl.hh b/src/cpu/o3/alpha/cpu_impl.hh
index b91972704..304ee6c38 100644
--- a/src/cpu/o3/alpha/cpu_impl.hh
+++ b/src/cpu/o3/alpha/cpu_impl.hh
@@ -114,6 +114,7 @@ AlphaO3CPU<Impl>::AlphaO3CPU(Params *params) : FullO3CPU<Impl>(params)
 #endif
         // Give the thread the TC.
         this->thread[i]->tc = tc;
+        this->thread[i]->setCpuId(params->cpu_id);
 
         // Add the TC to the CPU's list of TC's.
         this->threadContexts.push_back(tc);
diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh
index 0d7d82529..e2ad23954 100644
--- a/src/cpu/o3/commit.hh
+++ b/src/cpu/o3/commit.hh
@@ -247,6 +247,11 @@ class DefaultCommit
     /** Handles squashing due to an TC write. */
     void squashFromTC(unsigned tid);
 
+#if FULL_SYSTEM
+    /** Handles processing an interrupt. */
+    void handleInterrupt();
+#endif // FULL_SYSTEM
+
     /** Commits as many instructions as possible. */
     void commitInsts();
 
@@ -409,6 +414,16 @@ class DefaultCommit
     /** The sequence number of the youngest valid instruction in the ROB. */
     InstSeqNum youngestSeqNum[Impl::MaxThreads];
 
+    /** Records if there is a trap currently in flight. */
+    bool trapInFlight[Impl::MaxThreads];
+
+    /** Records if there were any stores committed this cycle. */
+    bool committedStores[Impl::MaxThreads];
+
+    /** Records if commit should check if the ROB is truly empty (see
+        commit_impl.hh). */
+    bool checkEmptyROB[Impl::MaxThreads];
+
     /** Pointer to the list of active threads. */
     std::list<unsigned> *activeThreads;
 
diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh
index 18fb2aaa3..65e36d99a 100644
--- a/src/cpu/o3/commit_impl.hh
+++ b/src/cpu/o3/commit_impl.hh
@@ -96,7 +96,7 @@ DefaultCommit<Impl>::DefaultCommit(Params *params)
     if (policy == "aggressive"){
         commitPolicy = Aggressive;
 
-        DPRINTF(Commit,"Commit Policy set to Aggressive.");
+//        DPRINTF(Commit,"Commit Policy set to Aggressive.");
     } else if (policy == "roundrobin"){
         commitPolicy = RoundRobin;
 
@@ -105,11 +105,11 @@ DefaultCommit<Impl>::DefaultCommit(Params *params)
             priority_list.push_back(tid);
         }
 
-        DPRINTF(Commit,"Commit Policy set to Round Robin.");
+//        DPRINTF(Commit,"Commit Policy set to Round Robin.");
     } else if (policy == "oldestready"){
         commitPolicy = OldestReady;
 
-        DPRINTF(Commit,"Commit Policy set to Oldest Ready.");
+//        DPRINTF(Commit,"Commit Policy set to Oldest Ready.");
     } else {
         assert(0 && "Invalid SMT Commit Policy. Options Are: {Aggressive,"
                "RoundRobin,OldestReady}");
@@ -118,6 +118,9 @@ DefaultCommit<Impl>::DefaultCommit(Params *params)
     for (int i=0; i < numThreads; i++) {
         commitStatus[i] = Idle;
         changedROBNumEntries[i] = false;
+        checkEmptyROB[i] = false;
+        trapInFlight[i] = false;
+        committedStores[i] = false;
         trapSquash[i] = false;
         tcSquash[i] = false;
         PC[i] = nextPC[i] = nextNPC[i] = 0;
@@ -226,8 +229,8 @@ template <class Impl>
 void
 DefaultCommit<Impl>::setCPU(O3CPU *cpu_ptr)
 {
-    DPRINTF(Commit, "Commit: Setting CPU pointer.\n");
     cpu = cpu_ptr;
+    DPRINTF(Commit, "Commit: Setting CPU pointer.\n");
 
     // Commit must broadcast the number of free entries it has at the start of
     // the simulation, so it starts as active.
@@ -247,7 +250,6 @@ template <class Impl>
 void
 DefaultCommit<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
 {
-    DPRINTF(Commit, "Commit: Setting time buffer pointer.\n");
     timeBuffer = tb_ptr;
 
     // Setup wire to send information back to IEW.
@@ -261,7 +263,6 @@ template <class Impl>
 void
 DefaultCommit<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
 {
-    DPRINTF(Commit, "Commit: Setting fetch queue pointer.\n");
     fetchQueue = fq_ptr;
 
     // Setup wire to get instructions from rename (for the ROB).
@@ -272,7 +273,6 @@ template <class Impl>
 void
 DefaultCommit<Impl>::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr)
 {
-    DPRINTF(Commit, "Commit: Setting rename queue pointer.\n");
     renameQueue = rq_ptr;
 
     // Setup wire to get instructions from rename (for the ROB).
@@ -283,7 +283,6 @@ template <class Impl>
 void
 DefaultCommit<Impl>::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr)
 {
-    DPRINTF(Commit, "Commit: Setting IEW queue pointer.\n");
     iewQueue = iq_ptr;
 
     // Setup wire to get instructions from IEW.
@@ -301,7 +300,6 @@ template<class Impl>
 void
 DefaultCommit<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
 {
-    DPRINTF(Commit, "Commit: Setting active threads list pointer.\n");
     activeThreads = at_ptr;
 }
 
@@ -309,8 +307,6 @@ template <class Impl>
 void
 DefaultCommit<Impl>::setRenameMap(RenameMap rm_ptr[])
 {
-    DPRINTF(Commit, "Setting rename map pointers.\n");
-
     for (int i=0; i < numThreads; i++) {
         renameMap[i] = &rm_ptr[i];
     }
@@ -320,7 +316,6 @@ template <class Impl>
 void
 DefaultCommit<Impl>::setROB(ROB *rob_ptr)
 {
-    DPRINTF(Commit, "Commit: Setting ROB pointer.\n");
     rob = rob_ptr;
 }
 
@@ -335,6 +330,7 @@ DefaultCommit<Impl>::initStage()
     for (int i=0; i < numThreads; i++) {
         toIEW->commitInfo[i].usedROB = true;
         toIEW->commitInfo[i].freeROBEntries = rob->numFreeEntries(i);
+        toIEW->commitInfo[i].emptyROB = true;
     }
 
     cpu->activityThisCycle();
@@ -473,14 +469,14 @@ DefaultCommit<Impl>::generateTrapEvent(unsigned tid)
     TrapEvent *trap = new TrapEvent(this, tid);
 
     trap->schedule(curTick + trapLatency);
-
-    thread[tid]->trapPending = true;
+    trapInFlight[tid] = true;
 }
 
 template <class Impl>
 void
 DefaultCommit<Impl>::generateTCEvent(unsigned tid)
 {
+    assert(!trapInFlight[tid]);
     DPRINTF(Commit, "Generating TC squash event for [tid:%i]\n", tid);
 
     tcSquash[tid] = true;
@@ -495,7 +491,7 @@ DefaultCommit<Impl>::squashAll(unsigned tid)
     // Hopefully this doesn't mess things up.  Basically I want to squash
     // all instructions of this thread.
     InstSeqNum squashed_inst = rob->isEmpty() ?
-        0 : rob->readHeadInst(tid)->seqNum - 1;;
+        0 : rob->readHeadInst(tid)->seqNum - 1;
 
     // All younger instructions will be squashed. Set the sequence
     // number as the youngest instruction in the ROB (0 in this case.
@@ -532,6 +528,7 @@ DefaultCommit<Impl>::squashFromTrap(unsigned tid)
 
     thread[tid]->trapPending = false;
     thread[tid]->inSyscall = false;
+    trapInFlight[tid] = false;
 
     trapSquash[tid] = false;
 
@@ -580,6 +577,10 @@ DefaultCommit<Impl>::tick()
     while (threads != end) {
         unsigned tid = *threads++;
 
+        // Clear the bit saying if the thread has committed stores
+        // this cycle.
+        committedStores[tid] = false;
+
         if (commitStatus[tid] == ROBSquashing) {
 
             if (rob->isDoneSquashing(tid)) {
@@ -635,16 +636,11 @@ DefaultCommit<Impl>::tick()
     updateStatus();
 }
 
+#if FULL_SYSTEM
 template <class Impl>
 void
-DefaultCommit<Impl>::commit()
+DefaultCommit<Impl>::handleInterrupt()
 {
-
-    //////////////////////////////////////
-    // Check for interrupts
-    //////////////////////////////////////
-
-#if FULL_SYSTEM
     if (interrupt != NoFault) {
         // Wait until the ROB is empty and all stores have drained in
         // order to enter the interrupt.
@@ -653,6 +649,12 @@ DefaultCommit<Impl>::commit()
             // an interrupt needed to be handled.
             DPRINTF(Commit, "Interrupt detected.\n");
 
+            Fault new_interrupt = cpu->getInterrupts();
+            assert(new_interrupt != NoFault);
+
+            // Clear the interrupt now that it's going to be handled
+            toIEW->commitInfo[0].clearInterrupt = true;
+
             assert(!thread[0]->inSyscall);
             thread[0]->inSyscall = true;
 
@@ -666,16 +668,14 @@ DefaultCommit<Impl>::commit()
             // Generate trap squash event.
             generateTrapEvent(0);
 
-            // Clear the interrupt now that it's been handled
-            toIEW->commitInfo[0].clearInterrupt = true;
             interrupt = NoFault;
         } else {
             DPRINTF(Commit, "Interrupt pending, waiting for ROB to empty.\n");
         }
-    } else if (cpu->check_interrupts(cpu->tcBase(0)) &&
-        commitStatus[0] != TrapPending &&
-        !trapSquash[0] &&
-        !tcSquash[0]) {
+    } else if (commitStatus[0] != TrapPending &&
+               cpu->check_interrupts(cpu->tcBase(0)) &&
+               !trapSquash[0] &&
+               !tcSquash[0]) {
         // Process interrupts if interrupts are enabled, not in PAL
         // mode, and no other traps or external squashes are currently
         // pending.
@@ -691,7 +691,21 @@ DefaultCommit<Impl>::commit()
             toIEW->commitInfo[0].interruptPending = true;
         }
     }
+}
+#endif // FULL_SYSTEM
+
+template <class Impl>
+void
+DefaultCommit<Impl>::commit()
+{
 
+#if FULL_SYSTEM
+    // Check for any interrupt, and start processing it.  Or if we
+    // have an outstanding interrupt and are at a point when it is
+    // valid to take an interrupt, process it.
+    if (cpu->check_interrupts(cpu->tcBase(0))) {
+        handleInterrupt();
+    }
 #endif // FULL_SYSTEM
 
     ////////////////////////////////////
@@ -709,6 +723,7 @@ DefaultCommit<Impl>::commit()
             assert(!tcSquash[tid]);
             squashFromTrap(tid);
         } else if (tcSquash[tid] == true) {
+            assert(commitStatus[tid] != TrapPending);
             squashFromTC(tid);
         }
 
@@ -753,6 +768,7 @@ DefaultCommit<Impl>::commit()
                 bdelay_done_seq_num--;
 #endif
             }
+
             // All younger instructions will be squashed. Set the sequence
             // number as the youngest instruction in the ROB.
             youngestSeqNum[tid] = squashed_inst;
@@ -817,13 +833,29 @@ DefaultCommit<Impl>::commit()
             toIEW->commitInfo[tid].usedROB = true;
             toIEW->commitInfo[tid].freeROBEntries = rob->numFreeEntries(tid);
 
-            if (rob->isEmpty(tid)) {
-                toIEW->commitInfo[tid].emptyROB = true;
-            }
-
             wroteToTimeBuffer = true;
             changedROBNumEntries[tid] = false;
+            if (rob->isEmpty(tid))
+                checkEmptyROB[tid] = true;
         }
+
+        // ROB is only considered "empty" for previous stages if: a)
+        // ROB is empty, b) there are no outstanding stores, c) IEW
+        // stage has received all information regarding stores that
+        // have committed.
+        // c) is checked by making sure to not consider the ROB empty
+        // on the same cycle as when stores have been committed.
+        // @todo: Make this handle multi-cycle communication between
+        // commit and IEW.
+        if (checkEmptyROB[tid] && rob->isEmpty(tid) &&
+            !iewStage->hasStoresToWB() && !committedStores[tid]) {
+            checkEmptyROB[tid] = false;
+            toIEW->commitInfo[tid].usedROB = true;
+            toIEW->commitInfo[tid].emptyROB = true;
+            toIEW->commitInfo[tid].freeROBEntries = rob->numFreeEntries(tid);
+            wroteToTimeBuffer = true;
+        }
+
     }
 }
 
@@ -966,8 +998,6 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
         // and committed this instruction.
         thread[tid]->funcExeInst--;
 
-        head_inst->setAtCommit();
-
         if (head_inst->isNonSpeculative() ||
             head_inst->isStoreConditional() ||
             head_inst->isMemBarrier() ||
@@ -977,19 +1007,9 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
                     "instruction [sn:%lli] at the head of the ROB, PC %#x.\n",
                     head_inst->seqNum, head_inst->readPC());
 
-            // Hack to make sure syscalls/memory barriers/quiesces
-            // aren't executed until all stores write back their data.
-            // This direct communication shouldn't be used for
-            // anything other than this.
-            if ((head_inst->isMemBarrier() || head_inst->isWriteBarrier() ||
-                    head_inst->isQuiesce()) &&
-                iewStage->hasStoresToWB())
-            {
+            if (inst_num > 0 || iewStage->hasStoresToWB()) {
                 DPRINTF(Commit, "Waiting for all stores to writeback.\n");
                 return false;
-            } else if (inst_num > 0 || iewStage->hasStoresToWB()) {
-                DPRINTF(Commit, "Waiting to become head of commit.\n");
-                return false;
             }
 
             toIEW->commitInfo[tid].nonSpecSeqNum = head_inst->seqNum;
@@ -1002,6 +1022,12 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
 
             return false;
         } else if (head_inst->isLoad()) {
+            if (inst_num > 0 || iewStage->hasStoresToWB()) {
+                DPRINTF(Commit, "Waiting for all stores to writeback.\n");
+                return false;
+            }
+
+            assert(head_inst->uncacheable());
             DPRINTF(Commit, "[sn:%lli]: Uncached load, PC %#x.\n",
                     head_inst->seqNum, head_inst->readPC());
 
@@ -1025,8 +1051,11 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
         panic("Thread sync instructions are not handled yet.\n");
     }
 
+    // Check if the instruction caused a fault.  If so, trap.
+    Fault inst_fault = head_inst->getFault();
+
     // Stores mark themselves as completed.
-    if (!head_inst->isStore()) {
+    if (!head_inst->isStore() && inst_fault == NoFault) {
         head_inst->setCompleted();
     }
 
@@ -1038,9 +1067,6 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
     }
 #endif
 
-    // Check if the instruction caused a fault.  If so, trap.
-    Fault inst_fault = head_inst->getFault();
-
     // DTB will sometimes need the machine instruction for when
     // faults happen.  So we will set it here, prior to the DTB
     // possibly needing it for its fault.
@@ -1048,7 +1074,6 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
         static_cast<TheISA::MachInst>(head_inst->staticInst->machInst));
 
     if (inst_fault != NoFault) {
-        head_inst->setCompleted();
         DPRINTF(Commit, "Inst [sn:%lli] PC %#x has a fault\n",
                 head_inst->seqNum, head_inst->readPC());
 
@@ -1057,6 +1082,8 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
             return false;
         }
 
+        head_inst->setCompleted();
+
 #if USE_CHECKER
         if (cpu->checker && head_inst->isStore()) {
             cpu->checker->verify(head_inst);
@@ -1082,6 +1109,14 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
 
         commitStatus[tid] = TrapPending;
 
+        if (head_inst->traceData) {
+            head_inst->traceData->setFetchSeq(head_inst->seqNum);
+            head_inst->traceData->setCPSeq(thread[tid]->numInst);
+            head_inst->traceData->dump();
+            delete head_inst->traceData;
+            head_inst->traceData = NULL;
+        }
+
         // Generate trap squash event.
         generateTrapEvent(tid);
 //        warn("%lli fault (%d) handled @ PC %08p", curTick, inst_fault->name(), head_inst->readPC());
@@ -1123,6 +1158,10 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
     // Finally clear the head ROB entry.
     rob->retireHead(tid);
 
+    // If this was a store, record it for this cycle.
+    if (head_inst->isStore())
+        committedStores[tid] = true;
+
     // Return true to indicate that we have committed an instruction.
     return true;
 }
@@ -1167,7 +1206,8 @@ DefaultCommit<Impl>::getInsts()
         int tid = inst->threadNumber;
 
         if (!inst->isSquashed() &&
-            commitStatus[tid] != ROBSquashing) {
+            commitStatus[tid] != ROBSquashing &&
+            commitStatus[tid] != TrapPending) {
             changedROBNumEntries[tid] = true;
 
             DPRINTF(Commit, "Inserting PC %#x [sn:%i] [tid:%i] into ROB.\n",
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index 38e6a0b5b..354e3c490 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -466,7 +466,7 @@ FullO3CPU<Impl>::tick()
             lastRunningCycle = curTick;
             timesIdled++;
         } else {
-            tickEvent.schedule(curTick + cycles(1));
+            tickEvent.schedule(nextCycle(curTick + cycles(1)));
             DPRINTF(O3CPU, "Scheduling next tick!\n");
         }
     }
@@ -886,7 +886,7 @@ FullO3CPU<Impl>::resume()
 #endif
 
     if (!tickEvent.scheduled())
-        tickEvent.schedule(curTick);
+        tickEvent.schedule(nextCycle());
     _status = Running;
 }
 
@@ -979,11 +979,11 @@ FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
         ThreadContext *tc = threadContexts[i];
         if (tc->status() == ThreadContext::Active && _status != Running) {
             _status = Running;
-            tickEvent.schedule(curTick);
+            tickEvent.schedule(nextCycle());
         }
     }
     if (!tickEvent.scheduled())
-        tickEvent.schedule(curTick);
+        tickEvent.schedule(nextCycle());
 }
 
 template <class Impl>
@@ -1393,7 +1393,7 @@ FullO3CPU<Impl>::wakeCPU()
 
     idleCycles += (curTick - 1) - lastRunningCycle;
 
-    tickEvent.schedule(curTick);
+    tickEvent.schedule(nextCycle());
 }
 
 template <class Impl>
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
index ea374dd57..0ab20ba2a 100644
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -146,9 +146,9 @@ class FullO3CPU : public BaseO3CPU
     void scheduleTickEvent(int delay)
     {
         if (tickEvent.squashed())
-            tickEvent.reschedule(curTick + cycles(delay));
+            tickEvent.reschedule(nextCycle(curTick + cycles(delay)));
         else if (!tickEvent.scheduled())
-            tickEvent.schedule(curTick + cycles(delay));
+            tickEvent.schedule(nextCycle(curTick + cycles(delay)));
     }
 
     /** Unschedule tick event, regardless of its current state. */
@@ -186,9 +186,11 @@ class FullO3CPU : public BaseO3CPU
     {
         // Schedule thread to activate, regardless of its current state.
         if (activateThreadEvent[tid].squashed())
-            activateThreadEvent[tid].reschedule(curTick + cycles(delay));
+            activateThreadEvent[tid].
+                reschedule(nextCycle(curTick + cycles(delay)));
         else if (!activateThreadEvent[tid].scheduled())
-            activateThreadEvent[tid].schedule(curTick + cycles(delay));
+            activateThreadEvent[tid].
+                schedule(nextCycle(curTick + cycles(delay)));
     }
 
     /** Unschedule actiavte thread event, regardless of its current state. */
@@ -235,9 +237,11 @@ class FullO3CPU : public BaseO3CPU
     {
         // Schedule thread to activate, regardless of its current state.
         if (deallocateContextEvent[tid].squashed())
-            deallocateContextEvent[tid].reschedule(curTick + cycles(delay));
+            deallocateContextEvent[tid].
+                reschedule(nextCycle(curTick + cycles(delay)));
         else if (!deallocateContextEvent[tid].scheduled())
-            deallocateContextEvent[tid].schedule(curTick + cycles(delay));
+            deallocateContextEvent[tid].
+                schedule(nextCycle(curTick + cycles(delay)));
     }
 
     /** Unschedule thread deallocation in CPU */
diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh
index 79a0bfdbf..93d02bfcd 100644
--- a/src/cpu/o3/decode_impl.hh
+++ b/src/cpu/o3/decode_impl.hh
@@ -114,15 +114,14 @@ template<class Impl>
 void
 DefaultDecode<Impl>::setCPU(O3CPU *cpu_ptr)
 {
-    DPRINTF(Decode, "Setting CPU pointer.\n");
     cpu = cpu_ptr;
+    DPRINTF(Decode, "Setting CPU pointer.\n");
 }
 
 template<class Impl>
 void
 DefaultDecode<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
 {
-    DPRINTF(Decode, "Setting time buffer pointer.\n");
     timeBuffer = tb_ptr;
 
     // Setup wire to write information back to fetch.
@@ -138,7 +137,6 @@ template<class Impl>
 void
 DefaultDecode<Impl>::setDecodeQueue(TimeBuffer<DecodeStruct> *dq_ptr)
 {
-    DPRINTF(Decode, "Setting decode queue pointer.\n");
     decodeQueue = dq_ptr;
 
     // Setup wire to write information to proper place in decode queue.
@@ -149,7 +147,6 @@ template<class Impl>
 void
 DefaultDecode<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
 {
-    DPRINTF(Decode, "Setting fetch queue pointer.\n");
     fetchQueue = fq_ptr;
 
     // Setup wire to read information from fetch queue.
@@ -160,7 +157,6 @@ template<class Impl>
 void
 DefaultDecode<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
 {
-    DPRINTF(Decode, "Setting active threads list pointer.\n");
     activeThreads = at_ptr;
 }
 
diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh
index 8347ed775..811f4d2bc 100644
--- a/src/cpu/o3/fetch.hh
+++ b/src/cpu/o3/fetch.hh
@@ -33,6 +33,7 @@
 #define __CPU_O3_FETCH_HH__
 
 #include "arch/utility.hh"
+#include "arch/predecoder.hh"
 #include "base/statistics.hh"
 #include "base/timebuf.hh"
 #include "cpu/pc_event.hh"
@@ -85,6 +86,8 @@ class DefaultFetch
 
         bool snoopRangeSent;
 
+        virtual void setPeer(Port *port);
+
       protected:
         /** Atomic version of receive.  Panics. */
         virtual Tick recvAtomic(PacketPtr pkt);
@@ -183,6 +186,9 @@ class DefaultFetch
     /** Initialize stage. */
     void initStage();
 
+    /** Tells the fetch stage that the Icache is set. */
+    void setIcache();
+
     /** Processes cache completion event. */
     void processCacheCompletion(PacketPtr pkt);
 
@@ -338,6 +344,9 @@ class DefaultFetch
     /** BPredUnit. */
     BPredUnit branchPred;
 
+    /** Predecoder. */
+    TheISA::Predecoder predecoder;
+
     /** Per-thread fetch PC. */
     Addr PC[Impl::MaxThreads];
 
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index ac0149d18..85885906d 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -51,6 +51,15 @@
 #include <algorithm>
 
 template<class Impl>
+void
+DefaultFetch<Impl>::IcachePort::setPeer(Port *port)
+{
+    Port::setPeer(port);
+
+    fetch->setIcache();
+}
+
+template<class Impl>
 Tick
 DefaultFetch<Impl>::IcachePort::recvAtomic(PacketPtr pkt)
 {
@@ -103,6 +112,7 @@ DefaultFetch<Impl>::IcachePort::recvRetry()
 template<class Impl>
 DefaultFetch<Impl>::DefaultFetch(Params *params)
     : branchPred(params),
+      predecoder(NULL),
       decodeToFetchDelay(params->decodeToFetchDelay),
       renameToFetchDelay(params->renameToFetchDelay),
       iewToFetchDelay(params->iewToFetchDelay),
@@ -256,8 +266,8 @@ template<class Impl>
 void
 DefaultFetch<Impl>::setCPU(O3CPU *cpu_ptr)
 {
-    DPRINTF(Fetch, "Setting the CPU pointer.\n");
     cpu = cpu_ptr;
+    DPRINTF(Fetch, "Setting the CPU pointer.\n");
 
     // Name is finally available, so create the port.
     icachePort = new IcachePort(this);
@@ -282,7 +292,6 @@ template<class Impl>
 void
 DefaultFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer)
 {
-    DPRINTF(Fetch, "Setting the time buffer pointer.\n");
     timeBuffer = time_buffer;
 
     // Create wires to get information from proper places in time buffer.
@@ -296,7 +305,6 @@ template<class Impl>
 void
 DefaultFetch<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
 {
-    DPRINTF(Fetch, "Setting active threads list pointer.\n");
     activeThreads = at_ptr;
 }
 
@@ -304,7 +312,6 @@ template<class Impl>
 void
 DefaultFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
 {
-    DPRINTF(Fetch, "Setting the fetch queue pointer.\n");
     fetchQueue = fq_ptr;
 
     // Create wire to write information to proper place in fetch queue.
@@ -322,12 +329,6 @@ DefaultFetch<Impl>::initStage()
         nextNPC[tid] = cpu->readNextNPC(tid);
     }
 
-    // Size of cache block.
-    cacheBlkSize = icachePort->peerBlockSize();
-
-    // Create mask to get rid of offset bits.
-    cacheBlkMask = (cacheBlkSize - 1);
-
     for (int tid=0; tid < numThreads; tid++) {
 
         fetchStatus[tid] = Running;
@@ -336,11 +337,6 @@ DefaultFetch<Impl>::initStage()
 
         memReq[tid] = NULL;
 
-        // Create space to store a cache line.
-        cacheData[tid] = new uint8_t[cacheBlkSize];
-        cacheDataPC[tid] = 0;
-        cacheDataValid[tid] = false;
-
         stalls[tid].decode = false;
         stalls[tid].rename = false;
         stalls[tid].iew = false;
@@ -350,6 +346,24 @@ DefaultFetch<Impl>::initStage()
 
 template<class Impl>
 void
+DefaultFetch<Impl>::setIcache()
+{
+    // Size of cache block.
+    cacheBlkSize = icachePort->peerBlockSize();
+
+    // Create mask to get rid of offset bits.
+    cacheBlkMask = (cacheBlkSize - 1);
+
+    for (int tid=0; tid < numThreads; tid++) {
+        // Create space to store a cache line.
+        cacheData[tid] = new uint8_t[cacheBlkSize];
+        cacheDataPC[tid] = 0;
+        cacheDataValid[tid] = false;
+    }
+}
+
+template<class Impl>
+void
 DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt)
 {
     unsigned tid = pkt->req->getThreadNum();
@@ -619,6 +633,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
                 fault = TheISA::genMachineCheckFault();
                 delete mem_req;
                 memReq[tid] = NULL;
+                warn("Bad address!\n");
             }
             assert(retryPkt == NULL);
             assert(retryTid == -1);
@@ -669,11 +684,12 @@ DefaultFetch<Impl>::doSquash(const Addr &new_PC,
     // Get rid of the retrying packet if it was from this thread.
     if (retryTid == tid) {
         assert(cacheBlocked);
-        cacheBlocked = false;
-        retryTid = -1;
-        delete retryPkt->req;
-        delete retryPkt;
+        if (retryPkt) {
+            delete retryPkt->req;
+            delete retryPkt;
+        }
         retryPkt = NULL;
+        retryTid = -1;
     }
 
     fetchStatus[tid] = Squashing;
@@ -1117,13 +1133,10 @@ DefaultFetch<Impl>::fetch(bool &status_change)
             inst = TheISA::gtoh(*reinterpret_cast<TheISA::MachInst *>
                         (&cacheData[tid][offset]));
 
-#if THE_ISA == ALPHA_ISA
-            ext_inst = TheISA::makeExtMI(inst, fetch_PC);
-#elif THE_ISA == SPARC_ISA
-            ext_inst = TheISA::makeExtMI(inst, cpu->thread[tid]->getTC());
-#elif THE_ISA == MIPS_ISA
-            ext_inst = TheISA::makeExtMI(inst, cpu->thread[tid]->getTC());
-#endif
+            predecoder.setTC(cpu->thread[tid]->getTC());
+            predecoder.moreBytes(fetch_PC, 0, inst);
+
+            ext_inst = predecoder.getExtMachInst();
 
             // Create a new DynInst from the instruction fetched.
             DynInstPtr instruction = new DynInst(ext_inst,
@@ -1152,7 +1165,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
 
             ///FIXME This needs to be more robust in dealing with delay slots
 #if !ISA_HAS_DELAY_SLOT
-            predicted_branch |=
+//	    predicted_branch |=
 #endif
             lookupAndUpdateNextPC(instruction, next_PC, next_NPC);
             predicted_branch |= (next_PC != fetch_NPC);
@@ -1223,7 +1236,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
         // until commit handles the fault.  The only other way it can
         // wake up is if a squash comes along and changes the PC.
 #if FULL_SYSTEM
-        assert(numInst != fetchWidth);
+        assert(numInst < fetchWidth);
         // Get a sequence number.
         inst_seq = cpu->getAndIncrementInstSeq();
         // We will use a nop in order to carry the fault.
diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh
index f24eaf2c4..d2948a525 100644
--- a/src/cpu/o3/iew_impl.hh
+++ b/src/cpu/o3/iew_impl.hh
@@ -282,8 +282,8 @@ template<class Impl>
 void
 DefaultIEW<Impl>::setCPU(O3CPU *cpu_ptr)
 {
-    DPRINTF(IEW, "Setting CPU pointer.\n");
     cpu = cpu_ptr;
+    DPRINTF(IEW, "Setting CPU pointer.\n");
 
     instQueue.setCPU(cpu_ptr);
     ldstQueue.setCPU(cpu_ptr);
@@ -295,7 +295,6 @@ template<class Impl>
 void
 DefaultIEW<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
 {
-    DPRINTF(IEW, "Setting time buffer pointer.\n");
     timeBuffer = tb_ptr;
 
     // Setup wire to read information from time buffer, from commit.
@@ -314,7 +313,6 @@ template<class Impl>
 void
 DefaultIEW<Impl>::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr)
 {
-    DPRINTF(IEW, "Setting rename queue pointer.\n");
     renameQueue = rq_ptr;
 
     // Setup wire to read information from rename queue.
@@ -325,7 +323,6 @@ template<class Impl>
 void
 DefaultIEW<Impl>::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr)
 {
-    DPRINTF(IEW, "Setting IEW queue pointer.\n");
     iewQueue = iq_ptr;
 
     // Setup wire to write instructions to commit.
@@ -336,7 +333,6 @@ template<class Impl>
 void
 DefaultIEW<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
 {
-    DPRINTF(IEW, "Setting active threads list pointer.\n");
     activeThreads = at_ptr;
 
     ldstQueue.setActiveThreads(at_ptr);
@@ -347,7 +343,6 @@ template<class Impl>
 void
 DefaultIEW<Impl>::setScoreboard(Scoreboard *sb_ptr)
 {
-    DPRINTF(IEW, "Setting scoreboard pointer.\n");
     scoreboard = sb_ptr;
 }
 
@@ -1153,19 +1148,6 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid)
             inst->setCanCommit();
             instQueue.insertBarrier(inst);
             add_to_iq = false;
-        } else if (inst->isNonSpeculative()) {
-            DPRINTF(IEW, "[tid:%i]: Issue: Nonspeculative instruction "
-                    "encountered, skipping.\n", tid);
-
-            // Same as non-speculative stores.
-            inst->setCanCommit();
-
-            // Specifically insert it as nonspeculative.
-            instQueue.insertNonSpec(inst);
-
-            ++iewDispNonSpecInsts;
-
-            add_to_iq = false;
         } else if (inst->isNop()) {
             DPRINTF(IEW, "[tid:%i]: Issue: Nop instruction encountered, "
                     "skipping.\n", tid);
@@ -1193,6 +1175,20 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid)
         } else {
             add_to_iq = true;
         }
+        if (inst->isNonSpeculative()) {
+            DPRINTF(IEW, "[tid:%i]: Issue: Nonspeculative instruction "
+                    "encountered, skipping.\n", tid);
+
+            // Same as non-speculative stores.
+            inst->setCanCommit();
+
+            // Specifically insert it as nonspeculative.
+            instQueue.insertNonSpec(inst);
+
+            ++iewDispNonSpecInsts;
+
+            add_to_iq = false;
+        }
 
         // If the instruction queue is not full, then add the
         // instruction.
@@ -1379,6 +1375,7 @@ DefaultIEW<Impl>::executeInsts()
                     predictedNotTakenIncorrect++;
                 }
             } else if (ldstQueue.violation(tid)) {
+                assert(inst->isMemRef());
                 // If there was an ordering violation, then get the
                 // DynInst that caused the violation.  Note that this
                 // clears the violation signal.
@@ -1391,10 +1388,10 @@ DefaultIEW<Impl>::executeInsts()
 
                 // Ensure the violating instruction is older than
                 // current squash
-                if (fetchRedirect[tid] &&
-                    violator->seqNum >= toCommit->squashedSeqNum[tid])
+/*                if (fetchRedirect[tid] &&
+                    violator->seqNum >= toCommit->squashedSeqNum[tid] + 1)
                     continue;
-
+*/
                 fetchRedirect[tid] = true;
 
                 // Tell the instruction queue that a violation has occured.
@@ -1414,6 +1411,33 @@ DefaultIEW<Impl>::executeInsts()
 
                 squashDueToMemBlocked(inst, tid);
             }
+        } else {
+            // Reset any state associated with redirects that will not
+            // be used.
+            if (ldstQueue.violation(tid)) {
+                assert(inst->isMemRef());
+
+                DynInstPtr violator = ldstQueue.getMemDepViolator(tid);
+
+                DPRINTF(IEW, "LDSTQ detected a violation.  Violator PC: "
+                        "%#x, inst PC: %#x.  Addr is: %#x.\n",
+                        violator->readPC(), inst->readPC(), inst->physEffAddr);
+                DPRINTF(IEW, "Violation will not be handled because "
+                        "already squashing\n");
+
+                ++memOrderViolationEvents;
+            }
+            if (ldstQueue.loadBlocked(tid) &&
+                !ldstQueue.isLoadBlockedHandled(tid)) {
+                DPRINTF(IEW, "Load operation couldn't execute because the "
+                        "memory system is blocked.  PC: %#x [sn:%lli]\n",
+                        inst->readPC(), inst->seqNum);
+                DPRINTF(IEW, "Blocked load will not be handled because "
+                        "already squashing\n");
+
+                ldstQueue.setLoadBlockedHandled(tid);
+            }
+
         }
     }
 
@@ -1563,6 +1587,7 @@ DefaultIEW<Impl>::tick()
             //DPRINTF(IEW,"NonspecInst from thread %i",tid);
             if (fromCommit->commitInfo[tid].uncached) {
                 instQueue.replayMemInst(fromCommit->commitInfo[tid].uncachedLoad);
+                fromCommit->commitInfo[tid].uncachedLoad->setAtCommit();
             } else {
                 instQueue.scheduleNonSpec(
                     fromCommit->commitInfo[tid].nonSpecSeqNum);
diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh
index d5781d89d..4d99fb520 100644
--- a/src/cpu/o3/inst_queue_impl.hh
+++ b/src/cpu/o3/inst_queue_impl.hh
@@ -81,8 +81,6 @@ InstructionQueue<Impl>::InstructionQueue(Params *params)
     // Set the number of physical registers as the number of int + float
     numPhysRegs = numPhysIntRegs + numPhysFloatRegs;
 
-    DPRINTF(IQ, "There are %i physical registers.\n", numPhysRegs);
-
     //Create an entry for each physical register within the
     //dependency graph.
     dependGraph.resize(numPhysRegs);
@@ -124,8 +122,10 @@ InstructionQueue<Impl>::InstructionQueue(Params *params)
             maxEntries[i] = part_amt;
         }
 
+/*
         DPRINTF(IQ, "IQ sharing policy set to Partitioned:"
                 "%i entries per thread.\n",part_amt);
+*/
 
     } else if (policy == "threshold") {
         iqPolicy = Threshold;
@@ -139,8 +139,10 @@ InstructionQueue<Impl>::InstructionQueue(Params *params)
             maxEntries[i] = thresholdIQ;
         }
 
+/*
         DPRINTF(IQ, "IQ sharing policy set to Threshold:"
                 "%i entries per thread.\n",thresholdIQ);
+*/
    } else {
        assert(0 && "Invalid IQ Sharing Policy.Options Are:{Dynamic,"
               "Partitioned, Threshold}");
@@ -360,7 +362,6 @@ template <class Impl>
 void
 InstructionQueue<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
 {
-    DPRINTF(IQ, "Setting active threads list pointer.\n");
     activeThreads = at_ptr;
 }
 
@@ -368,15 +369,13 @@ template <class Impl>
 void
 InstructionQueue<Impl>::setIssueToExecuteQueue(TimeBuffer<IssueStruct> *i2e_ptr)
 {
-    DPRINTF(IQ, "Set the issue to execute queue.\n");
-    issueToExecuteQueue = i2e_ptr;
+      issueToExecuteQueue = i2e_ptr;
 }
 
 template <class Impl>
 void
 InstructionQueue<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
 {
-    DPRINTF(IQ, "Set the time buffer.\n");
     timeBuffer = tb_ptr;
 
     fromCommit = timeBuffer->getWire(-commitToIEWDelay);
@@ -829,6 +828,8 @@ InstructionQueue<Impl>::scheduleNonSpec(const InstSeqNum &inst)
 
     unsigned tid = (*inst_it).second->threadNumber;
 
+    (*inst_it).second->setAtCommit();
+
     (*inst_it).second->setCanIssue();
 
     if (!(*inst_it).second->isMemRef()) {
@@ -960,6 +961,8 @@ template <class Impl>
 void
 InstructionQueue<Impl>::rescheduleMemInst(DynInstPtr &resched_inst)
 {
+    DPRINTF(IQ, "Rescheduling mem inst [sn:%lli]\n", resched_inst->seqNum);
+    resched_inst->clearCanIssue();
     memDepUnit[resched_inst->threadNumber].reschedule(resched_inst);
 }
 
@@ -984,7 +987,6 @@ InstructionQueue<Impl>::completeMemInst(DynInstPtr &completed_inst)
     completed_inst->memOpDone = true;
 
     memDepUnit[tid].completed(completed_inst);
-
     count[tid]--;
 }
 
@@ -1084,16 +1086,21 @@ InstructionQueue<Impl>::doSquash(unsigned tid)
 
                     ++iqSquashedOperandsExamined;
                 }
-            } else if (!squashed_inst->isStoreConditional() || !squashed_inst->isCompleted()) {
+            } else if (!squashed_inst->isStoreConditional() ||
+                       !squashed_inst->isCompleted()) {
                 NonSpecMapIt ns_inst_it =
                     nonSpecInsts.find(squashed_inst->seqNum);
                 assert(ns_inst_it != nonSpecInsts.end());
+                if (ns_inst_it == nonSpecInsts.end()) {
+                    assert(squashed_inst->getFault() != NoFault);
+                } else {
 
-                (*ns_inst_it).second = NULL;
+                    (*ns_inst_it).second = NULL;
 
-                nonSpecInsts.erase(ns_inst_it);
+                    nonSpecInsts.erase(ns_inst_it);
 
-                ++iqSquashedNonSpecRemoved;
+                    ++iqSquashedNonSpecRemoved;
+                }
             }
 
             // Might want to also clear out the head of the dependency graph.
diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh
index d4994fcb7..02cc5784c 100644
--- a/src/cpu/o3/lsq_impl.hh
+++ b/src/cpu/o3/lsq_impl.hh
@@ -112,8 +112,6 @@ LSQ<Impl>::LSQ(Params *params)
       SQEntries(params->SQEntries), numThreads(params->numberOfThreads),
       retryTid(-1)
 {
-    DPRINTF(LSQ, "Creating LSQ object.\n");
-
     dcachePort.snoopRangeSent = false;
 
     //**********************************************/
@@ -131,20 +129,20 @@ LSQ<Impl>::LSQ(Params *params)
 
         maxLQEntries = LQEntries;
         maxSQEntries = SQEntries;
-
+/*
         DPRINTF(LSQ, "LSQ sharing policy set to Dynamic\n");
-
+*/
     } else if (policy == "partitioned") {
         lsqPolicy = Partitioned;
 
         //@todo:make work if part_amt doesnt divide evenly.
         maxLQEntries = LQEntries / numThreads;
         maxSQEntries = SQEntries / numThreads;
-
+/*
         DPRINTF(Fetch, "LSQ sharing policy set to Partitioned: "
                 "%i entries per LQ | %i entries per SQ",
                 maxLQEntries,maxSQEntries);
-
+*/
     } else if (policy == "threshold") {
         lsqPolicy = Threshold;
 
@@ -156,10 +154,11 @@ LSQ<Impl>::LSQ(Params *params)
         //amount of the LSQ
         maxLQEntries  = params->smtLSQThreshold;
         maxSQEntries  = params->smtLSQThreshold;
-
+/*
         DPRINTF(LSQ, "LSQ sharing policy set to Threshold: "
                 "%i entries per LQ | %i entries per SQ",
                 maxLQEntries,maxSQEntries);
+*/
 
     } else {
         assert(0 && "Invalid LSQ Sharing Policy.Options Are:{Dynamic,"
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index 2419afe29..1b10843f5 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -497,6 +497,11 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
         (load_idx != loadHead || !load_inst->isAtCommit())) {
         iewStage->rescheduleMemInst(load_inst);
         ++lsqRescheduledLoads;
+
+        // Must delete request now that it wasn't handed off to
+        // memory.  This is quite ugly.  @todo: Figure out the proper
+        // place to really handle request deletes.
+        delete req;
         return TheISA::genMachineCheckFault();
     }
 
@@ -534,6 +539,10 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
 
         if (store_size == 0)
             continue;
+        else if (storeQueue[store_idx].inst->uncacheable())
+            continue;
+
+        assert(storeQueue[store_idx].inst->effAddrValid);
 
         // Check if the store data is within the lower and upper bounds of
         // addresses that the request needs.
@@ -550,7 +559,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
             storeQueue[store_idx].inst->effAddr;
 
         // If the store's data has all of the data needed, we can forward.
-        if (store_has_lower_limit && store_has_upper_limit) {
+        if ((store_has_lower_limit && store_has_upper_limit)) {
             // Get shift amount for offset into the store's data.
             int shift_amt = req->getVaddr() & (store_size - 1);
             // @todo: Magic number, assumes byte addressing
@@ -596,6 +605,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
             // If it's already been written back, then don't worry about
             // stalling on it.
             if (storeQueue[store_idx].completed) {
+                panic("Should not check one of these");
                 continue;
             }
 
@@ -614,6 +624,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
             // rescheduled eventually
             iewStage->rescheduleMemInst(load_inst);
             iewStage->decrWb(load_inst->seqNum);
+            load_inst->clearIssued();
             ++lsqRescheduledLoads;
 
             // Do not generate a writeback event as this instruction is not
@@ -622,7 +633,11 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
                     "Store idx %i to load addr %#x\n",
                     store_idx, req->getVaddr());
 
-            ++lsqBlockedLoads;
+            // Must delete request now that it wasn't handed off to
+            // memory.  This is quite ugly.  @todo: Figure out the
+            // proper place to really handle request deletes.
+            delete req;
+
             return NoFault;
         }
     }
@@ -654,8 +669,11 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
             // Delete state and data packet because a load retry
             // initiates a pipeline restart; it does not retry.
             delete state;
+            delete data_pkt->req;
             delete data_pkt;
 
+            req = NULL;
+
             if (result == Packet::BadAddress) {
                 return TheISA::genMachineCheckFault();
             }
@@ -669,6 +687,9 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
     // If the cache was blocked, or has become blocked due to the access,
     // handle it.
     if (lsq->cacheBlocked()) {
+        if (req)
+            delete req;
+
         ++lsqCacheBlocked;
 
         iewStage->decrWb(load_inst->seqNum);
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index 3ba22a530..0a3021046 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -81,6 +81,7 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
     if (isSwitchedOut() || inst->isSquashed()) {
         iewStage->decrWb(inst->seqNum);
         delete state;
+        delete pkt->req;
         delete pkt;
         return;
     } else {
@@ -94,6 +95,7 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
     }
 
     delete state;
+    delete pkt->req;
     delete pkt;
 }
 
@@ -110,7 +112,7 @@ void
 LSQUnit<Impl>::init(Params *params, LSQ *lsq_ptr, unsigned maxLQEntries,
                     unsigned maxSQEntries, unsigned id)
 {
-    DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",id);
+//    DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",id);
 
     switchedOut = false;
 
@@ -403,12 +405,15 @@ template <class Impl>
 Fault
 LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
 {
+    using namespace TheISA;
     // Execute a specific load.
     Fault load_fault = NoFault;
 
     DPRINTF(LSQUnit, "Executing load PC %#x, [sn:%lli]\n",
             inst->readPC(),inst->seqNum);
 
+    assert(!inst->isSquashed());
+
     load_fault = inst->initiateAcc();
 
     // If the instruction faulted, then we need to send it along to commit
@@ -418,12 +423,44 @@ LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
         // realizes there is activity.
         // Mark it as executed unless it is an uncached load that
         // needs to hit the head of commit.
-        if (!(inst->req && inst->req->isUncacheable()) ||
+        if (!(inst->hasRequest() && inst->uncacheable()) ||
             inst->isAtCommit()) {
             inst->setExecuted();
         }
         iewStage->instToCommit(inst);
         iewStage->activityThisCycle();
+    } else if (!loadBlocked()) {
+        assert(inst->effAddrValid);
+        int load_idx = inst->lqIdx;
+        incrLdIdx(load_idx);
+        while (load_idx != loadTail) {
+            // Really only need to check loads that have actually executed
+
+            // @todo: For now this is extra conservative, detecting a
+            // violation if the addresses match assuming all accesses
+            // are quad word accesses.
+
+            // @todo: Fix this, magic number being used here
+            if (loadQueue[load_idx]->effAddrValid &&
+                (loadQueue[load_idx]->effAddr >> 8) ==
+                (inst->effAddr >> 8)) {
+                // A load incorrectly passed this load.  Squash and refetch.
+                // For now return a fault to show that it was unsuccessful.
+                DynInstPtr violator = loadQueue[load_idx];
+                if (!memDepViolator ||
+                    (violator->seqNum < memDepViolator->seqNum)) {
+                    memDepViolator = violator;
+                } else {
+                    break;
+                }
+
+                ++lsqMemOrderViolation;
+
+                return genMachineCheckFault();
+            }
+
+            incrLdIdx(load_idx);
+        }
     }
 
     return load_fault;
@@ -442,6 +479,8 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
     DPRINTF(LSQUnit, "Executing store PC %#x [sn:%lli]\n",
             store_inst->readPC(), store_inst->seqNum);
 
+    assert(!store_inst->isSquashed());
+
     // Check the recently completed loads to see if any match this store's
     // address.  If so, then we have a memory ordering violation.
     int load_idx = store_inst->lqIdx;
@@ -465,32 +504,36 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
         ++storesToWB;
     }
 
-    if (!memDepViolator) {
-        while (load_idx != loadTail) {
-            // Really only need to check loads that have actually executed
-            // It's safe to check all loads because effAddr is set to
-            // InvalAddr when the dyn inst is created.
-
-            // @todo: For now this is extra conservative, detecting a
-            // violation if the addresses match assuming all accesses
-            // are quad word accesses.
-
-            // @todo: Fix this, magic number being used here
-            if ((loadQueue[load_idx]->effAddr >> 8) ==
-                (store_inst->effAddr >> 8)) {
-                // A load incorrectly passed this store.  Squash and refetch.
-                // For now return a fault to show that it was unsuccessful.
-                memDepViolator = loadQueue[load_idx];
-                ++lsqMemOrderViolation;
-
-                return genMachineCheckFault();
+    assert(store_inst->effAddrValid);
+    while (load_idx != loadTail) {
+        // Really only need to check loads that have actually executed
+        // It's safe to check all loads because a load whose effAddr is not
+        // yet valid is skipped by the effAddrValid test below.
+
+        // @todo: For now this is extra conservative, detecting a
+        // violation if the addresses match assuming all accesses
+        // are quad word accesses.
+
+        // @todo: Fix this, magic number being used here
+        if (loadQueue[load_idx]->effAddrValid &&
+            (loadQueue[load_idx]->effAddr >> 8) ==
+            (store_inst->effAddr >> 8)) {
+            // A load incorrectly passed this store.  Squash and refetch.
+            // For now return a fault to show that it was unsuccessful.
+            DynInstPtr violator = loadQueue[load_idx];
+            if (!memDepViolator ||
+                (violator->seqNum < memDepViolator->seqNum)) {
+                memDepViolator = violator;
+            } else {
+                break;
             }
 
-            incrLdIdx(load_idx);
+            ++lsqMemOrderViolation;
+
+            return genMachineCheckFault();
         }
 
-        // If we've reached this point, there was no violation.
-        memDepViolator = NULL;
+        incrLdIdx(load_idx);
     }
 
     return store_fault;
@@ -660,7 +703,7 @@ LSQUnit<Impl>::writebackStores()
                 panic("LSQ sent out a bad address for a completed store!");
             }
             // Need to handle becoming blocked on a store.
-            DPRINTF(IEW, "D-Cache became blcoked when writing [sn:%lli], will"
+            DPRINTF(IEW, "D-Cache became blocked when writing [sn:%lli], will "
                     "retry later\n",
                     inst->seqNum);
             isStoreBlocked = true;
@@ -735,6 +778,10 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
         }
     }
 
+    if (memDepViolator && squashed_num < memDepViolator->seqNum) {
+        memDepViolator = NULL;
+    }
+
     int store_idx = storeTail;
     decrStIdx(store_idx);
 
@@ -764,6 +811,11 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
         storeQueue[store_idx].inst = NULL;
         storeQueue[store_idx].canWB = 0;
 
+        // Must delete request now that it wasn't handed off to
+        // memory.  This is quite ugly.  @todo: Figure out the proper
+        // place to really handle request deletes.
+        delete storeQueue[store_idx].req;
+
         storeQueue[store_idx].req = NULL;
         --stores;
 
diff --git a/src/cpu/o3/mem_dep_unit_impl.hh b/src/cpu/o3/mem_dep_unit_impl.hh
index f19980fd5..64558efaa 100644
--- a/src/cpu/o3/mem_dep_unit_impl.hh
+++ b/src/cpu/o3/mem_dep_unit_impl.hh
@@ -214,6 +214,9 @@ MemDepUnit<MemDepPred, Impl>::insert(DynInstPtr &inst)
             inst_entry->regsReady = true;
         }
 
+        // Clear the bit saying this instruction can issue.
+        inst->clearCanIssue();
+
         // Add this instruction to the list of dependents.
         store_entry->dependInsts.push_back(inst_entry);
 
@@ -357,7 +360,6 @@ void
 MemDepUnit<MemDepPred, Impl>::replay(DynInstPtr &inst)
 {
     DynInstPtr temp_inst;
-    bool found_inst = false;
 
     // For now this replay function replays all waiting memory ops.
     while (!instsToReplay.empty()) {
@@ -371,14 +373,8 @@ MemDepUnit<MemDepPred, Impl>::replay(DynInstPtr &inst)
 
         moveToReady(inst_entry);
 
-        if (temp_inst == inst) {
-            found_inst = true;
-        }
-
         instsToReplay.pop_front();
     }
-
-    assert(found_inst);
 }
 
 template <class MemDepPred, class Impl>
diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh
index e303f1cee..eb04ca733 100644
--- a/src/cpu/o3/rename_impl.hh
+++ b/src/cpu/o3/rename_impl.hh
@@ -168,15 +168,14 @@ template <class Impl>
 void
 DefaultRename<Impl>::setCPU(O3CPU *cpu_ptr)
 {
-    DPRINTF(Rename, "Setting CPU pointer.\n");
     cpu = cpu_ptr;
+    DPRINTF(Rename, "Setting CPU pointer.\n");
 }
 
 template <class Impl>
 void
 DefaultRename<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
 {
-    DPRINTF(Rename, "Setting time buffer pointer.\n");
     timeBuffer = tb_ptr;
 
     // Setup wire to read information from time buffer, from IEW stage.
@@ -193,7 +192,6 @@ template <class Impl>
 void
 DefaultRename<Impl>::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr)
 {
-    DPRINTF(Rename, "Setting rename queue pointer.\n");
     renameQueue = rq_ptr;
 
     // Setup wire to write information to future stages.
@@ -204,7 +202,6 @@ template <class Impl>
 void
 DefaultRename<Impl>::setDecodeQueue(TimeBuffer<DecodeStruct> *dq_ptr)
 {
-    DPRINTF(Rename, "Setting decode queue pointer.\n");
     decodeQueue = dq_ptr;
 
     // Setup wire to get information from decode.
@@ -228,7 +225,6 @@ template<class Impl>
 void
 DefaultRename<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
 {
-    DPRINTF(Rename, "Setting active threads list pointer.\n");
     activeThreads = at_ptr;
 }
 
@@ -237,8 +233,6 @@ template <class Impl>
 void
 DefaultRename<Impl>::setRenameMap(RenameMap rm_ptr[])
 {
-    DPRINTF(Rename, "Setting rename map pointers.\n");
-
     for (int i=0; i<numThreads; i++) {
         renameMap[i] = &rm_ptr[i];
     }
@@ -248,7 +242,6 @@ template <class Impl>
 void
 DefaultRename<Impl>::setFreeList(FreeList *fl_ptr)
 {
-    DPRINTF(Rename, "Setting free list pointer.\n");
     freeList = fl_ptr;
 }
 
@@ -256,7 +249,6 @@ template<class Impl>
 void
 DefaultRename<Impl>::setScoreboard(Scoreboard *_scoreboard)
 {
-    DPRINTF(Rename, "Setting scoreboard pointer.\n");
     scoreboard = _scoreboard;
 }
 
diff --git a/src/cpu/o3/rename_map.cc b/src/cpu/o3/rename_map.cc
index 620daf691..b436ec1c3 100644
--- a/src/cpu/o3/rename_map.cc
+++ b/src/cpu/o3/rename_map.cc
@@ -192,8 +192,6 @@ SimpleRenameMap::rename(RegIndex arch_reg)
         // known that the prev reg was outside the range of normal registers
         // so the free list can avoid adding it.
         prev_reg = renamed_reg;
-
-        assert(renamed_reg < numPhysicalRegs + numMiscRegs);
     }
 
     DPRINTF(Rename, "Renamed reg %d to physical reg %d old mapping was %d\n",
diff --git a/src/cpu/o3/rob_impl.hh b/src/cpu/o3/rob_impl.hh
index fde636754..975aba379 100644
--- a/src/cpu/o3/rob_impl.hh
+++ b/src/cpu/o3/rob_impl.hh
@@ -66,7 +66,7 @@ ROB<Impl>::ROB(unsigned _numEntries, unsigned _squashWidth,
 
     } else if (policy == "partitioned") {
         robPolicy = Partitioned;
-        DPRINTF(Fetch, "ROB sharing policy set to Partitioned\n");
+//      DPRINTF(Fetch, "ROB sharing policy set to Partitioned\n");
 
         //@todo:make work if part_amt doesnt divide evenly.
         int part_amt = numEntries / numThreads;
@@ -78,7 +78,7 @@ ROB<Impl>::ROB(unsigned _numEntries, unsigned _squashWidth,
 
     } else if (policy == "threshold") {
         robPolicy = Threshold;
-        DPRINTF(Fetch, "ROB sharing policy set to Threshold\n");
+//      DPRINTF(Fetch, "ROB sharing policy set to Threshold\n");
 
         int threshold =  _smtROBThreshold;;
 
diff --git a/src/cpu/o3/sparc/cpu_builder.cc b/src/cpu/o3/sparc/cpu_builder.cc
index 3cac89bad..35badce2c 100644
--- a/src/cpu/o3/sparc/cpu_builder.cc
+++ b/src/cpu/o3/sparc/cpu_builder.cc
@@ -50,11 +50,11 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivO3CPU)
     Param<int> clock;
     Param<int> phase;
     Param<int> numThreads;
+    Param<int> cpu_id;
     Param<int> activity;
 
 #if FULL_SYSTEM
     SimObjectParam<System *> system;
-    Param<int> cpu_id;
     SimObjectParam<SparcISA::ITB *> itb;
     SimObjectParam<SparcISA::DTB *> dtb;
     Param<Tick> profile;
@@ -161,11 +161,11 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU)
     INIT_PARAM(clock, "clock speed"),
     INIT_PARAM_DFLT(phase, "clock phase", 0),
     INIT_PARAM(numThreads, "number of HW thread contexts"),
+    INIT_PARAM(cpu_id, "processor ID"),
     INIT_PARAM_DFLT(activity, "Initial activity count", 0),
 
 #if FULL_SYSTEM
     INIT_PARAM(system, "System object"),
-    INIT_PARAM(cpu_id, "processor ID"),
     INIT_PARAM(itb, "Instruction translation buffer"),
     INIT_PARAM(dtb, "Data translation buffer"),
     INIT_PARAM(profile, ""),
@@ -305,14 +305,15 @@ CREATE_SIM_OBJECT(DerivO3CPU)
     SparcSimpleParams *params = new SparcSimpleParams;
 
     params->clock = clock;
+    params->phase = phase;
 
     params->name = getInstanceName();
     params->numberOfThreads = actual_num_threads;
+    params->cpu_id = cpu_id;
     params->activity = activity;
 
 #if FULL_SYSTEM
     params->system = system;
-    params->cpu_id = cpu_id;
     params->itb = itb;
     params->dtb = dtb;
     params->profile = profile;
diff --git a/src/cpu/o3/thread_context_impl.hh b/src/cpu/o3/thread_context_impl.hh
index d2acc6232..a145e046e 100755
--- a/src/cpu/o3/thread_context_impl.hh
+++ b/src/cpu/o3/thread_context_impl.hh
@@ -103,7 +103,7 @@ void
 O3ThreadContext<Impl>::delVirtPort(VirtualPort *vp)
 {
     if (vp != thread->getVirtPort()) {
-        delete vp->getPeer();
+        vp->removeConn();
         delete vp;
     }
 }
diff --git a/src/cpu/ozone/SConscript b/src/cpu/ozone/SConscript
new file mode 100644
index 000000000..4a040684a
--- /dev/null
+++ b/src/cpu/ozone/SConscript
@@ -0,0 +1,45 @@
+# -*- mode:python -*-
+
+# Copyright (c) 2006 The Regents of The University of Michigan
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Nathan Binkert
+
+Import('*')
+
+if 'OzoneCPU' in env['CPU_MODELS']:
+    need_bp_unit = True
+    Source('base_dyn_inst.cc')
+    Source('bpred_unit.cc')
+    Source('cpu.cc')
+    Source('cpu_builder.cc')
+    Source('dyn_inst.cc')
+    Source('front_end.cc')
+    Source('lw_back_end.cc')
+    Source('lw_lsq.cc')
+    Source('rename_table.cc')
+    if env['USE_CHECKER']:
+        Source('checker_builder.cc')
diff --git a/src/cpu/ozone/SConsopts b/src/cpu/ozone/SConsopts
new file mode 100644
index 000000000..341644dcd
--- /dev/null
+++ b/src/cpu/ozone/SConsopts
@@ -0,0 +1,33 @@
+# -*- mode:python -*-
+
+# Copyright (c) 2006 The Regents of The University of Michigan
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Nathan Binkert
+
+Import('*')
+
+all_cpu_list.append('OzoneCPU')
diff --git a/src/cpu/ozone/cpu_impl.hh b/src/cpu/ozone/cpu_impl.hh
index 4a76ae110..d78162243 100644
--- a/src/cpu/ozone/cpu_impl.hh
+++ b/src/cpu/ozone/cpu_impl.hh
@@ -748,7 +748,7 @@ template <class Impl>
 void
 OzoneCPU<Impl>::OzoneTC::delVirtPort(VirtualPort *vp)
 {
-    delete vp->getPeer();
+    vp->removeConn();
     delete vp;
 }
 #endif
diff --git a/src/cpu/pc_event.cc b/src/cpu/pc_event.cc
index 7ab8bfcb8..438218df2 100644
--- a/src/cpu/pc_event.cc
+++ b/src/cpu/pc_event.cc
@@ -138,14 +138,12 @@ BreakPCEvent::process(ThreadContext *tc)
 }
 
 #if FULL_SYSTEM
-extern "C"
 void
 sched_break_pc_sys(System *sys, Addr addr)
 {
     new BreakPCEvent(&sys->pcEventQueue, "debug break", addr, true);
 }
 
-extern "C"
 void
 sched_break_pc(Addr addr)
 {
diff --git a/src/cpu/simple/SConscript b/src/cpu/simple/SConscript
new file mode 100644
index 000000000..9a6a80473
--- /dev/null
+++ b/src/cpu/simple/SConscript
@@ -0,0 +1,43 @@
+# -*- mode:python -*-
+
+# Copyright (c) 2006 The Regents of The University of Michigan
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Nathan Binkert
+
+Import('*')
+
+need_simple_base = False
+if 'AtomicSimpleCPU' in env['CPU_MODELS']:
+    need_simple_base = True
+    Source('atomic.cc')
+
+if 'TimingSimpleCPU' in env['CPU_MODELS']:
+    need_simple_base = True
+    Source('timing.cc')
+
+if need_simple_base:
+    Source('base.cc')
diff --git a/src/cpu/simple/SConsopts b/src/cpu/simple/SConsopts
new file mode 100644
index 000000000..32dbda1a5
--- /dev/null
+++ b/src/cpu/simple/SConsopts
@@ -0,0 +1,34 @@
+# -*- mode:python -*-
+
+# Copyright (c) 2006 The Regents of The University of Michigan
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Nathan Binkert
+
+Import('*')
+
+all_cpu_list.extend(('AtomicSimpleCPU', 'TimingSimpleCPU'))
+default_cpus.extend(('AtomicSimpleCPU', 'TimingSimpleCPU'))
diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc
index 6a14a8aa5..6f69b5ac4 100644
--- a/src/cpu/simple/atomic.cc
+++ b/src/cpu/simple/atomic.cc
@@ -516,17 +516,28 @@ AtomicSimpleCPU::tick()
         Fault fault = setupFetchRequest(ifetch_req);
 
         if (fault == NoFault) {
-            ifetch_pkt->reinitFromRequest();
+            Tick icache_latency = 0;
+            bool icache_access = false;
+            dcache_access = false; // assume no dcache access
 
-            Tick icache_latency = icachePort.sendAtomic(ifetch_pkt);
-            // ifetch_req is initialized to read the instruction directly
-            // into the CPU object's inst field.
+            //Fetch more instruction memory if necessary
+            if(predecoder.needMoreBytes())
+            {
+                icache_access = true;
+                ifetch_pkt->reinitFromRequest();
+
+                icache_latency = icachePort.sendAtomic(ifetch_pkt);
+                // ifetch_req is initialized to read the instruction directly
+                // into the CPU object's inst field.
+            }
 
-            dcache_access = false; // assume no dcache access
             preExecute();
 
-            fault = curStaticInst->execute(this, traceData);
-            postExecute();
+            if(curStaticInst)
+            {
+                fault = curStaticInst->execute(this, traceData);
+                postExecute();
+            }
 
             // @todo remove me after debugging with legion done
             if (curStaticInst && (!curStaticInst->isMicroOp() ||
@@ -534,7 +545,8 @@ AtomicSimpleCPU::tick()
                 instCnt++;
 
             if (simulate_stalls) {
-                Tick icache_stall = icache_latency - cycles(1);
+                Tick icache_stall =
+                    icache_access ? icache_latency - cycles(1) : 0;
                 Tick dcache_stall =
                     dcache_access ? dcache_latency - cycles(1) : 0;
                 Tick stall_cycles = (icache_stall + dcache_stall) / cycles(1);
@@ -545,8 +557,8 @@ AtomicSimpleCPU::tick()
             }
 
         }
-
-        advancePC(fault);
+        if(predecoder.needMoreBytes())
+            advancePC(fault);
     }
 
     if (_status != Idle)
diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc
index f6c109127..877dc5bd4 100644
--- a/src/cpu/simple/base.cc
+++ b/src/cpu/simple/base.cc
@@ -70,7 +70,7 @@ using namespace std;
 using namespace TheISA;
 
 BaseSimpleCPU::BaseSimpleCPU(Params *p)
-    : BaseCPU(p), thread(NULL)
+    : BaseCPU(p), thread(NULL), predecoder(NULL)
 {
 #if FULL_SYSTEM
     thread = new SimpleThread(this, 0, p->system, p->itb, p->dtb);
@@ -301,7 +301,7 @@ BaseSimpleCPU::post_interrupt(int int_num, int index)
     BaseCPU::post_interrupt(int_num, index);
 
     if (thread->status() == ThreadContext::Suspended) {
-                DPRINTF(IPI,"Suspended Processor awoke\n");
+                DPRINTF(Quiesce,"Suspended Processor awoke\n");
         thread->activate();
     }
 }
@@ -367,18 +367,23 @@ BaseSimpleCPU::preExecute()
     inst = gtoh(inst);
     //If we're not in the middle of a macro instruction
     if (!curMacroStaticInst) {
-#if THE_ISA == ALPHA_ISA
-        StaticInstPtr instPtr = StaticInst::decode(makeExtMI(inst, thread->readPC()));
-#elif THE_ISA == SPARC_ISA
-        StaticInstPtr instPtr = StaticInst::decode(makeExtMI(inst, thread->getTC()));
-#elif THE_ISA == X86_ISA
-        StaticInstPtr instPtr = StaticInst::decode(makeExtMI(inst, thread->getTC()));
-#elif THE_ISA == MIPS_ISA
-        //Mips doesn't do anything in it's MakeExtMI function right now,
-        //so it won't be called.
-        StaticInstPtr instPtr = StaticInst::decode(inst);
-#endif
-        if (instPtr->isMacroOp()) {
+        StaticInstPtr instPtr = NULL;
+
+        //Predecode, i.e. bundle up an ExtMachInst
+        //This should go away once the constructor can be set up properly
+        predecoder.setTC(thread->getTC());
+        //If more fetch data is needed, pass it in.
+        if(predecoder.needMoreBytes())
+            predecoder.moreBytes(thread->readPC(), 0, inst);
+        else
+            predecoder.process();
+        //If an instruction is ready, decode it
+        if (predecoder.extMachInstReady())
+            instPtr = StaticInst::decode(predecoder.getExtMachInst());
+
+        //If we decoded an instruction and it's microcoded, start pulling
+        //out micro ops
+        if (instPtr && instPtr->isMacroOp()) {
             curMacroStaticInst = instPtr;
             curStaticInst = curMacroStaticInst->
                 fetchMicroOp(thread->readMicroPC());
@@ -391,17 +396,19 @@ BaseSimpleCPU::preExecute()
             fetchMicroOp(thread->readMicroPC());
     }
 
+    //If we decoded an instruction this "tick", record information about it.
+    if(curStaticInst)
+    {
+        traceData = Trace::getInstRecord(curTick, tc, curStaticInst,
+                                         thread->readPC());
 
-    traceData = Trace::getInstRecord(curTick, tc, curStaticInst,
-                                     thread->readPC());
-
-    DPRINTF(Decode,"Decode: Decoded %s instruction (opcode: 0x%x): 0x%x\n",
-            curStaticInst->getName(), curStaticInst->getOpcode(),
-            curStaticInst->machInst);
+        DPRINTF(Decode,"Decode: Decoded %s instruction: 0x%x\n",
+                curStaticInst->getName(), curStaticInst->machInst);
 
 #if FULL_SYSTEM
-    thread->setInst(inst);
+        thread->setInst(inst);
 #endif // FULL_SYSTEM
+    }
 }
 
 void
@@ -411,7 +418,8 @@ BaseSimpleCPU::postExecute()
     if (thread->profile) {
         bool usermode = TheISA::inUserMode(tc);
         thread->profilePC = usermode ? 1 : thread->readPC();
-        ProfileNode *node = thread->profile->consume(tc, inst);
+        StaticInstPtr si(inst);
+        ProfileNode *node = thread->profile->consume(tc, si);
         if (node)
             thread->profileNode = node;
     }
@@ -444,9 +452,9 @@ BaseSimpleCPU::advancePC(Fault fault)
         fault->invoke(tc);
         thread->setMicroPC(0);
         thread->setNextMicroPC(1);
-    } else {
+    } else if (predecoder.needMoreBytes()) {
         //If we're at the last micro op for this instruction
-        if (curStaticInst->isLastMicroOp()) {
+        if (curStaticInst && curStaticInst->isLastMicroOp()) {
             //We should be working with a macro op
             assert(curMacroStaticInst);
             //Close out this macro op, and clean up the
@@ -465,13 +473,9 @@ BaseSimpleCPU::advancePC(Fault fault)
         } else {
             // go to the next instruction
             thread->setPC(thread->readNextPC());
-#if ISA_HAS_DELAY_SLOT
             thread->setNextPC(thread->readNextNPC());
             thread->setNextNPC(thread->readNextNPC() + sizeof(MachInst));
             assert(thread->readNextPC() != thread->readNextNPC());
-#else
-            thread->setNextPC(thread->readNextPC() + sizeof(MachInst));
-#endif
         }
     }
 
diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh
index eae24014b..787259c96 100644
--- a/src/cpu/simple/base.hh
+++ b/src/cpu/simple/base.hh
@@ -33,6 +33,7 @@
 #ifndef __CPU_SIMPLE_BASE_HH__
 #define __CPU_SIMPLE_BASE_HH__
 
+#include "arch/predecoder.hh"
 #include "base/statistics.hh"
 #include "config/full_system.hh"
 #include "cpu/base.hh"
@@ -63,6 +64,10 @@ class Process;
 class RemoteGDB;
 class GDBListener;
 
+namespace TheISA
+{
+    class Predecoder;
+}
 class ThreadContext;
 class Checkpoint;
 
@@ -74,7 +79,6 @@ namespace Trace {
 class BaseSimpleCPU : public BaseCPU
 {
   protected:
-    typedef TheISA::MachInst MachInst;
     typedef TheISA::MiscReg MiscReg;
     typedef TheISA::FloatReg FloatReg;
     typedef TheISA::FloatRegBits FloatRegBits;
@@ -122,7 +126,10 @@ class BaseSimpleCPU : public BaseCPU
 #endif
 
     // current instruction
-    MachInst inst;
+    TheISA::MachInst inst;
+
+    // Predecoder that bundles fetched instruction data into an ExtMachInst
+    TheISA::Predecoder predecoder;
 
     // Static data storage
     TheISA::LargestRead dataReg;
diff --git a/src/cpu/simple_thread.cc b/src/cpu/simple_thread.cc
index 13d0e2e29..39f31782b 100644
--- a/src/cpu/simple_thread.cc
+++ b/src/cpu/simple_thread.cc
@@ -305,7 +305,7 @@ void
 SimpleThread::delVirtPort(VirtualPort *vp)
 {
     if (vp != virtPort) {
-        delete vp->getPeer();
+        vp->removeConn();
         delete vp;
     }
 }
diff --git a/src/cpu/static_inst.hh b/src/cpu/static_inst.hh
index 416c8ab56..a58ac85d6 100644
--- a/src/cpu/static_inst.hh
+++ b/src/cpu/static_inst.hh
@@ -35,6 +35,7 @@
 #include <string>
 
 #include "arch/isa_traits.hh"
+#include "arch/utility.hh"
 #include "sim/faults.hh"
 #include "base/bitfield.hh"
 #include "base/hashmap.hh"
@@ -439,9 +440,6 @@ class StaticInst : public StaticInstBase
     //This is defined as inline below.
     static StaticInstPtr decode(ExtMachInst mach_inst);
 
-    /// Return opcode of machine instruction
-    uint32_t getOpcode() { return bits(machInst, 31, 26);}
-
     /// Return name of machine instruction
     std::string getName() { return mnemonic; }
 };
@@ -474,7 +472,7 @@ class StaticInstPtr : public RefCountingPtr<StaticInst>
 
     /// Construct directly from machine instruction.
     /// Calls StaticInst::decode().
-    StaticInstPtr(TheISA::ExtMachInst mach_inst)
+    explicit StaticInstPtr(TheISA::ExtMachInst mach_inst)
         : RefCountingPtr<StaticInst>(StaticInst::decode(mach_inst))
     {
     }
diff --git a/src/cpu/trace/SConscript b/src/cpu/trace/SConscript
new file mode 100644
index 000000000..f166b2f23
--- /dev/null
+++ b/src/cpu/trace/SConscript
@@ -0,0 +1,40 @@
+# -*- mode:python -*-
+
+# Copyright (c) 2006 The Regents of The University of Michigan
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Nathan Binkert
+
+Import('*')
+
+if False:  # disabled: the Source() calls below never execute
+    Source('opt_cpu.cc')
+    Source('trace_cpu.cc')
+
+    Source('reader/mem_trace_reader.cc')
+    Source('reader/ibm_reader.cc')
+    Source('reader/itx_reader.cc')
+    Source('reader/m5_reader.cc')