From 7b3b362ba5d2690324abd58c883fd1d5fe4dc767 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 17:18:15 -0500 Subject: inorder: init internal debug cpu counters - cpuEventNum - resReqCount --- src/cpu/inorder/cpu.cc | 60 ++++++++++++++++++++++++++++++++------------- src/cpu/inorder/cpu.hh | 32 ++++++++++++++++++------ src/cpu/inorder/resource.cc | 54 ++++++++++++++++++++++++++++++++++------ src/cpu/inorder/resource.hh | 40 ++++++++++-------------------- 4 files changed, 128 insertions(+), 58 deletions(-) (limited to 'src') diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index 1e3fdc40e..38f6b4eed 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -115,7 +115,8 @@ InOrderCPU::CPUEvent::process() cpu->activateThread(tid); break; - //@TODO: Consider Implementing "Suspend Thread" as Separate from Deallocate + //@TODO: Consider Implementing "Suspend Thread" as Separate from + //Deallocate case SuspendThread: // Suspend & Deallocate are same for now. //cpu->suspendThread(tid); //break; @@ -145,11 +146,14 @@ InOrderCPU::CPUEvent::process() default: fatal("Unrecognized Event Type %d", cpuEventType); + } - + cpu->cpuEventRemoveList.push(this); } + + const char * InOrderCPU::CPUEvent::description() { @@ -185,6 +189,10 @@ InOrderCPU::InOrderCPU(Params *params) system(params->system), physmem(system->physmem), #endif // FULL_SYSTEM +#ifdef DEBUG + cpuEventNum(0), + resReqCount(0), +#endif // DEBUG switchCount(0), deferRegistration(false/*params->deferRegistration*/), stageTracing(params->stageTracing), @@ -301,7 +309,7 @@ InOrderCPU::InOrderCPU(Params *params) // Define dummy instructions and resource requests to be used. DynInstPtr dummyBufferInst = new InOrderDynInst(this, NULL, 0, 0); - dummyReq = new ResourceRequest(NULL, NULL, 0, 0, 0, 0); + dummyReq = new ResourceRequest(resPool->getResource(0), NULL, 0, 0, 0, 0); // Reset CPU to reset state. #if FULL_SYSTEM @@ -322,6 +330,13 @@ InOrderCPU::regStats() /* Register the Resource Pool's stats here.*/ resPool->regStats(); +#ifdef DEBUG + maxResReqCount + .name(name() + ".maxResReqCount") + .desc("Maximum number of live resource requests in CPU") + .prereq(maxResReqCount); +#endif + /* Register any of the InOrderCPU's stats here.*/ timesIdled .name(name() + ".timesIdled") @@ -342,7 +357,7 @@ InOrderCPU::regStats() smtCycles .name(name() + ".smtCycles") - .desc("Total number of cycles that the CPU was simultaneous multithreading.(SMT)"); + .desc("Total number of cycles that the CPU was in SMT-mode"); committedInsts .init(numThreads) @@ -435,7 +450,8 @@ InOrderCPU::tick() //Tick next_tick = curTick + cycles(1); //tickEvent.schedule(next_tick); mainEventQueue.schedule(&tickEvent, nextCycle(curTick + 1)); - DPRINTF(InOrderCPU, "Scheduled CPU for next tick @ %i.\n", nextCycle(curTick + 1)); + DPRINTF(InOrderCPU, "Scheduled CPU for next tick @ %i.\n", + nextCycle(curTick + 1)); } } @@ -640,8 +656,8 @@ void InOrderCPU::addToCurrentThreads(ThreadID tid) { if (!isThreadInCPU(tid)) { - DPRINTF(InOrderCPU, "Adding Thread %i to current threads list in CPU.\n", - tid); + DPRINTF(InOrderCPU, "Adding Thread %i to current threads list in CPU." 
+ "\n", tid); currentThreads.push_back(tid); } } @@ -1002,9 +1018,11 @@ InOrderCPU::readRegOtherThread(unsigned reg_idx, ThreadID tid) tid = TheISA::getTargetThread(tcBase(tid)); } - if (reg_idx < FP_Base_DepTag) { // Integer Register File + if (reg_idx < FP_Base_DepTag) { + // Integer Register File return readIntReg(reg_idx, tid); - } else if (reg_idx < Ctrl_Base_DepTag) { // Float Register File + } else if (reg_idx < Ctrl_Base_DepTag) { + // Float Register File reg_idx -= FP_Base_DepTag; return readFloatRegBits(reg_idx, tid); } else { @@ -1070,9 +1088,12 @@ InOrderCPU::addInst(DynInstPtr &inst) void InOrderCPU::instDone(DynInstPtr inst, ThreadID tid) { - // Set the CPU's PCs - This contributes to the precise state of the CPU which can be used - // when restoring a thread to the CPU after a fork or after an exception - // @TODO: Set-Up Grad-Info/Committed-Info to let ThreadState know if it's a branch or not + // Set the CPU's PCs - This contributes to the precise state of the CPU + // which can be used when restoring a thread to the CPU after a fork or + // after an exception + // ================= + // @TODO: Set-Up Grad-Info/Committed-Info to let ThreadState know if + // it's a branch or not setPC(inst->readPC(), tid); setNextPC(inst->readNextPC(), tid); setNextNPC(inst->readNextNPC(), tid); @@ -1112,7 +1133,8 @@ InOrderCPU::instDone(DynInstPtr inst, ThreadID tid) // Broadcast to other resources an instruction // has been completed - resPool->scheduleEvent((CPUEventType)ResourcePool::InstGraduated, inst, tid); + resPool->scheduleEvent((CPUEventType)ResourcePool::InstGraduated, inst, + tid); // Finally, remove instruction from CPU removeInst(inst); @@ -1380,7 +1402,8 @@ InOrderCPU::read(DynInstPtr inst, Addr addr, T &data, unsigned flags) { //@TODO: Generalize name "CacheUnit" to "MemUnit" just in case // you want to run w/out caches? 
- CacheUnit *cache_res = dynamic_cast(resPool->getResource(dataPortIdx)); + CacheUnit *cache_res = + dynamic_cast(resPool->getResource(dataPortIdx)); return cache_res->read(inst, addr, data, flags); } @@ -1483,14 +1506,16 @@ InOrderCPU::write(DynInstPtr inst, uint8_t data, Addr addr, template<> Fault -InOrderCPU::write(DynInstPtr inst, double data, Addr addr, unsigned flags, uint64_t *res) +InOrderCPU::write(DynInstPtr inst, double data, Addr addr, unsigned flags, + uint64_t *res) { return write(inst, *(uint64_t*)&data, addr, flags, res); } template<> Fault -InOrderCPU::write(DynInstPtr inst, float data, Addr addr, unsigned flags, uint64_t *res) +InOrderCPU::write(DynInstPtr inst, float data, Addr addr, unsigned flags, + uint64_t *res) { return write(inst, *(uint32_t*)&data, addr, flags, res); } @@ -1498,7 +1523,8 @@ InOrderCPU::write(DynInstPtr inst, float data, Addr addr, unsigned flags, uint64 template<> Fault -InOrderCPU::write(DynInstPtr inst, int32_t data, Addr addr, unsigned flags, uint64_t *res) +InOrderCPU::write(DynInstPtr inst, int32_t data, Addr addr, unsigned flags, + uint64_t *res) { return write(inst, (uint32_t)data, addr, flags, res); } diff --git a/src/cpu/inorder/cpu.hh b/src/cpu/inorder/cpu.hh index 3320532ba..463ca5445 100644 --- a/src/cpu/inorder/cpu.hh +++ b/src/cpu/inorder/cpu.hh @@ -144,9 +144,11 @@ class InOrderCPU : public BaseCPU void scheduleTickEvent(int delay) { if (tickEvent.squashed()) - mainEventQueue.reschedule(&tickEvent, nextCycle(curTick + ticks(delay))); + mainEventQueue.reschedule(&tickEvent, + nextCycle(curTick + ticks(delay))); else if (!tickEvent.scheduled()) - mainEventQueue.schedule(&tickEvent, nextCycle(curTick + ticks(delay))); + mainEventQueue.schedule(&tickEvent, + nextCycle(curTick + ticks(delay))); } /** Unschedule tick event, regardless of its current state. */ @@ -228,7 +230,8 @@ class InOrderCPU : public BaseCPU /** Interface between the CPU and CPU resources. */ ResourcePool *resPool; - /** Instruction used to signify that there is no *real* instruction in buffer slot */ + /** Instruction used to signify that there is no *real* instruction in + buffer slot */ DynInstPtr dummyBufferInst; /** Used by resources to signify a denied access to a resource. */ @@ -420,7 +423,11 @@ class InOrderCPU : public BaseCPU /** Get & Update Next Event Number */ InstSeqNum getNextEventNum() { +#ifdef DEBUG return cpuEventNum++; +#else + return 0; +#endif } /** Register file accessors */ @@ -550,8 +557,8 @@ class InOrderCPU : public BaseCPU */ std::queue removeList; - /** List of all the resource requests that will be removed at the end of this - * cycle. + /** List of all the resource requests that will be removed at the end + * of this cycle. */ std::queue reqRemoveList; @@ -632,8 +639,12 @@ class InOrderCPU : public BaseCPU // LL/SC debug functionality unsigned stCondFails; - unsigned readStCondFailures() { return stCondFails; } - unsigned setStCondFailures(unsigned st_fails) { return stCondFails = st_fails; } + + unsigned readStCondFailures() + { return stCondFails; } + + unsigned setStCondFailures(unsigned st_fails) + { return stCondFails = st_fails; } /** Returns a pointer to a thread context. */ ThreadContext *tcBase(ThreadID tid = 0) @@ -663,9 +674,16 @@ class InOrderCPU : public BaseCPU /** The global sequence number counter. */ InstSeqNum globalSeqNum[ThePipeline::MaxThreads]; +#ifdef DEBUG /** The global event number counter. 
*/ InstSeqNum cpuEventNum; + /** Number of resource requests active in CPU **/ + unsigned resReqCount; + + Stats::Scalar maxResReqCount; +#endif + /** Counter of how many stages have completed switching out. */ int switchCount; diff --git a/src/cpu/inorder/resource.cc b/src/cpu/inorder/resource.cc index cb5681bc1..286332e08 100644 --- a/src/cpu/inorder/resource.cc +++ b/src/cpu/inorder/resource.cc @@ -80,7 +80,8 @@ Resource::regStats() { instReqsProcessed .name(name() + ".instReqsProcessed") - .desc("Number of Instructions Requests that completed in this resource."); + .desc("Number of Instructions Requests that completed in " + "this resource."); } int @@ -98,7 +99,8 @@ Resource::slotsInUse() void Resource::freeSlot(int slot_idx) { - DPRINTF(RefCount, "Removing [tid:%i] [sn:%i]'s request from resource [slot:%i].\n", + DPRINTF(RefCount, "Removing [tid:%i] [sn:%i]'s request from resource " + "[slot:%i].\n", reqMap[slot_idx]->inst->readTid(), reqMap[slot_idx]->inst->seqNum, slot_idx); @@ -159,7 +161,8 @@ Resource::getSlot(DynInstPtr inst) while (map_it != map_end) { if ((*map_it).second) { - DPRINTF(Resource, "Currently Serving request from: [tid:%i] [sn:%i].\n", + DPRINTF(Resource, "Currently Serving request from: " + "[tid:%i] [sn:%i].\n", (*map_it).second->getInst()->readTid(), (*map_it).second->getInst()->seqNum); } @@ -202,10 +205,12 @@ Resource::request(DynInstPtr inst) inst_req = getRequest(inst, stage_num, id, slot_num, cmd); if (inst->staticInst) { - DPRINTF(Resource, "[tid:%i]: [sn:%i] requesting this resource.\n", + DPRINTF(Resource, "[tid:%i]: [sn:%i] requesting this " + "resource.\n", inst->readTid(), inst->seqNum); } else { - DPRINTF(Resource, "[tid:%i]: instruction requesting this resource.\n", + DPRINTF(Resource, "[tid:%i]: instruction requesting this " + "resource.\n", inst->readTid()); } @@ -232,7 +237,8 @@ Resource::requestAgain(DynInstPtr inst, bool &do_request) do_request = true; if (inst->staticInst) { - DPRINTF(Resource, "[tid:%i]: [sn:%i] requesting this resource again.\n", + DPRINTF(Resource, "[tid:%i]: [sn:%i] requesting this resource " + "again.\n", inst->readTid(), inst->seqNum); } else { DPRINTF(Resource, "[tid:%i]: requesting this resource again.\n", @@ -394,7 +400,41 @@ Resource::unscheduleEvent(DynInstPtr inst) int ResourceRequest::resReqID = 0; -int ResourceRequest::resReqCount = 0; +int ResourceRequest::maxReqCount = 0; + +ResourceRequest::ResourceRequest(Resource *_res, DynInstPtr _inst, + int stage_num, int res_idx, int slot_num, + unsigned _cmd) + : res(_res), inst(_inst), cmd(_cmd), stageNum(stage_num), + resIdx(res_idx), slotNum(slot_num), completed(false), + squashed(false), processing(false), waiting(false) +{ +#ifdef DEBUG + reqID = resReqID++; + res->cpu->resReqCount++; + DPRINTF(ResReqCount, "Res. Req %i created. resReqCount=%i.\n", reqID, + res->cpu->resReqCount); + + if (res->cpu->resReqCount > 100) { + fatal("Too many undeleted resource requests. Memory leak?\n"); + } + + if (res->cpu->resReqCount > maxReqCount) { + maxReqCount = res->cpu->resReqCount; + res->cpu->maxResReqCount = maxReqCount; + } + +#endif +} + +ResourceRequest::~ResourceRequest() +{ +#ifdef DEBUG + res->cpu->resReqCount--; + DPRINTF(ResReqCount, "Res. Req %i deleted. 
resReqCount=%i.\n", reqID, + res->cpu->resReqCount); +#endif +} void ResourceRequest::done(bool completed) diff --git a/src/cpu/inorder/resource.hh b/src/cpu/inorder/resource.hh index 605b7f690..2cf8e61eb 100644 --- a/src/cpu/inorder/resource.hh +++ b/src/cpu/inorder/resource.hh @@ -70,7 +70,8 @@ class Resource { /** Define this function if resource, has a port to connect to an outside * simulation object. */ - virtual Port* getPort(const std::string &if_name, int idx) { return NULL; } + virtual Port* getPort(const std::string &if_name, int idx) + { return NULL; } /** Return ID for this resource */ int getId() { return id; } @@ -114,9 +115,9 @@ class Resource { /** Free a resource slot */ virtual void freeSlot(int slot_idx); - /** Request usage of a resource for this instruction. If this instruction already - * has made this request to this resource, and that request is uncompleted - * this function will just return that request + /** Request usage of a resource for this instruction. If this instruction + * already has made this request to this resource, and that request is + * uncompleted this function will just return that request */ virtual ResourceRequest* getRequest(DynInstPtr _inst, int stage_num, int res_idx, int slot_num, @@ -166,7 +167,8 @@ class Resource { /** Schedule resource event, regardless of its current state. */ void scheduleEvent(int slot_idx, int delay); - /** Find instruction in list, Schedule resource event, regardless of its current state. */ + /** Find instruction in list, Schedule resource event, regardless of its + * current state. */ bool scheduleEvent(DynInstPtr inst, int delay); /** Unschedule resource event, regardless of its current state. */ @@ -303,30 +305,14 @@ class ResourceRequest static int resReqID; - static int resReqCount; - + static int maxReqCount; + public: ResourceRequest(Resource *_res, DynInstPtr _inst, int stage_num, - int res_idx, int slot_num, unsigned _cmd) - : res(_res), inst(_inst), cmd(_cmd), stageNum(stage_num), - resIdx(res_idx), slotNum(slot_num), completed(false), - squashed(false), processing(false), waiting(false) - { - reqID = resReqID++; - resReqCount++; - DPRINTF(ResReqCount, "Res. Req %i created. resReqCount=%i.\n", reqID, resReqCount); - - if (resReqCount > 100) { - fatal("Too many undeleted resource requests. Memory leak?\n"); - } - } - - virtual ~ResourceRequest() - { - resReqCount--; - DPRINTF(ResReqCount, "Res. Req %i deleted. 
resReqCount=%i.\n", reqID, resReqCount); - } - + int res_idx, int slot_num, unsigned _cmd); + + virtual ~ResourceRequest(); + int reqID; /** Acknowledge that this is a request is done and remove -- cgit v1.2.3 From 0e96798fe0a56936f8590dbd301f2b07a1850e22 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:25:13 -0500 Subject: configs/inorder: add options for switch-on-miss to inorder cpu --- src/cpu/inorder/InOrderCPU.py | 5 +++++ src/cpu/inorder/cpu.cc | 11 ++++++++++- src/cpu/inorder/cpu.hh | 10 +++++++++- 3 files changed, 24 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/cpu/inorder/InOrderCPU.py b/src/cpu/inorder/InOrderCPU.py index a0b0466a7..d6db346d4 100644 --- a/src/cpu/inorder/InOrderCPU.py +++ b/src/cpu/inorder/InOrderCPU.py @@ -30,10 +30,15 @@ from m5.params import * from m5.proxy import * from BaseCPU import BaseCPU +class ThreadModel(Enum): + vals = ['Single', 'SMT', 'SwitchOnCacheMiss'] + class InOrderCPU(BaseCPU): type = 'InOrderCPU' activity = Param.Unsigned(0, "Initial count") + threadModel = Param.ThreadModel('SMT', "Multithreading model (SE-MODE only)") + cachePorts = Param.Unsigned(2, "Cache Ports") stageWidth = Param.Unsigned(1, "Stage width") diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index 38f6b4eed..a1e6c9c86 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -197,7 +197,7 @@ InOrderCPU::InOrderCPU(Params *params) deferRegistration(false/*params->deferRegistration*/), stageTracing(params->stageTracing), numVirtProcs(1) -{ +{ ThreadID active_threads; cpu_params = params; @@ -216,6 +216,15 @@ InOrderCPU::InOrderCPU(Params *params) "in your InOrder implementation or " "edit your workload size."); } + + if (active_threads > 1) { + threadModel = (InOrderCPU::ThreadModel) params->threadModel; + } else { + threadModel = Single; + } + + + #endif // Bind the fetch & data ports from the resource pool. diff --git a/src/cpu/inorder/cpu.hh b/src/cpu/inorder/cpu.hh index 463ca5445..804054f8c 100644 --- a/src/cpu/inorder/cpu.hh +++ b/src/cpu/inorder/cpu.hh @@ -100,6 +100,15 @@ class InOrderCPU : public BaseCPU /** Type of core that this is */ std::string coreType; + // Only need for SE MODE + enum ThreadModel { + Single, + SMT, + SwitchOnCacheMiss + }; + + ThreadModel threadModel; + int readCpuId() { return cpu_id; } void setCpuId(int val) { cpu_id = val; } @@ -117,7 +126,6 @@ class InOrderCPU : public BaseCPU /** Overall CPU status. */ Status _status; - private: /** Define TickEvent for the CPU */ class TickEvent : public Event -- cgit v1.2.3 From a892af7b261e1c48b06ccbded5551e958c778414 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:25:27 -0500 Subject: inorder: dont allow early loads - loads were happening on same cycle as the address was generated which is slightly unrealistic. 
Instead, force address generation to be on separate cycle from load initiation - also, mark the stages in a more traditional way (F-D-X-M-W) --- src/cpu/inorder/pipeline_traits.cc | 49 +++++++++++++++++++++----------------- src/cpu/inorder/pipeline_traits.hh | 3 ++- 2 files changed, 29 insertions(+), 23 deletions(-) (limited to 'src') diff --git a/src/cpu/inorder/pipeline_traits.cc b/src/cpu/inorder/pipeline_traits.cc index ed72ab1d0..8ff26dce2 100644 --- a/src/cpu/inorder/pipeline_traits.cc +++ b/src/cpu/inorder/pipeline_traits.cc @@ -65,16 +65,18 @@ int getNextPriority(DynInstPtr &inst, int stage_num) void createFrontEndSchedule(DynInstPtr &inst) { - InstStage *I = inst->addStage(); - InstStage *E = inst->addStage(); - - I->needs(FetchSeq, FetchSeqUnit::AssignNextPC); - I->needs(ICache, CacheUnit::InitiateFetch); - - E->needs(ICache, CacheUnit::CompleteFetch); - E->needs(Decode, DecodeUnit::DecodeInst); - E->needs(BPred, BranchPredictor::PredictBranch); - E->needs(FetchSeq, FetchSeqUnit::UpdateTargetPC); + InstStage *F = inst->addStage(); + InstStage *D = inst->addStage(); + + // FETCH + F->needs(FetchSeq, FetchSeqUnit::AssignNextPC); + F->needs(ICache, CacheUnit::InitiateFetch); + + // DECODE + D->needs(ICache, CacheUnit::CompleteFetch); + D->needs(Decode, DecodeUnit::DecodeInst); + D->needs(BPred, BranchPredictor::PredictBranch); + D->needs(FetchSeq, FetchSeqUnit::UpdateTargetPC); } bool createBackEndSchedule(DynInstPtr &inst) @@ -83,45 +85,48 @@ bool createBackEndSchedule(DynInstPtr &inst) return false; } - InstStage *E = inst->currentStage(); + InstStage *X = inst->addStage(); InstStage *M = inst->addStage(); - InstStage *A = inst->addStage(); InstStage *W = inst->addStage(); + // EXECUTE for (int idx=0; idx < inst->numSrcRegs(); idx++) { if (!idx || !inst->isStore()) { - E->needs(RegManager, UseDefUnit::ReadSrcReg, idx); + X->needs(RegManager, UseDefUnit::ReadSrcReg, idx); } } - if ( inst->isNonSpeculative() ) { // skip execution of non speculative insts until later } else if ( inst->isMemRef() ) { if ( inst->isLoad() ) { - E->needs(AGEN, AGENUnit::GenerateAddr); - E->needs(DCache, CacheUnit::InitiateReadData); + X->needs(AGEN, AGENUnit::GenerateAddr); } } else if (inst->opClass() == IntMultOp || inst->opClass() == IntDivOp) { - E->needs(MDU, MultDivUnit::StartMultDiv); + X->needs(MDU, MultDivUnit::StartMultDiv); } else { - E->needs(ExecUnit, ExecutionUnit::ExecuteInst); + X->needs(ExecUnit, ExecutionUnit::ExecuteInst); } if (inst->opClass() == IntMultOp || inst->opClass() == IntDivOp) { - M->needs(MDU, MultDivUnit::EndMultDiv); + X->needs(MDU, MultDivUnit::EndMultDiv); } + // MEMORY if ( inst->isLoad() ) { - M->needs(DCache, CacheUnit::CompleteReadData); + M->needs(DCache, CacheUnit::InitiateReadData); } else if ( inst->isStore() ) { M->needs(RegManager, UseDefUnit::ReadSrcReg, 1); M->needs(AGEN, AGENUnit::GenerateAddr); M->needs(DCache, CacheUnit::InitiateWriteData); } - if ( inst->isStore() ) { - A->needs(DCache, CacheUnit::CompleteWriteData); + + // WRITEBACK + if ( inst->isLoad() ) { + W->needs(DCache, CacheUnit::CompleteReadData); + } else if ( inst->isStore() ) { + W->needs(DCache, CacheUnit::CompleteWriteData); } if ( inst->isNonSpeculative() ) { diff --git a/src/cpu/inorder/pipeline_traits.hh b/src/cpu/inorder/pipeline_traits.hh index 3c28894e7..ddc8a3ad7 100644 --- a/src/cpu/inorder/pipeline_traits.hh +++ b/src/cpu/inorder/pipeline_traits.hh @@ -113,7 +113,8 @@ namespace ThePipeline { }; struct entryCompare { - bool operator()(const ScheduleEntry* lhs, const ScheduleEntry* 
rhs) const + bool operator()(const ScheduleEntry* lhs, const ScheduleEntry* rhs) + const { // Prioritize first by stage number that the resource is needed if (lhs->stageNum > rhs->stageNum) { -- cgit v1.2.3 From e8312ab6f700b31dfa357607ab51c9c05014572d Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:25:48 -0500 Subject: inorder: switch out buffer add buffer for instructions to switch out to in a pipeline stage can't squash the instruction and remove the pipeline so we kind of need to 'suspend' an instruction at the stage while the memory stall resolves for the switch on cache miss model --- src/cpu/inorder/pipeline_stage.cc | 137 ++++++++++++++++++++++---------------- src/cpu/inorder/pipeline_stage.hh | 19 ++++-- 2 files changed, 95 insertions(+), 61 deletions(-) (limited to 'src') diff --git a/src/cpu/inorder/pipeline_stage.cc b/src/cpu/inorder/pipeline_stage.cc index dc0378bf3..8d14aae27 100644 --- a/src/cpu/inorder/pipeline_stage.cc +++ b/src/cpu/inorder/pipeline_stage.cc @@ -44,6 +44,9 @@ PipelineStage::PipelineStage(Params *params, unsigned stage_num) stageBufferMax(ThePipeline::interStageBuffSize[stage_num]), prevStageValid(false), nextStageValid(false) { + switchedOutBuffer.resize(ThePipeline::MaxThreads); + switchedOutValid.resize(ThePipeline::MaxThreads); + init(params); } @@ -267,7 +270,8 @@ PipelineStage::isBlocked(ThreadID tid) bool PipelineStage::block(ThreadID tid) { - DPRINTF(InOrderStage, "[tid:%d]: Blocking, sending block signal back to previous stages.\n", tid); + DPRINTF(InOrderStage, "[tid:%d]: Blocking, sending block signal back to " + "previous stages.\n", tid); // Add the current inputs to the skid buffer so they can be // reprocessed when this stage unblocks. @@ -296,7 +300,8 @@ PipelineStage::block(ThreadID tid) void PipelineStage::blockDueToBuffer(ThreadID tid) { - DPRINTF(InOrderStage, "[tid:%d]: Blocking instructions from passing to next stage.\n", tid); + DPRINTF(InOrderStage, "[tid:%d]: Blocking instructions from passing to " + "next stage.\n", tid); if (stageStatus[tid] != Blocked) { // Set the status to Blocked. @@ -334,8 +339,9 @@ PipelineStage::squashDueToBranch(DynInstPtr &inst, ThreadID tid) { if (cpu->squashSeqNum[tid] < inst->seqNum && cpu->lastSquashCycle[tid] == curTick){ - DPRINTF(Resource, "Ignoring [sn:%i] squash signal due to another stage's squash " - "signal for after [sn:%i].\n", inst->seqNum, cpu->squashSeqNum[tid]); + DPRINTF(Resource, "Ignoring [sn:%i] squash signal due to another " + "stage's squash signal for after [sn:%i].\n", inst->seqNum, + cpu->squashSeqNum[tid]); } else { // Send back mispredict information. 
toPrevStages->stageInfo[stageNum][tid].branchMispredict = true; @@ -346,20 +352,28 @@ PipelineStage::squashDueToBranch(DynInstPtr &inst, ThreadID tid) #if ISA_HAS_DELAY_SLOT - toPrevStages->stageInfo[stageNum][tid].branchTaken = inst->readNextNPC() != + toPrevStages->stageInfo[stageNum][tid].branchTaken = + inst->readNextNPC() != (inst->readNextPC() + sizeof(TheISA::MachInst)); - toPrevStages->stageInfo[stageNum][tid].bdelayDoneSeqNum = inst->bdelaySeqNum; + + toPrevStages->stageInfo[stageNum][tid].bdelayDoneSeqNum = + inst->bdelaySeqNum; + InstSeqNum squash_seq_num = inst->bdelaySeqNum; #else - toPrevStages->stageInfo[stageNum][tid].branchTaken = inst->readNextPC() != + toPrevStages->stageInfo[stageNum][tid].branchTaken = + inst->readNextPC() != (inst->readPC() + sizeof(TheISA::MachInst)); + toPrevStages->stageInfo[stageNum][tid].bdelayDoneSeqNum = inst->seqNum; InstSeqNum squash_seq_num = inst->seqNum; #endif - DPRINTF(InOrderStage, "Target being re-set to %08p\n", inst->readPredTarg()); - DPRINTF(InOrderStage, "[tid:%i]: Squashing after [sn:%i], due to [sn:%i] " - "branch.\n", tid, squash_seq_num, inst->seqNum); + DPRINTF(InOrderStage, "Target being re-set to %08p\n", + inst->readPredTarg()); + DPRINTF(InOrderStage, "[tid:%i]: Squashing after [sn:%i], " + "due to [sn:%i] branch.\n", tid, squash_seq_num, + inst->seqNum); // Save squash num for later stage use cpu->squashSeqNum[tid] = squash_seq_num; @@ -394,8 +408,8 @@ PipelineStage::squash(InstSeqNum squash_seq_num, ThreadID tid) squashPrevStageInsts(squash_seq_num, tid); - DPRINTF(InOrderStage, "[tid:%i]: Removing instructions from incoming stage skidbuffer.\n", - tid); + DPRINTF(InOrderStage, "[tid:%i]: Removing instructions from incoming stage" + " skidbuffer.\n", tid); while (!skidBuffer[tid].empty()) { if (skidBuffer[tid].front()->seqNum <= squash_seq_num) { DPRINTF(InOrderStage, "[tid:%i]: Cannot remove skidBuffer " @@ -404,8 +418,9 @@ PipelineStage::squash(InstSeqNum squash_seq_num, ThreadID tid) skidBuffer[tid].size()); break; } - DPRINTF(InOrderStage, "[tid:%i]: Removing instruction, [sn:%i] PC %08p.\n", - tid, skidBuffer[tid].front()->seqNum, skidBuffer[tid].front()->PC); + DPRINTF(InOrderStage, "[tid:%i]: Removing instruction, [sn:%i] " + " PC %08p.\n", tid, skidBuffer[tid].front()->seqNum, + skidBuffer[tid].front()->PC); skidBuffer[tid].pop(); } @@ -427,7 +442,8 @@ PipelineStage::stageBufferAvail() int avail = stageBufferMax - total -0;// incoming_insts; if (avail < 0) - fatal("stageNum %i:stageBufferAvail() < 0...stBMax=%i,total=%i,incoming=%i=>%i", + fatal("stageNum %i:stageBufferAvail() < 0..." 
+ "stBMax=%i,total=%i,incoming=%i=>%i", stageNum, stageBufferMax, total, incoming_insts, avail); return avail; @@ -443,7 +459,8 @@ PipelineStage::canSendInstToStage(unsigned stage_num) } if (!buffer_avail && nextStageQueueValid(stage_num)) { - DPRINTF(InOrderStall, "STALL: No room in stage %i buffer.\n", stageNum + 1); + DPRINTF(InOrderStall, "STALL: No room in stage %i buffer.\n", + stageNum + 1); } return buffer_avail; @@ -461,8 +478,9 @@ PipelineStage::skidInsert(ThreadID tid) assert(tid == inst->threadNumber); - DPRINTF(InOrderStage,"[tid:%i]: Inserting [sn:%lli] PC:%#x into stage skidBuffer %i\n", - tid, inst->seqNum, inst->readPC(), inst->threadNumber); + DPRINTF(InOrderStage,"[tid:%i]: Inserting [sn:%lli] PC:%#x into stage " + "skidBuffer %i\n", tid, inst->seqNum, inst->readPC(), + inst->threadNumber); skidBuffer[tid].push(inst); } @@ -547,16 +565,16 @@ PipelineStage::sortInsts() for (int i = 0; i < insts_from_prev_stage; ++i) { if (prevStage->insts[i]->isSquashed()) { - DPRINTF(InOrderStage, "[tid:%i]: Ignoring squashed [sn:%i], not inserting " - "into stage buffer.\n", + DPRINTF(InOrderStage, "[tid:%i]: Ignoring squashed [sn:%i], " + "not inserting into stage buffer.\n", prevStage->insts[i]->readTid(), prevStage->insts[i]->seqNum); continue; } - DPRINTF(InOrderStage, "[tid:%i]: Inserting [sn:%i] into stage buffer.\n", - prevStage->insts[i]->readTid(), + DPRINTF(InOrderStage, "[tid:%i]: Inserting [sn:%i] into stage " + "buffer.\n", prevStage->insts[i]->readTid(), prevStage->insts[i]->seqNum); ThreadID tid = prevStage->insts[i]->threadNumber; @@ -611,8 +629,8 @@ PipelineStage::checkSignalsAndUpdate(ThreadID tid) // Check for squash from later pipeline stages for (int stage_idx=stageNum; stage_idx < NumStages; stage_idx++) { if (fromNextStages->stageInfo[stage_idx][tid].squash) { - DPRINTF(InOrderStage, "[tid:%u]: Squashing instructions due to squash " - "from stage %u.\n", tid, stage_idx); + DPRINTF(InOrderStage, "[tid:%u]: Squashing instructions due to " + "squash from stage %u.\n", tid, stage_idx); InstSeqNum squash_seq_num = fromNextStages-> stageInfo[stage_idx][tid].bdelayDoneSeqNum; squash(squash_seq_num, tid); @@ -625,8 +643,8 @@ PipelineStage::checkSignalsAndUpdate(ThreadID tid) } if (stageStatus[tid] == Blocked) { - DPRINTF(InOrderStage, "[tid:%u]: Done blocking, switching to unblocking.\n", - tid); + DPRINTF(InOrderStage, "[tid:%u]: Done blocking, switching to " + "unblocking.\n", tid); stageStatus[tid] = Unblocking; @@ -637,15 +655,15 @@ PipelineStage::checkSignalsAndUpdate(ThreadID tid) if (stageStatus[tid] == Squashing) { if (!skidBuffer[tid].empty()) { - DPRINTF(InOrderStage, "[tid:%u]: Done squashing, switching to unblocking.\n", - tid); + DPRINTF(InOrderStage, "[tid:%u]: Done squashing, switching to " + "unblocking.\n", tid); stageStatus[tid] = Unblocking; } else { // Switch status to running if stage isn't being told to block or // squash this cycle. - DPRINTF(InOrderStage, "[tid:%u]: Done squashing, switching to running.\n", - tid); + DPRINTF(InOrderStage, "[tid:%u]: Done squashing, switching to " + "running.\n", tid); stageStatus[tid] = Running; } @@ -717,13 +735,13 @@ PipelineStage::unsetResStall(ResReqPtr res_req, ThreadID tid) } if (stalls[tid].resources.size() == 0) { - DPRINTF(InOrderStage, "[tid:%u]: There are no remaining resource stalls.\n", - tid); + DPRINTF(InOrderStage, "[tid:%u]: There are no remaining resource" + "stalls.\n", tid); } } -// @TODO: Update How we handled threads in CPU. 
Maybe threads shouldnt be handled -// one at a time, but instead first come first serve by instruction? +// @TODO: Update How we handled threads in CPU. Maybe threads shouldnt be +// handled one at a time, but instead first come first serve by instruction? // Questions are how should a pipeline stage handle thread-specific stalls & // pipeline squashes void @@ -749,8 +767,8 @@ PipelineStage::processStage(bool &status_change) DPRINTF(InOrderStage, "%i left in stage %i incoming buffer.\n", skidSize(), stageNum); - DPRINTF(InOrderStage, "%i available in stage %i incoming buffer.\n", stageBufferAvail(), - stageNum); + DPRINTF(InOrderStage, "%i available in stage %i incoming buffer.\n", + stageBufferAvail(), stageNum); } void @@ -828,8 +846,8 @@ PipelineStage::processInsts(ThreadID tid) inst = insts_to_stage.front(); - DPRINTF(InOrderStage, "[tid:%u]: Processing instruction [sn:%lli] with " - "PC %#x\n", + DPRINTF(InOrderStage, "[tid:%u]: Processing instruction [sn:%lli] " + "with PC %#x\n", tid, inst->seqNum, inst->readPC()); if (inst->isSquashed()) { @@ -856,8 +874,8 @@ PipelineStage::processInsts(ThreadID tid) // Send to Next Stage or Break Loop if (nextStageValid && !sendInstToNextStage(inst)) { - DPRINTF(InOrderStage, "[tid:%i] [sn:%i] unable to proceed to stage %i.\n", - tid, inst->seqNum,inst->nextStage); + DPRINTF(InOrderStage, "[tid:%i] [sn:%i] unable to proceed to stage" + " %i.\n", tid, inst->seqNum,inst->nextStage); break; } @@ -897,14 +915,15 @@ PipelineStage::processInstSchedule(DynInstPtr inst) int res_num = inst->nextResource(); - DPRINTF(InOrderStage, "[tid:%i]: [sn:%i]: sending request to %s.\n", - tid, inst->seqNum, cpu->resPool->name(res_num)); + DPRINTF(InOrderStage, "[tid:%i]: [sn:%i]: sending request to %s." + "\n", tid, inst->seqNum, cpu->resPool->name(res_num)); ResReqPtr req = cpu->resPool->request(res_num, inst); if (req->isCompleted()) { - DPRINTF(InOrderStage, "[tid:%i]: [sn:%i] request to %s completed.\n", - tid, inst->seqNum, cpu->resPool->name(res_num)); + DPRINTF(InOrderStage, "[tid:%i]: [sn:%i] request to %s " + "completed.\n", tid, inst->seqNum, + cpu->resPool->name(res_num)); if (req->fault == NoFault) { inst->popSchedEntry(); @@ -913,8 +932,8 @@ PipelineStage::processInstSchedule(DynInstPtr inst) curTick, req->fault->name()); } } else { - DPRINTF(InOrderStage, "[tid:%i]: [sn:%i] request to %s failed.\n", - tid, inst->seqNum, cpu->resPool->name(res_num)); + DPRINTF(InOrderStage, "[tid:%i]: [sn:%i] request to %s failed." 
+ "\n", tid, inst->seqNum, cpu->resPool->name(res_num)); last_req_completed = false; @@ -956,12 +975,12 @@ PipelineStage::sendInstToNextStage(DynInstPtr inst) assert(next_stage >= 1); assert(prev_stage >= 0); - DPRINTF(InOrderStage, "[tid:%u]: Attempting to send instructions to stage %u.\n", tid, - stageNum+1); + DPRINTF(InOrderStage, "[tid:%u]: Attempting to send instructions to " + "stage %u.\n", tid, stageNum+1); if (!canSendInstToStage(inst->nextStage)) { - DPRINTF(InOrderStage, "[tid:%u]: Could not send instruction to stage %u.\n", tid, - stageNum+1); + DPRINTF(InOrderStage, "[tid:%u]: Could not send instruction to " + "stage %u.\n", tid, stageNum+1); return false; } @@ -969,12 +988,14 @@ PipelineStage::sendInstToNextStage(DynInstPtr inst) if (nextStageQueueValid(inst->nextStage - 1)) { if (inst->seqNum > cpu->squashSeqNum[tid] && curTick == cpu->lastSquashCycle[tid]) { - DPRINTF(InOrderStage, "[tid:%u]: [sn:%i]: squashed, skipping insertion " - "into stage %i queue.\n", tid, inst->seqNum, inst->nextStage); + DPRINTF(InOrderStage, "[tid:%u]: [sn:%i]: squashed, skipping " + "insertion into stage %i queue.\n", tid, inst->seqNum, + inst->nextStage); } else { if (nextStageValid) { - DPRINTF(InOrderStage, "[tid:%u] %i slots available in next stage buffer.\n", - tid, cpu->pipelineStage[next_stage]->stageBufferAvail()); + DPRINTF(InOrderStage, "[tid:%u] %i slots available in next " + "stage buffer.\n", tid, + cpu->pipelineStage[next_stage]->stageBufferAvail()); } DPRINTF(InOrderStage, "[tid:%u]: [sn:%i]: being placed into " @@ -982,11 +1003,13 @@ PipelineStage::sendInstToNextStage(DynInstPtr inst) tid, inst->seqNum, toNextStageIndex, cpu->pipelineStage[prev_stage]->nextStageQueue->id()); - int next_stage_idx = cpu->pipelineStage[prev_stage]->nextStage->size; + int next_stage_idx = + cpu->pipelineStage[prev_stage]->nextStage->size; - // Place instructions in inter-stage communication struct for the next + // Place instructions in inter-stage communication struct for next // pipeline stage to read next cycle - cpu->pipelineStage[prev_stage]->nextStage->insts[next_stage_idx] = inst; + cpu->pipelineStage[prev_stage]->nextStage->insts[next_stage_idx] + = inst; ++(cpu->pipelineStage[prev_stage]->nextStage->size); diff --git a/src/cpu/inorder/pipeline_stage.hh b/src/cpu/inorder/pipeline_stage.hh index 86ee98132..42a632560 100644 --- a/src/cpu/inorder/pipeline_stage.hh +++ b/src/cpu/inorder/pipeline_stage.hh @@ -240,6 +240,8 @@ class PipelineStage */ virtual void squashDueToBranch(DynInstPtr &inst, ThreadID tid); + virtual void squashDueToMemStall(DynInstPtr &inst, ThreadID tid); + /** Squash instructions from stage buffer */ virtual void squashPrevStageInsts(InstSeqNum squash_seq_num, ThreadID tid); @@ -259,19 +261,28 @@ class PipelineStage /** List of active thread ids */ std::list *activeThreads; + /** Buffer of instructions switched out to mem-stall. + * Only used when using SwitchOnCacheMiss threading model + * Used as 1-to-1 mapping between ThreadID and Entry. + */ + std::vector switchedOutBuffer; + std::vector switchedOutValid; + /** Queue of all instructions coming from previous stage on this cycle. */ std::queue insts[ThePipeline::MaxThreads]; - /** Queue of instructions that are finished processing and ready to go next stage. - * This is used to prevent from processing an instrution more than once on any - * stage. NOTE: It is up to the PROGRAMMER must manage this as a queue + /** Queue of instructions that are finished processing and ready to go + * next stage. 
This is used to prevent from processing an instrution more + * than once on any stage. NOTE: It is up to the PROGRAMMER must manage + * this as a queue */ std::list instsToNextStage; /** Skid buffer between previous stage and this one. */ std::queue skidBuffer[ThePipeline::MaxThreads]; - /** Instruction used to signify that there is no *real* instruction in buffer slot */ + /** Instruction used to signify that there is no *real* instruction in + * buffer slot */ DynInstPtr dummyBufferInst; /** SeqNum of Squashing Branch Delay Instruction (used for MIPS) */ -- cgit v1.2.3 From d8e0935af2805bc2c4bdfbab7de2c63f7fde46f7 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:26:03 -0500 Subject: inorder: add insts to cpu event some events are going to need instruction data when they process, so just include the instruction in the event construction --- src/cpu/inorder/cpu.cc | 29 +++++++++++++++-------------- src/cpu/inorder/cpu.hh | 16 +++++++++------- 2 files changed, 24 insertions(+), 21 deletions(-) (limited to 'src') diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index a1e6c9c86..69aea0c57 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -84,10 +84,10 @@ InOrderCPU::TickEvent::description() } InOrderCPU::CPUEvent::CPUEvent(InOrderCPU *_cpu, CPUEventType e_type, - Fault fault, ThreadID _tid, unsigned _vpe) + Fault fault, ThreadID _tid, DynInstPtr inst) : Event(CPU_Tick_Pri), cpu(_cpu) { - setEvent(e_type, fault, _tid, _vpe); + setEvent(e_type, fault, _tid, inst); } @@ -317,7 +317,7 @@ InOrderCPU::InOrderCPU(Params *params) contextSwitch = false; // Define dummy instructions and resource requests to be used. - DynInstPtr dummyBufferInst = new InOrderDynInst(this, NULL, 0, 0); + dummyInst = new InOrderDynInst(this, NULL, 0, 0); dummyReq = new ResourceRequest(resPool->getResource(0), NULL, 0, 0, 0, 0); // Reset CPU to reset state. 
@@ -570,7 +570,7 @@ void InOrderCPU::trap(Fault fault, ThreadID tid, int delay) { //@ Squash Pipeline during TRAP - scheduleCpuEvent(Trap, fault, tid, 0/*vpe*/, delay); + scheduleCpuEvent(Trap, fault, tid, dummyInst, delay); } void @@ -581,9 +581,10 @@ InOrderCPU::trapCPU(Fault fault, ThreadID tid) void InOrderCPU::scheduleCpuEvent(CPUEventType c_event, Fault fault, - ThreadID tid, unsigned vpe, unsigned delay) + ThreadID tid, DynInstPtr inst, + unsigned delay) { - CPUEvent *cpu_event = new CPUEvent(this, c_event, fault, tid, vpe); + CPUEvent *cpu_event = new CPUEvent(this, c_event, fault, tid, inst); if (delay >= 0) { DPRINTF(InOrderCPU, "Scheduling CPU Event (%s) for cycle %i.\n", @@ -597,7 +598,7 @@ InOrderCPU::scheduleCpuEvent(CPUEventType c_event, Fault fault, // Broadcast event to the Resource Pool DynInstPtr dummy_inst = new InOrderDynInst(this, NULL, getNextEventNum(), tid); - resPool->scheduleEvent(c_event, dummy_inst, 0, 0, tid); + resPool->scheduleEvent(c_event, inst, 0, 0, tid); } inline bool @@ -699,7 +700,7 @@ InOrderCPU::enableVirtProcElement(unsigned vpe) "Enabling of concurrent virtual processor execution", vpe); - scheduleCpuEvent(EnableVPEs, NoFault, 0/*tid*/, vpe); + scheduleCpuEvent(EnableVPEs, NoFault, 0/*tid*/, dummyInst); } void @@ -725,7 +726,7 @@ InOrderCPU::disableVirtProcElement(ThreadID tid, unsigned vpe) "Disabling of concurrent virtual processor execution", vpe); - scheduleCpuEvent(DisableVPEs, NoFault, 0/*tid*/, vpe); + scheduleCpuEvent(DisableVPEs, NoFault, 0/*tid*/, dummyInst); } void @@ -759,7 +760,7 @@ InOrderCPU::enableMultiThreading(unsigned vpe) DPRINTF(InOrderCPU, "[vpe:%i]: Scheduling Enable Multithreading on " "virtual processor %i", vpe); - scheduleCpuEvent(EnableThreads, NoFault, 0/*tid*/, vpe); + scheduleCpuEvent(EnableThreads, NoFault, 0/*tid*/, dummyInst); } void @@ -786,7 +787,7 @@ InOrderCPU::disableMultiThreading(ThreadID tid, unsigned vpe) DPRINTF(InOrderCPU, "[tid:%i]: Scheduling Disable Multithreading on " "virtual processor %i", tid, vpe); - scheduleCpuEvent(DisableThreads, NoFault, tid, vpe); + scheduleCpuEvent(DisableThreads, NoFault, tid, dummyInst); } void @@ -850,7 +851,7 @@ InOrderCPU::activateContext(ThreadID tid, int delay) { DPRINTF(InOrderCPU,"[tid:%i]: Activating ...\n", tid); - scheduleCpuEvent(ActivateThread, NoFault, tid, 0/*vpe*/, delay); + scheduleCpuEvent(ActivateThread, NoFault, tid, dummyInst, delay); // Be sure to signal that there's some activity so the CPU doesn't // deschedule itself. @@ -863,7 +864,7 @@ InOrderCPU::activateContext(ThreadID tid, int delay) void InOrderCPU::suspendContext(ThreadID tid, int delay) { - scheduleCpuEvent(SuspendThread, NoFault, tid, 0/*vpe*/, delay); + scheduleCpuEvent(SuspendThread, NoFault, tid, dummyInst, delay); //_status = Idle; } @@ -877,7 +878,7 @@ InOrderCPU::suspendThread(ThreadID tid) void InOrderCPU::deallocateContext(ThreadID tid, int delay) { - scheduleCpuEvent(DeallocateThread, NoFault, tid, 0/*vpe*/, delay); + scheduleCpuEvent(DeallocateThread, NoFault, tid, dummyInst, delay); } void diff --git a/src/cpu/inorder/cpu.hh b/src/cpu/inorder/cpu.hh index 804054f8c..4c7b2710d 100644 --- a/src/cpu/inorder/cpu.hh +++ b/src/cpu/inorder/cpu.hh @@ -199,22 +199,24 @@ class InOrderCPU : public BaseCPU public: CPUEventType cpuEventType; ThreadID tid; - unsigned vpe; + DynInstPtr inst; Fault fault; - + unsigned vpe; + public: /** Constructs a CPU event. 
*/ CPUEvent(InOrderCPU *_cpu, CPUEventType e_type, Fault fault, - ThreadID _tid, unsigned _vpe); + ThreadID _tid, DynInstPtr inst); /** Set Type of Event To Be Scheduled */ void setEvent(CPUEventType e_type, Fault _fault, ThreadID _tid, - unsigned _vpe) + DynInstPtr _inst) { fault = _fault; cpuEventType = e_type; tid = _tid; - vpe = _vpe; + inst = _inst; + vpe = 0; } /** Processes a resource event. */ @@ -232,7 +234,7 @@ class InOrderCPU : public BaseCPU /** Schedule a CPU Event */ void scheduleCpuEvent(CPUEventType cpu_event, Fault fault, ThreadID tid, - unsigned vpe, unsigned delay = 0); + DynInstPtr inst, unsigned delay = 0); public: /** Interface between the CPU and CPU resources. */ @@ -240,7 +242,7 @@ class InOrderCPU : public BaseCPU /** Instruction used to signify that there is no *real* instruction in buffer slot */ - DynInstPtr dummyBufferInst; + DynInstPtr dummyInst; /** Used by resources to signify a denied access to a resource. */ ResourceRequest *dummyReq; -- cgit v1.2.3 From eac5eac67ae8076e934d78063a24eeef08f25413 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:26:13 -0500 Subject: inorder: squash on memory stall add code to recognize memory stalls in resources and the pipeline as well as squash a thread if there is a stall and we are in the switch on cache miss model --- src/cpu/inorder/cpu.cc | 29 +++++ src/cpu/inorder/cpu.hh | 8 +- src/cpu/inorder/first_stage.cc | 44 ++++---- src/cpu/inorder/first_stage.hh | 2 + src/cpu/inorder/pipeline_stage.cc | 35 ++++-- src/cpu/inorder/pipeline_stage.hh | 2 +- src/cpu/inorder/resource.cc | 8 +- src/cpu/inorder/resource.hh | 12 ++- src/cpu/inorder/resource_pool.cc | 185 +++++++++++++++++++++----------- src/cpu/inorder/resource_pool.hh | 8 +- src/cpu/inorder/resources/cache_unit.cc | 48 +++++++-- src/cpu/inorder/resources/cache_unit.hh | 3 + 12 files changed, 278 insertions(+), 106 deletions(-) (limited to 'src') diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index 69aea0c57..035aa0571 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -140,6 +140,10 @@ InOrderCPU::CPUEvent::process() cpu->disableThreads(tid, vpe); break; + case SquashFromMemStall: + cpu->squashDueToMemStall(inst->squashingStage, inst->seqNum, tid); + break; + case Trap: cpu->trapCPU(fault, tid); break; @@ -579,6 +583,31 @@ InOrderCPU::trapCPU(Fault fault, ThreadID tid) fault->invoke(tcBase(tid)); } +void +InOrderCPU::squashFromMemStall(DynInstPtr inst, ThreadID tid, int delay) +{ + scheduleCpuEvent(SquashFromMemStall, NoFault, tid, inst, delay); +} + + +void +InOrderCPU::squashDueToMemStall(int stage_num, InstSeqNum seq_num, ThreadID tid) +{ + DPRINTF(InOrderCPU, "Squashing Pipeline Stages Due to Memory Stall...\n"); + + // Squash all instructions in each stage including + // instruction that caused the squash (seq_num - 1) + // NOTE: The stage bandwidth needs to be cleared so thats why + // the stalling instruction is squashed as well. The stalled + // instruction is previously placed in another intermediate buffer + // while it's stall is being handled. 
+ InstSeqNum squash_seq_num = seq_num - 1; + + for (int stNum=stage_num; stNum >= 0 ; stNum--) { + pipelineStage[stNum]->squashDueToMemStall(squash_seq_num, tid); + } +} + void InOrderCPU::scheduleCpuEvent(CPUEventType c_event, Fault fault, ThreadID tid, DynInstPtr inst, diff --git a/src/cpu/inorder/cpu.hh b/src/cpu/inorder/cpu.hh index 4c7b2710d..5d34de67a 100644 --- a/src/cpu/inorder/cpu.hh +++ b/src/cpu/inorder/cpu.hh @@ -183,7 +183,7 @@ class InOrderCPU : public BaseCPU EnableVPEs, Trap, InstGraduated, - SquashAll, + SquashFromMemStall, UpdatePCs, NumCPUEvents }; @@ -344,6 +344,12 @@ class InOrderCPU : public BaseCPU void trap(Fault fault, ThreadID tid, int delay = 0); void trapCPU(Fault fault, ThreadID tid); + /** squashFromMemStall() - sets up a squash event + * squashDueToMemStall() - squashes pipeline + */ + void squashFromMemStall(DynInstPtr inst, ThreadID tid, int delay = 0); + void squashDueToMemStall(int stage_num, InstSeqNum seq_num, ThreadID tid); + /** Setup CPU to insert a thread's context */ void insertThread(ThreadID tid); diff --git a/src/cpu/inorder/first_stage.cc b/src/cpu/inorder/first_stage.cc index 8bd703c56..1427ca46a 100644 --- a/src/cpu/inorder/first_stage.cc +++ b/src/cpu/inorder/first_stage.cc @@ -67,11 +67,12 @@ FirstStage::squash(InstSeqNum squash_seq_num, ThreadID tid) // Clear the instruction list and skid buffer in case they have any // insts in them. - DPRINTF(InOrderStage, "Removing instructions from stage instruction list.\n"); + DPRINTF(InOrderStage, "Removing instructions from stage instruction " + "list.\n"); while (!insts[tid].empty()) { if (insts[tid].front()->seqNum <= squash_seq_num) { - DPRINTF(InOrderStage,"[tid:%i]: Cannot remove [sn:%i] because it's <= " - "squashing seqNum %i.\n", + DPRINTF(InOrderStage,"[tid:%i]: Cannot remove [sn:%i] because " + "it's <= squashing seqNum %i.\n", tid, insts[tid].front()->seqNum, squash_seq_num); @@ -82,8 +83,9 @@ FirstStage::squash(InstSeqNum squash_seq_num, ThreadID tid) insts[tid].size()); break; } - DPRINTF(InOrderStage, "[tid:%i]: Removing instruction, [sn:%i] PC %08p.\n", - tid, insts[tid].front()->seqNum, insts[tid].front()->PC); + DPRINTF(InOrderStage, "[tid:%i]: Removing instruction, [sn:%i] " + "PC %08p.\n", tid, insts[tid].front()->seqNum, + insts[tid].front()->PC); insts[tid].pop(); } @@ -93,6 +95,18 @@ FirstStage::squash(InstSeqNum squash_seq_num, ThreadID tid) cpu->removeInstsUntil(squash_seq_num, tid); } +void +FirstStage::squashDueToMemStall(InstSeqNum seq_num, ThreadID tid) +{ + // Need to preserve the stalling instruction in first-stage + // since the squash() from first stage also removes + // the instruction from the CPU (removeInstsUntil). If that + // functionality gets changed then you can move this offset. 
+ // (stalling instruction = seq_num + 1) + squash(seq_num+1, tid); +} + + void FirstStage::processStage(bool &status_change) { @@ -106,6 +120,7 @@ FirstStage::processStage(bool &status_change) for (int threadFetched = 0; threadFetched < numFetchingThreads; threadFetched++) { + ThreadID tid = getFetchingThread(fetchPolicy); if (tid >= 0) { @@ -117,14 +132,17 @@ FirstStage::processStage(bool &status_change) } } -//@TODO: Note in documentation, that when you make a pipeline stage change, then -//make sure you change the first stage too +//@TODO: Note in documentation, that when you make a pipeline stage change, +//then make sure you change the first stage too void FirstStage::processInsts(ThreadID tid) { bool all_reqs_completed = true; - for (int insts_fetched = 0; insts_fetched < stageWidth && canSendInstToStage(1); insts_fetched++) { + for (int insts_fetched = 0; + insts_fetched < stageWidth && canSendInstToStage(1); + insts_fetched++) { + DynInstPtr inst; bool new_inst = false; @@ -150,19 +168,9 @@ FirstStage::processInsts(ThreadID tid) inst->traceData = NULL; #endif // TRACING_ON - DPRINTF(RefCount, "creation: [tid:%i]: [sn:%i]: Refcount = %i.\n", - inst->readTid(), - inst->seqNum, - 0/*inst->curCount()*/); - // Add instruction to the CPU's list of instructions. inst->setInstListIt(cpu->addInst(inst)); - DPRINTF(RefCount, "after add to CPU List: [tid:%i]: [sn:%i]: Refcount = %i.\n", - inst->readTid(), - inst->seqNum, - 0/*inst->curCount()*/); - // Create Front-End Resource Schedule For Instruction ThePipeline::createFrontEndSchedule(inst); } diff --git a/src/cpu/inorder/first_stage.hh b/src/cpu/inorder/first_stage.hh index 2a69678e4..383b799f3 100644 --- a/src/cpu/inorder/first_stage.hh +++ b/src/cpu/inorder/first_stage.hh @@ -61,6 +61,8 @@ class FirstStage : public PipelineStage { /** Squash Instructions Above a Seq. Num */ void squash(InstSeqNum squash_seq_num, ThreadID tid); + void squashDueToMemStall(InstSeqNum seq_num, ThreadID tid); + /** There are no insts. coming from previous stages, so there is * no need to sort insts here */ diff --git a/src/cpu/inorder/pipeline_stage.cc b/src/cpu/inorder/pipeline_stage.cc index 8d14aae27..1fd7150da 100644 --- a/src/cpu/inorder/pipeline_stage.cc +++ b/src/cpu/inorder/pipeline_stage.cc @@ -339,9 +339,9 @@ PipelineStage::squashDueToBranch(DynInstPtr &inst, ThreadID tid) { if (cpu->squashSeqNum[tid] < inst->seqNum && cpu->lastSquashCycle[tid] == curTick){ - DPRINTF(Resource, "Ignoring [sn:%i] squash signal due to another " - "stage's squash signal for after [sn:%i].\n", inst->seqNum, - cpu->squashSeqNum[tid]); + DPRINTF(Resource, "Ignoring [sn:%i] branch squash signal due to " + "another stage's squash signal for after [sn:%i].\n", + inst->seqNum, cpu->squashSeqNum[tid]); } else { // Send back mispredict information. toPrevStages->stageInfo[stageNum][tid].branchMispredict = true; @@ -381,6 +381,12 @@ PipelineStage::squashDueToBranch(DynInstPtr &inst, ThreadID tid) } } +void +PipelineStage::squashDueToMemStall(InstSeqNum seq_num, ThreadID tid) +{ + squash(seq_num, tid); +} + void PipelineStage::squashPrevStageInsts(InstSeqNum squash_seq_num, ThreadID tid) { @@ -413,8 +419,9 @@ PipelineStage::squash(InstSeqNum squash_seq_num, ThreadID tid) while (!skidBuffer[tid].empty()) { if (skidBuffer[tid].front()->seqNum <= squash_seq_num) { DPRINTF(InOrderStage, "[tid:%i]: Cannot remove skidBuffer " - "instructions before delay slot [sn:%i]. %i insts" - "left.\n", tid, squash_seq_num, + "instructions (starting w/[sn:%i]) before delay slot " + "[sn:%i]. 
%i insts left.\n", tid, + skidBuffer[tid].front()->seqNum, squash_seq_num, skidBuffer[tid].size()); break; } @@ -775,7 +782,7 @@ void PipelineStage::processThread(bool &status_change, ThreadID tid) { // If status is Running or idle, - // call stageInsts() + // call processInsts() // If status is Unblocking, // buffer any instructions coming from fetch // continue trying to empty skid buffer @@ -787,7 +794,7 @@ PipelineStage::processThread(bool &status_change, ThreadID tid) ;//++stageSquashCycles; } - // Stage should try to stage as many instructions as its bandwidth + // Stage should try to process as many instructions as its bandwidth // will allow, as long as it is not currently blocked. if (stageStatus[tid] == Running || stageStatus[tid] == Idle) { @@ -904,9 +911,7 @@ bool PipelineStage::processInstSchedule(DynInstPtr inst) { bool last_req_completed = true; -#if TRACING_ON ThreadID tid = inst->readTid(); -#endif if (inst->nextResStage() == stageNum) { int res_stage_num = inst->nextResStage(); @@ -937,6 +942,18 @@ PipelineStage::processInstSchedule(DynInstPtr inst) last_req_completed = false; + if (req->isMemStall() && + cpu->threadModel == InOrderCPU::SwitchOnCacheMiss) { + // Save Stalling Instruction + switchedOutBuffer[tid] = inst; + switchedOutValid[tid] = true; + + // Remove Thread From Pipeline & Resource Pool + inst->squashingStage = stageNum; + inst->bdelaySeqNum = inst->seqNum; + cpu->squashFromMemStall(inst, tid); + } + break; } diff --git a/src/cpu/inorder/pipeline_stage.hh b/src/cpu/inorder/pipeline_stage.hh index 42a632560..f10906e4c 100644 --- a/src/cpu/inorder/pipeline_stage.hh +++ b/src/cpu/inorder/pipeline_stage.hh @@ -240,7 +240,7 @@ class PipelineStage */ virtual void squashDueToBranch(DynInstPtr &inst, ThreadID tid); - virtual void squashDueToMemStall(DynInstPtr &inst, ThreadID tid); + virtual void squashDueToMemStall(InstSeqNum seq_num, ThreadID tid); /** Squash instructions from stage buffer */ virtual void squashPrevStageInsts(InstSeqNum squash_seq_num, ThreadID tid); diff --git a/src/cpu/inorder/resource.cc b/src/cpu/inorder/resource.cc index 286332e08..47a9a4b9a 100644 --- a/src/cpu/inorder/resource.cc +++ b/src/cpu/inorder/resource.cc @@ -340,6 +340,12 @@ Resource::squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, } } +void +Resource::squashDueToMemStall(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, + ThreadID tid) +{ + squash(inst, stage_num, squash_seq_num, tid); +} Tick Resource::ticks(int num_cycles) @@ -407,7 +413,7 @@ ResourceRequest::ResourceRequest(Resource *_res, DynInstPtr _inst, unsigned _cmd) : res(_res), inst(_inst), cmd(_cmd), stageNum(stage_num), resIdx(res_idx), slotNum(slot_num), completed(false), - squashed(false), processing(false), waiting(false) + squashed(false), processing(false), memStall(false) { #ifdef DEBUG reqID = resReqID++; diff --git a/src/cpu/inorder/resource.hh b/src/cpu/inorder/resource.hh index 2cf8e61eb..f7c4b8fcd 100644 --- a/src/cpu/inorder/resource.hh +++ b/src/cpu/inorder/resource.hh @@ -156,6 +156,9 @@ class Resource { virtual void squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, ThreadID tid); + virtual void squashDueToMemStall(DynInstPtr inst, int stage_num, + InstSeqNum squash_seq_num, ThreadID tid); + /** The number of instructions available that this resource can * can still process */ @@ -376,8 +379,8 @@ class ResourceRequest void setProcessing() { processing = true; } /** Get/Set IsWaiting variables */ - bool isWaiting() { return waiting; } - void setWaiting() { 
waiting = true; } + bool isMemStall() { return memStall; } + void setMemStall(bool stall = true) { memStall = stall; } protected: /** Resource Identification */ @@ -386,11 +389,12 @@ class ResourceRequest int resIdx; int slotNum; - /** Resource Status */ + /** Resource Request Status */ bool completed; bool squashed; bool processing; - bool waiting; + + bool memStall; }; #endif //__CPU_INORDER_RESOURCE_HH__ diff --git a/src/cpu/inorder/resource_pool.cc b/src/cpu/inorder/resource_pool.cc index 0d78c232b..8822715c7 100644 --- a/src/cpu/inorder/resource_pool.cc +++ b/src/cpu/inorder/resource_pool.cc @@ -41,45 +41,62 @@ using namespace ThePipeline; ResourcePool::ResourcePool(InOrderCPU *_cpu, ThePipeline::Params *params) : cpu(_cpu) { - //@todo: use this function to instantiate the resources in resource pool. This will help in the - //auto-generation of this pipeline model. + //@todo: use this function to instantiate the resources in resource pool. + //This will help in the auto-generation of this pipeline model. //ThePipeline::addResources(resources, memObjects); // Declare Resource Objects // name - id - bandwidth - latency - CPU - Parameters // -------------------------------------------------- - resources.push_back(new FetchSeqUnit("Fetch-Seq-Unit", FetchSeq, StageWidth * 2, 0, _cpu, params)); + resources.push_back(new FetchSeqUnit("Fetch-Seq-Unit", FetchSeq, + StageWidth * 2, 0, _cpu, params)); memObjects.push_back(ICache); - resources.push_back(new CacheUnit("icache_port", ICache, StageWidth * MaxThreads, 0, _cpu, params)); + resources.push_back(new CacheUnit("icache_port", ICache, + StageWidth * MaxThreads, 0, _cpu, + params)); - resources.push_back(new DecodeUnit("Decode-Unit", Decode, StageWidth, 0, _cpu, params)); + resources.push_back(new DecodeUnit("Decode-Unit", Decode, + StageWidth, 0, _cpu, params)); - resources.push_back(new BranchPredictor("Branch-Predictor", BPred, StageWidth, 0, _cpu, params)); + resources.push_back(new BranchPredictor("Branch-Predictor", BPred, + StageWidth, 0, _cpu, params)); - resources.push_back(new InstBuffer("Fetch-Buffer-T0", FetchBuff, 4, 0, _cpu, params)); + resources.push_back(new InstBuffer("Fetch-Buffer-T0", FetchBuff, 4, + 0, _cpu, params)); - resources.push_back(new UseDefUnit("RegFile-Manager", RegManager, StageWidth * MaxThreads, 0, _cpu, params)); + resources.push_back(new UseDefUnit("RegFile-Manager", RegManager, + StageWidth * MaxThreads, 0, _cpu, + params)); - resources.push_back(new AGENUnit("AGEN-Unit", AGEN, StageWidth, 0, _cpu, params)); + resources.push_back(new AGENUnit("AGEN-Unit", AGEN, + StageWidth, 0, _cpu, params)); - resources.push_back(new ExecutionUnit("Execution-Unit", ExecUnit, StageWidth, 0, _cpu, params)); + resources.push_back(new ExecutionUnit("Execution-Unit", ExecUnit, + StageWidth, 0, _cpu, params)); - resources.push_back(new MultDivUnit("Mult-Div-Unit", MDU, 5, 0, _cpu, params)); + resources.push_back(new MultDivUnit("Mult-Div-Unit", MDU, 5, 0, _cpu, + params)); memObjects.push_back(DCache); - resources.push_back(new CacheUnit("dcache_port", DCache, StageWidth * MaxThreads, 0, _cpu, params)); + resources.push_back(new CacheUnit("dcache_port", DCache, + StageWidth * MaxThreads, 0, _cpu, + params)); - resources.push_back(new GraduationUnit("Graduation-Unit", Grad, StageWidth * MaxThreads, 0, _cpu, params)); + resources.push_back(new GraduationUnit("Graduation-Unit", Grad, + StageWidth * MaxThreads, 0, _cpu, + params)); - resources.push_back(new InstBuffer("Fetch-Buffer-T1", FetchBuff2, 4, 0, _cpu, params)); + 
resources.push_back(new InstBuffer("Fetch-Buffer-T1", FetchBuff2, 4, + 0, _cpu, params)); } void ResourcePool::init() { for (int i=0; i < resources.size(); i++) { - DPRINTF(Resource, "Initializing resource: %s.\n", resources[i]->name()); + DPRINTF(Resource, "Initializing resource: %s.\n", + resources[i]->name()); resources[i]->init(); } @@ -113,8 +130,8 @@ ResourcePool::getPort(const std::string &if_name, int idx) int obj_idx = memObjects[i]; Port *port = resources[obj_idx]->getPort(if_name, idx); if (port != NULL) { - DPRINTF(Resource, "%s set to resource %s(#%i) in Resource Pool.\n", if_name, - resources[obj_idx]->name(), obj_idx); + DPRINTF(Resource, "%s set to resource %s(#%i) in Resource Pool.\n", + if_name, resources[obj_idx]->name(), obj_idx); return port; } } @@ -131,7 +148,8 @@ ResourcePool::getPortIdx(const std::string &port_name) unsigned obj_idx = memObjects[i]; Port *port = resources[obj_idx]->getPort(port_name, obj_idx); if (port != NULL) { - DPRINTF(Resource, "Returning Port Idx %i for %s.\n", obj_idx, port_name); + DPRINTF(Resource, "Returning Port Idx %i for %s.\n", obj_idx, + port_name); return obj_idx; } } @@ -167,7 +185,8 @@ void ResourcePool::squash(DynInstPtr inst, int res_idx, InstSeqNum done_seq_num, ThreadID tid) { - resources[res_idx]->squash(inst, ThePipeline::NumStages-1, done_seq_num, tid); + resources[res_idx]->squash(inst, ThePipeline::NumStages-1, done_seq_num, + tid); } int @@ -192,15 +211,17 @@ ResourcePool::scheduleEvent(InOrderCPU::CPUEventType e_type, DynInstPtr inst, { case InOrderCPU::ActivateThread: { - DPRINTF(Resource, "Scheduling Activate Thread Resource Pool Event for tick %i.\n", - curTick + delay); - ResPoolEvent *res_pool_event = new ResPoolEvent(this, - e_type, - inst, - inst->squashingStage, - inst->bdelaySeqNum, - inst->readTid()); - mainEventQueue.schedule(res_pool_event, curTick + cpu->ticks(delay)); + DPRINTF(Resource, "Scheduling Activate Thread Resource Pool Event " + "for tick %i.\n", curTick + delay); + ResPoolEvent *res_pool_event = + new ResPoolEvent(this, + e_type, + inst, + inst->squashingStage, + inst->bdelaySeqNum, + inst->readTid()); + mainEventQueue.schedule(res_pool_event, + curTick + cpu->ticks(delay)); } break; @@ -208,49 +229,72 @@ ResourcePool::scheduleEvent(InOrderCPU::CPUEventType e_type, DynInstPtr inst, case InOrderCPU::DeallocateThread: { - DPRINTF(Resource, "Scheduling Deactivate Thread Resource Pool Event for tick %i.\n", - curTick + delay); - ResPoolEvent *res_pool_event = new ResPoolEvent(this, - e_type, - inst, - inst->squashingStage, - inst->bdelaySeqNum, - tid); + DPRINTF(Resource, "Scheduling Deactivate Thread Resource Pool " + "Event for tick %i.\n", curTick + delay); + ResPoolEvent *res_pool_event = + new ResPoolEvent(this, + e_type, + inst, + inst->squashingStage, + inst->bdelaySeqNum, + tid); - mainEventQueue.schedule(res_pool_event, curTick + cpu->ticks(delay)); + mainEventQueue.schedule(res_pool_event, + curTick + cpu->ticks(delay)); } break; case ResourcePool::InstGraduated: { - DPRINTF(Resource, "Scheduling Inst-Graduated Resource Pool Event for tick %i.\n", - curTick + delay); - ResPoolEvent *res_pool_event = new ResPoolEvent(this,e_type, - inst, - inst->squashingStage, - inst->seqNum, - inst->readTid()); - mainEventQueue.schedule(res_pool_event, curTick + cpu->ticks(delay)); + DPRINTF(Resource, "Scheduling Inst-Graduated Resource Pool " + "Event for tick %i.\n", curTick + delay); + ResPoolEvent *res_pool_event = + new ResPoolEvent(this,e_type, + inst, + inst->squashingStage, + inst->seqNum, + 
inst->readTid()); + mainEventQueue.schedule(res_pool_event, + curTick + cpu->ticks(delay)); } break; case ResourcePool::SquashAll: { - DPRINTF(Resource, "Scheduling Squash Resource Pool Event for tick %i.\n", + DPRINTF(Resource, "Scheduling Squash Resource Pool Event for " + "tick %i.\n", curTick + delay); + ResPoolEvent *res_pool_event = + new ResPoolEvent(this,e_type, + inst, + inst->squashingStage, + inst->bdelaySeqNum, + inst->readTid()); + mainEventQueue.schedule(res_pool_event, + curTick + cpu->ticks(delay)); + } + break; + + case InOrderCPU::SquashFromMemStall: + { + DPRINTF(Resource, "Scheduling Squash Due to Memory Stall Resource " + "Pool Event for tick %i.\n", curTick + delay); - ResPoolEvent *res_pool_event = new ResPoolEvent(this,e_type, - inst, - inst->squashingStage, - inst->bdelaySeqNum, - inst->readTid()); - mainEventQueue.schedule(res_pool_event, curTick + cpu->ticks(delay)); + ResPoolEvent *res_pool_event = + new ResPoolEvent(this,e_type, + inst, + inst->squashingStage, + inst->seqNum - 1, + inst->readTid()); + mainEventQueue.schedule(res_pool_event, + curTick + cpu->ticks(delay)); } break; default: - DPRINTF(Resource, "Ignoring Unrecognized CPU Event (%s).\n", InOrderCPU::eventNames[e_type]); + DPRINTF(Resource, "Ignoring Unrecognized CPU Event (%s).\n", + InOrderCPU::eventNames[e_type]); ; // If Resource Pool doesnt recognize event, we ignore it. } } @@ -265,8 +309,8 @@ void ResourcePool::squashAll(DynInstPtr inst, int stage_num, InstSeqNum done_seq_num, ThreadID tid) { - DPRINTF(Resource, "[tid:%i] Stage %i squashing all instructions above [sn:%i].\n", - stage_num, tid, done_seq_num); + DPRINTF(Resource, "[tid:%i] Stage %i squashing all instructions above " + "[sn:%i].\n", stage_num, tid, done_seq_num); int num_resources = resources.size(); @@ -275,11 +319,26 @@ ResourcePool::squashAll(DynInstPtr inst, int stage_num, } } +void +ResourcePool::squashDueToMemStall(DynInstPtr inst, int stage_num, + InstSeqNum done_seq_num, ThreadID tid) +{ + DPRINTF(Resource, "[tid:%i] Stage %i squashing all instructions above " + "[sn:%i].\n", stage_num, tid, done_seq_num); + + int num_resources = resources.size(); + + for (int idx = 0; idx < num_resources; idx++) { + resources[idx]->squashDueToMemStall(inst, stage_num, done_seq_num, + tid); + } +} + void ResourcePool::activateAll(ThreadID tid) { - DPRINTF(Resource, "[tid:%i] Broadcasting Thread Activation to all resources.\n", - tid); + DPRINTF(Resource, "[tid:%i] Broadcasting Thread Activation to all " + "resources.\n", tid); int num_resources = resources.size(); @@ -291,8 +350,8 @@ ResourcePool::activateAll(ThreadID tid) void ResourcePool::deactivateAll(ThreadID tid) { - DPRINTF(Resource, "[tid:%i] Broadcasting Thread Deactivation to all resources.\n", - tid); + DPRINTF(Resource, "[tid:%i] Broadcasting Thread Deactivation to all " + "resources.\n", tid); int num_resources = resources.size(); @@ -304,8 +363,8 @@ ResourcePool::deactivateAll(ThreadID tid) void ResourcePool::instGraduated(InstSeqNum seq_num, ThreadID tid) { - DPRINTF(Resource, "[tid:%i] Broadcasting [sn:%i] graduation to all resources.\n", - tid, seq_num); + DPRINTF(Resource, "[tid:%i] Broadcasting [sn:%i] graduation to all " + "resources.\n", tid, seq_num); int num_resources = resources.size(); @@ -353,6 +412,10 @@ ResourcePool::ResPoolEvent::process() resPool->squashAll(inst, stageNum, seqNum, tid); break; + case InOrderCPU::SquashFromMemStall: + resPool->squashDueToMemStall(inst, stageNum, seqNum, tid); + break; + default: fatal("Unrecognized Event Type"); } diff --git 
a/src/cpu/inorder/resource_pool.hh b/src/cpu/inorder/resource_pool.hh index 016fae2bf..61e691f35 100644 --- a/src/cpu/inorder/resource_pool.hh +++ b/src/cpu/inorder/resource_pool.hh @@ -123,7 +123,7 @@ class ResourcePool { }; public: - ResourcePool(InOrderCPU *_cpu, ThePipeline::Params *params); + ResourcePool(InOrderCPU *_cpu, ThePipeline::Params *params); virtual ~ResourcePool() {} std::string name(); @@ -160,6 +160,12 @@ class ResourcePool { void squashAll(DynInstPtr inst, int stage_num, InstSeqNum done_seq_num, ThreadID tid); + /** Squash Resources in Pool after a memory stall + * NOTE: Only use during Switch-On-Miss Thread model + */ + void squashDueToMemStall(DynInstPtr inst, int stage_num, + InstSeqNum done_seq_num, ThreadID tid); + /** Activate Thread in all resources */ void activateAll(ThreadID tid); diff --git a/src/cpu/inorder/resources/cache_unit.cc b/src/cpu/inorder/resources/cache_unit.cc index eb66e10f8..570d27fbe 100644 --- a/src/cpu/inorder/resources/cache_unit.cc +++ b/src/cpu/inorder/resources/cache_unit.cc @@ -241,8 +241,8 @@ CacheUnit::requestAgain(DynInstPtr inst, bool &service_request) // If different, then update command in the request cache_req->cmd = inst->resSched.top()->cmd; DPRINTF(InOrderCachePort, - "[tid:%i]: [sn:%i]: Updating the command for this instruction\n", - inst->readTid(), inst->seqNum); + "[tid:%i]: [sn:%i]: Updating the command for this " + "instruction\n ", inst->readTid(), inst->seqNum); service_request = true; } else { @@ -416,6 +416,7 @@ CacheUnit::execute(int slot_num) tid, seq_num, inst->staticInst->disassemble(inst->PC)); delete cache_req->dataPkt; + //cache_req->setMemStall(false); cache_req->done(); } else { DPRINTF(InOrderCachePort, @@ -425,6 +426,7 @@ CacheUnit::execute(int slot_num) "STALL: [tid:%i]: Fetch miss from %08p\n", tid, cache_req->inst->readPC()); cache_req->setCompleted(false); + //cache_req->setMemStall(true); } break; @@ -437,11 +439,13 @@ CacheUnit::execute(int slot_num) if (cache_req->isMemAccComplete() || inst->isDataPrefetch() || inst->isInstPrefetch()) { + cache_req->setMemStall(false); cache_req->done(); } else { DPRINTF(InOrderStall, "STALL: [tid:%i]: Data miss from %08p\n", tid, cache_req->inst->getMemAddr()); cache_req->setCompleted(false); + cache_req->setMemStall(true); } break; @@ -510,7 +514,8 @@ CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res) if (cache_req->pktCmd == MemCmd::WriteReq) { cache_req->pktCmd = cache_req->memReq->isSwap() ? MemCmd::SwapReq : - (cache_req->memReq->isLLSC() ? MemCmd::StoreCondReq : MemCmd::WriteReq); + (cache_req->memReq->isLLSC() ? 
MemCmd::StoreCondReq + : MemCmd::WriteReq); } cache_req->dataPkt = new CacheReqPacket(cache_req, cache_req->pktCmd, @@ -641,8 +646,9 @@ CacheUnit::processCacheCompletion(PacketPtr pkt) ExtMachInst ext_inst; StaticInstPtr staticInst = NULL; Addr inst_pc = inst->readPC(); - MachInst mach_inst = TheISA::gtoh(*reinterpret_cast - (cache_pkt->getPtr())); + MachInst mach_inst = + TheISA::gtoh(*reinterpret_cast + (cache_pkt->getPtr())); predecoder.setTC(cpu->thread[tid]->getTC()); predecoder.moreBytes(inst_pc, inst_pc, mach_inst); @@ -755,7 +761,8 @@ CacheUnitEvent::process() tlb_res->tlbBlocked[tid] = false; - tlb_res->cpu->pipelineStage[stage_num]->unsetResStall(tlb_res->reqMap[slotIdx], tid); + tlb_res->cpu->pipelineStage[stage_num]-> + unsetResStall(tlb_res->reqMap[slotIdx], tid); req_ptr->tlbStall = false; @@ -764,6 +771,23 @@ CacheUnitEvent::process() } } +void +CacheUnit::squashDueToMemStall(DynInstPtr inst, int stage_num, + InstSeqNum squash_seq_num, ThreadID tid) +{ + // If squashing due to memory stall, then we do NOT want to + // squash the instruction that caused the stall so we + // increment the sequence number here to prevent that. + // + // NOTE: This is only for the SwitchOnCacheMiss Model + // NOTE: If you have multiple outstanding misses from the same + // thread then you need to reevaluate this code + // NOTE: squash should originate from + // pipeline_stage.cc:processInstSchedule + squash(inst, stage_num, squash_seq_num + 1, tid); +} + + void CacheUnit::squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, ThreadID tid) @@ -798,7 +822,8 @@ CacheUnit::squash(DynInstPtr inst, int stage_num, int stall_stage = reqMap[req_slot_num]->getStageNum(); - cpu->pipelineStage[stall_stage]->unsetResStall(reqMap[req_slot_num], tid); + cpu->pipelineStage[stall_stage]-> + unsetResStall(reqMap[req_slot_num], tid); } if (!cache_req->tlbStall && !cache_req->isMemAccPending()) { @@ -927,14 +952,16 @@ CacheUnit::write(DynInstPtr inst, uint8_t data, Addr addr, template<> Fault -CacheUnit::write(DynInstPtr inst, double data, Addr addr, unsigned flags, uint64_t *res) +CacheUnit::write(DynInstPtr inst, double data, Addr addr, unsigned flags, + uint64_t *res) { return write(inst, *(uint64_t*)&data, addr, flags, res); } template<> Fault -CacheUnit::write(DynInstPtr inst, float data, Addr addr, unsigned flags, uint64_t *res) +CacheUnit::write(DynInstPtr inst, float data, Addr addr, unsigned flags, + uint64_t *res) { return write(inst, *(uint32_t*)&data, addr, flags, res); } @@ -942,7 +969,8 @@ CacheUnit::write(DynInstPtr inst, float data, Addr addr, unsigned flags, uint64_ template<> Fault -CacheUnit::write(DynInstPtr inst, int32_t data, Addr addr, unsigned flags, uint64_t *res) +CacheUnit::write(DynInstPtr inst, int32_t data, Addr addr, unsigned flags, + uint64_t *res) { return write(inst, (uint32_t)data, addr, flags, res); } diff --git a/src/cpu/inorder/resources/cache_unit.hh b/src/cpu/inorder/resources/cache_unit.hh index c467e9771..a6b07ebd9 100644 --- a/src/cpu/inorder/resources/cache_unit.hh +++ b/src/cpu/inorder/resources/cache_unit.hh @@ -146,6 +146,9 @@ class CacheUnit : public Resource void squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, ThreadID tid); + void squashDueToMemStall(DynInstPtr inst, int stage_num, + InstSeqNum squash_seq_num, ThreadID tid); + /** Processes cache completion event. 
*/ void processCacheCompletion(PacketPtr pkt); -- cgit v1.2.3 From 4a945aab1958d39fcfea4608715e77d5112809cf Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:26:26 -0500 Subject: inorder: add event priority offset allow for events to schedule themselves later if desired. this is important because of cases like where you need to activate a thread only after the previous thread has been deactivated. The ordering there has to be enforced --- src/cpu/inorder/cpu.cc | 15 +++++++++------ src/cpu/inorder/cpu.hh | 5 +++-- 2 files changed, 12 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index 035aa0571..c0daad207 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -84,8 +84,10 @@ InOrderCPU::TickEvent::description() } InOrderCPU::CPUEvent::CPUEvent(InOrderCPU *_cpu, CPUEventType e_type, - Fault fault, ThreadID _tid, DynInstPtr inst) - : Event(CPU_Tick_Pri), cpu(_cpu) + Fault fault, ThreadID _tid, DynInstPtr inst, + unsigned event_pri_offset) + : Event(Event::Priority((unsigned int)CPU_Tick_Pri + event_pri_offset)), + cpu(_cpu) { setEvent(e_type, fault, _tid, inst); } @@ -611,13 +613,14 @@ InOrderCPU::squashDueToMemStall(int stage_num, InstSeqNum seq_num, ThreadID tid) void InOrderCPU::scheduleCpuEvent(CPUEventType c_event, Fault fault, ThreadID tid, DynInstPtr inst, - unsigned delay) + unsigned delay, unsigned event_pri_offset) { - CPUEvent *cpu_event = new CPUEvent(this, c_event, fault, tid, inst); + CPUEvent *cpu_event = new CPUEvent(this, c_event, fault, tid, inst, + event_pri_offset); if (delay >= 0) { - DPRINTF(InOrderCPU, "Scheduling CPU Event (%s) for cycle %i.\n", - eventNames[c_event], curTick + delay); + DPRINTF(InOrderCPU, "Scheduling CPU Event (%s) for cycle %i, [tid:%i].\n", + eventNames[c_event], curTick + delay, tid); mainEventQueue.schedule(cpu_event,curTick + delay); } else { cpu_event->process(); diff --git a/src/cpu/inorder/cpu.hh b/src/cpu/inorder/cpu.hh index 5d34de67a..1c819638d 100644 --- a/src/cpu/inorder/cpu.hh +++ b/src/cpu/inorder/cpu.hh @@ -206,7 +206,7 @@ class InOrderCPU : public BaseCPU public: /** Constructs a CPU event. */ CPUEvent(InOrderCPU *_cpu, CPUEventType e_type, Fault fault, - ThreadID _tid, DynInstPtr inst); + ThreadID _tid, DynInstPtr inst, unsigned event_pri_offset); /** Set Type of Event To Be Scheduled */ void setEvent(CPUEventType e_type, Fault _fault, ThreadID _tid, @@ -234,7 +234,8 @@ class InOrderCPU : public BaseCPU /** Schedule a CPU Event */ void scheduleCpuEvent(CPUEventType cpu_event, Fault fault, ThreadID tid, - DynInstPtr inst, unsigned delay = 0); + DynInstPtr inst, unsigned delay = 0, + unsigned event_pri_offset = 0); public: /** Interface between the CPU and CPU resources. 
*/ -- cgit v1.2.3 From e1fcc6498017574735362636791f9ad73fb39b04 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:26:32 -0500 Subject: inorder: activate thread on cache miss -Support ability to activate next ready thread after a cache miss through the activateNextReadyContext/Thread() functions -To support this a "readyList" of thread ids is added -After a cache miss, thread will suspend and then call activitynextreadythread --- src/cpu/inorder/cpu.cc | 78 ++++++++++++++++++++++++++++++++++++--- src/cpu/inorder/cpu.hh | 23 +++++++++++- src/cpu/inorder/pipeline_stage.cc | 10 ++++- src/cpu/inorder/resource_pool.cc | 34 +++++++++++------ src/cpu/inorder/thread_context.cc | 6 +-- 5 files changed, 128 insertions(+), 23 deletions(-) (limited to 'src') diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index c0daad207..e52e5935a 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -96,6 +96,8 @@ InOrderCPU::CPUEvent::CPUEvent(InOrderCPU *_cpu, CPUEventType e_type, std::string InOrderCPU::eventNames[NumCPUEvents] = { "ActivateThread", + "ActivateNextReadyThread", + "DeactivateThread", "DeallocateThread", "SuspendThread", "DisableThreads", @@ -119,9 +121,18 @@ InOrderCPU::CPUEvent::process() //@TODO: Consider Implementing "Suspend Thread" as Separate from //Deallocate + case ActivateNextReadyThread: + cpu->activateNextReadyThread(); + break; + + case DeactivateThread: + cpu->deactivateThread(tid); + break; + case SuspendThread: // Suspend & Deallocate are same for now. - //cpu->suspendThread(tid); - //break; + cpu->suspendThread(tid); + break; + case DeallocateThread: cpu->deallocateThread(tid); break; @@ -225,6 +236,14 @@ InOrderCPU::InOrderCPU(Params *params) if (active_threads > 1) { threadModel = (InOrderCPU::ThreadModel) params->threadModel; + + if (threadModel == SMT) { + DPRINTF(InOrderCPU, "Setting Thread Model to SMT.\n"); + } else if (threadModel == SwitchOnCacheMiss) { + DPRINTF(InOrderCPU, "Setting Thread Model to " + "Switch On Cache Miss\n"); + } + } else { threadModel = Single; } @@ -628,8 +647,8 @@ InOrderCPU::scheduleCpuEvent(CPUEventType c_event, Fault fault, } // Broadcast event to the Resource Pool - DynInstPtr dummy_inst = - new InOrderDynInst(this, NULL, getNextEventNum(), tid); + // Need to reset tid just in case this is a dummy instruction + inst->setTid(tid); resPool->scheduleEvent(c_event, inst, 0, 0, tid); } @@ -643,10 +662,39 @@ InOrderCPU::isThreadActive(ThreadID tid) } +void +InOrderCPU::activateNextReadyThread() +{ + if (readyThreads.size() >= 1) { + ThreadID ready_tid = readyThreads.front(); + + // Activate in Pipeline + activateThread(ready_tid); + + // Activate in Resource Pool + resPool->activateAll(ready_tid); + + list::iterator ready_it = + std::find(readyThreads.begin(), readyThreads.end(), ready_tid); + readyThreads.erase(ready_it); + } else { + DPRINTF(InOrderCPU, + "No Ready Threads to Activate.\n"); + } +} + void InOrderCPU::activateThread(ThreadID tid) { - if (!isThreadActive(tid)) { + if (threadModel == SwitchOnCacheMiss && + numActiveThreads() == 1) { + DPRINTF(InOrderCPU, + "Ignoring Activation of [tid:%i]. 
Placing on " + "ready list\n", tid); + + readyThreads.push_back(tid); + + } else if (!isThreadActive(tid)) { DPRINTF(InOrderCPU, "Adding Thread %i to active threads list in CPU.\n", tid); activeThreads.push_back(tid); @@ -892,6 +940,23 @@ InOrderCPU::activateContext(ThreadID tid, int delay) _status = Running; } +void +InOrderCPU::activateNextReadyContext(int delay) +{ + DPRINTF(InOrderCPU,"Activating next ready thread\n"); + + // NOTE: Add 5 to the event priority so that we always activate + // threads after we've finished deactivating, squashing,etc. + // other threads + scheduleCpuEvent(ActivateNextReadyThread, NoFault, 0/*tid*/, dummyInst, + delay, 5); + + // Be sure to signal that there's some activity so the CPU doesn't + // deschedule itself. + activityRec.activity(); + + _status = Running; +} void InOrderCPU::suspendContext(ThreadID tid, int delay) @@ -903,8 +968,9 @@ InOrderCPU::suspendContext(ThreadID tid, int delay) void InOrderCPU::suspendThread(ThreadID tid) { - DPRINTF(InOrderCPU,"[tid: %i]: Suspended ...\n", tid); + DPRINTF(InOrderCPU, "[tid: %i]: Placing on Suspended Threads List...\n", tid); deactivateThread(tid); + suspendedThreads.push_back(tid); } void diff --git a/src/cpu/inorder/cpu.hh b/src/cpu/inorder/cpu.hh index 1c819638d..854f5167c 100644 --- a/src/cpu/inorder/cpu.hh +++ b/src/cpu/inorder/cpu.hh @@ -89,7 +89,7 @@ class InOrderCPU : public BaseCPU typedef TimeBuffer StageQueue; friend class Resource; - + public: /** Constructs a CPU with the given parameters. */ InOrderCPU(Params *params); @@ -175,6 +175,8 @@ class InOrderCPU : public BaseCPU // pool event. enum CPUEventType { ActivateThread, + ActivateNextReadyThread, + DeactivateThread, DeallocateThread, SuspendThread, DisableThreads, @@ -361,6 +363,10 @@ class InOrderCPU : public BaseCPU void activateContext(ThreadID tid, int delay = 0); void activateThread(ThreadID tid); + /** Add Thread to Active Threads List. */ + void activateNextReadyContext(int delay = 0); + void activateNextReadyThread(); + /** Remove Thread from Active Threads List */ void suspendContext(ThreadID tid, int delay = 0); void suspendThread(ThreadID tid); @@ -612,6 +618,9 @@ class InOrderCPU : public BaseCPU /** Current Threads List */ std::list currentThreads; + /** Ready Threads List */ + std::list readyThreads; + /** Suspended Threads List */ std::list suspendedThreads; @@ -633,6 +642,18 @@ class InOrderCPU : public BaseCPU /** Number of Active Threads in the CPU */ ThreadID numActiveThreads() { return activeThreads.size(); } + /** Thread id of active thread + * Only used for SwitchOnCacheMiss model. Assumes only 1 thread active + */ + ThreadID activeThreadId() + { + if (numActiveThreads() > 0) + return activeThreads.front(); + else + return -1; + } + + /** Records that there was time buffer activity this cycle. 
*/ void activityThisCycle() { activityRec.activity(); } diff --git a/src/cpu/inorder/pipeline_stage.cc b/src/cpu/inorder/pipeline_stage.cc index 1fd7150da..30a3733b0 100644 --- a/src/cpu/inorder/pipeline_stage.cc +++ b/src/cpu/inorder/pipeline_stage.cc @@ -951,7 +951,15 @@ PipelineStage::processInstSchedule(DynInstPtr inst) // Remove Thread From Pipeline & Resource Pool inst->squashingStage = stageNum; inst->bdelaySeqNum = inst->seqNum; - cpu->squashFromMemStall(inst, tid); + cpu->squashFromMemStall(inst, tid); + + // Switch On Cache Miss + //===================== + // Suspend Thread at end of cycle + cpu->suspendContext(tid); + + // Activate Next Ready Thread at end of cycle + cpu->activateNextReadyContext(); } break; diff --git a/src/cpu/inorder/resource_pool.cc b/src/cpu/inorder/resource_pool.cc index 8822715c7..97ba4d087 100644 --- a/src/cpu/inorder/resource_pool.cc +++ b/src/cpu/inorder/resource_pool.cc @@ -212,7 +212,8 @@ ResourcePool::scheduleEvent(InOrderCPU::CPUEventType e_type, DynInstPtr inst, case InOrderCPU::ActivateThread: { DPRINTF(Resource, "Scheduling Activate Thread Resource Pool Event " - "for tick %i.\n", curTick + delay); + "for tick %i, [tid:%i].\n", curTick + delay, + inst->readTid()); ResPoolEvent *res_pool_event = new ResPoolEvent(this, e_type, @@ -295,7 +296,6 @@ ResourcePool::scheduleEvent(InOrderCPU::CPUEventType e_type, DynInstPtr inst, default: DPRINTF(Resource, "Ignoring Unrecognized CPU Event (%s).\n", InOrderCPU::eventNames[e_type]); - ; // If Resource Pool doesnt recognize event, we ignore it. } } @@ -310,7 +310,7 @@ ResourcePool::squashAll(DynInstPtr inst, int stage_num, InstSeqNum done_seq_num, ThreadID tid) { DPRINTF(Resource, "[tid:%i] Stage %i squashing all instructions above " - "[sn:%i].\n", stage_num, tid, done_seq_num); + "[sn:%i].\n", tid, stage_num, done_seq_num); int num_resources = resources.size(); @@ -337,14 +337,24 @@ ResourcePool::squashDueToMemStall(DynInstPtr inst, int stage_num, void ResourcePool::activateAll(ThreadID tid) { - DPRINTF(Resource, "[tid:%i] Broadcasting Thread Activation to all " - "resources.\n", tid); - - int num_resources = resources.size(); - - for (int idx = 0; idx < num_resources; idx++) { - resources[idx]->activateThread(tid); - } + bool do_activate = cpu->threadModel != InOrderCPU::SwitchOnCacheMiss || + cpu->numActiveThreads() < 1 || + cpu->activeThreadId() == tid; + + + if (do_activate) { + DPRINTF(Resource, "[tid:%i] Broadcasting Thread Activation to all " + "resources.\n", tid); + + int num_resources = resources.size(); + + for (int idx = 0; idx < num_resources; idx++) { + resources[idx]->activateThread(tid); + } + } else { + DPRINTF(Resource, "[tid:%i] Ignoring Thread Activation to all " + "resources.\n", tid); + } } void @@ -374,7 +384,7 @@ ResourcePool::instGraduated(InstSeqNum seq_num, ThreadID tid) } ResourcePool::ResPoolEvent::ResPoolEvent(ResourcePool *_resPool) - : Event(CPU_Tick_Pri), resPool(_resPool), + : Event((Event::Priority)((unsigned)CPU_Tick_Pri+5)), resPool(_resPool), eventType((InOrderCPU::CPUEventType) Default) { } diff --git a/src/cpu/inorder/thread_context.cc b/src/cpu/inorder/thread_context.cc index 41d16b633..d2f511b9d 100644 --- a/src/cpu/inorder/thread_context.cc +++ b/src/cpu/inorder/thread_context.cc @@ -242,21 +242,21 @@ InOrderThreadContext::setRegOtherThread(int misc_reg, const MiscReg &val, void InOrderThreadContext::setPC(uint64_t val) { - DPRINTF(InOrderCPU, "Setting PC to %08p\n", val); + DPRINTF(InOrderCPU, "[tid:%i] Setting PC to %08p\n", thread->readTid(), val); 
cpu->setPC(val, thread->readTid()); } void InOrderThreadContext::setNextPC(uint64_t val) { - DPRINTF(InOrderCPU, "Setting NPC to %08p\n", val); + DPRINTF(InOrderCPU, "[tid:%i] Setting NPC to %08p\n", thread->readTid(), val); cpu->setNextPC(val, thread->readTid()); } void InOrderThreadContext::setNextNPC(uint64_t val) { - DPRINTF(InOrderCPU, "Setting NNPC to %08p\n", val); + DPRINTF(InOrderCPU, "[tid:%i] Setting NNPC to %08p\n", thread->readTid(), val); cpu->setNextNPC(val, thread->readTid()); } -- cgit v1.2.3 From d9eaa2fe2149528e109b8b32a00dd4fa72d8ec4f Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:26:40 -0500 Subject: inorder-cleanup: remove unused thread functions --- src/cpu/inorder/cpu.cc | 278 +++++++----------------------------- src/cpu/inorder/cpu.hh | 92 +++--------- src/cpu/inorder/inorder_dyn_inst.cc | 24 ---- src/cpu/inorder/inorder_dyn_inst.hh | 6 - 4 files changed, 74 insertions(+), 326 deletions(-) (limited to 'src') diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index e52e5935a..954309a74 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -100,13 +100,9 @@ std::string InOrderCPU::eventNames[NumCPUEvents] = "DeactivateThread", "DeallocateThread", "SuspendThread", - "DisableThreads", - "EnableThreads", - "DisableVPEs", - "EnableVPEs", "Trap", "InstGraduated", - "SquashAll", + "SquashFromMemStall", "UpdatePCs" }; @@ -119,8 +115,6 @@ InOrderCPU::CPUEvent::process() cpu->activateThread(tid); break; - //@TODO: Consider Implementing "Suspend Thread" as Separate from - //Deallocate case ActivateNextReadyThread: cpu->activateNextReadyThread(); break; @@ -129,28 +123,12 @@ InOrderCPU::CPUEvent::process() cpu->deactivateThread(tid); break; - case SuspendThread: // Suspend & Deallocate are same for now. - cpu->suspendThread(tid); - break; - case DeallocateThread: cpu->deallocateThread(tid); break; - case EnableVPEs: - cpu->enableVPEs(vpe); - break; - - case DisableVPEs: - cpu->disableVPEs(tid, vpe); - break; - - case EnableThreads: - cpu->enableThreads(vpe); - break; - - case DisableThreads: - cpu->disableThreads(tid, vpe); + case SuspendThread: + cpu->suspendThread(tid); break; case SquashFromMemStall: @@ -212,8 +190,7 @@ InOrderCPU::InOrderCPU(Params *params) #endif // DEBUG switchCount(0), deferRegistration(false/*params->deferRegistration*/), - stageTracing(params->stageTracing), - numVirtProcs(1) + stageTracing(params->stageTracing) { ThreadID active_threads; cpu_params = params; @@ -335,11 +312,10 @@ InOrderCPU::InOrderCPU(Params *params) memset(floatRegs.i[tid], 0, sizeof(floatRegs.i[tid])); isa[tid].clear(); - isa[tid].expandForMultithreading(numThreads, numVirtProcs); + isa[tid].expandForMultithreading(numThreads, 1/*numVirtProcs*/); } lastRunningCycle = curTick; - contextSwitch = false; // Define dummy instructions and resource requests to be used. dummyInst = new InOrderDynInst(this, NULL, 0, 0); @@ -526,7 +502,7 @@ InOrderCPU::reset() { for (int i = 0; i < numThreads; i++) { isa[i].reset(coreType, numThreads, - numVirtProcs, dynamic_cast(this)); + 1/*numVirtProcs*/, dynamic_cast(this)); } } @@ -703,6 +679,20 @@ InOrderCPU::activateThread(ThreadID tid) } } +void +InOrderCPU::deactivateContext(ThreadID tid, int delay) +{ + DPRINTF(InOrderCPU,"[tid:%i]: Deactivating ...\n", tid); + + scheduleCpuEvent(DeactivateThread, NoFault, tid, dummyInst, delay); + + // Be sure to signal that there's some activity so the CPU doesn't + // deschedule itself. 
+ activityRec.activity(); + + _status = Running; +} + void InOrderCPU::deactivateThread(ThreadID tid) { @@ -723,173 +713,58 @@ InOrderCPU::deactivateThread(ThreadID tid) } void -InOrderCPU::removePipelineStalls(ThreadID tid) -{ - DPRINTF(InOrderCPU,"[tid:%i]: Removing all pipeline stalls\n", - tid); - - for (int stNum = 0; stNum < NumStages ; stNum++) { - pipelineStage[stNum]->removeStalls(tid); - } - -} -bool -InOrderCPU::isThreadInCPU(ThreadID tid) -{ - list::iterator isCurrent = - std::find(currentThreads.begin(), currentThreads.end(), tid); - - return (isCurrent != currentThreads.end()); -} - -void -InOrderCPU::addToCurrentThreads(ThreadID tid) -{ - if (!isThreadInCPU(tid)) { - DPRINTF(InOrderCPU, "Adding Thread %i to current threads list in CPU." - "\n", tid); - currentThreads.push_back(tid); - } -} - -void -InOrderCPU::removeFromCurrentThreads(ThreadID tid) -{ - if (isThreadInCPU(tid)) { - DPRINTF(InOrderCPU, - "Adding Thread %i to current threads list in CPU.\n", tid); - list::iterator isCurrent = - std::find(currentThreads.begin(), currentThreads.end(), tid); - currentThreads.erase(isCurrent); - } -} - -bool -InOrderCPU::isThreadSuspended(ThreadID tid) -{ - list::iterator isSuspended = - std::find(suspendedThreads.begin(), suspendedThreads.end(), tid); - - return (isSuspended!= suspendedThreads.end()); -} - -void -InOrderCPU::enableVirtProcElement(unsigned vpe) -{ - DPRINTF(InOrderCPU, "[vpe:%i]: Scheduling " - "Enabling of concurrent virtual processor execution", - vpe); - - scheduleCpuEvent(EnableVPEs, NoFault, 0/*tid*/, dummyInst); -} - -void -InOrderCPU::enableVPEs(unsigned vpe) +InOrderCPU::deallocateContext(ThreadID tid, int delay) { - DPRINTF(InOrderCPU, "[vpe:%i]: Enabling Concurrent Execution " - "virtual processors %i", vpe); - - list::iterator thread_it = currentThreads.begin(); + DPRINTF(InOrderCPU,"[tid:%i]: Deallocating ...\n", tid); - while (thread_it != currentThreads.end()) { - if (!isThreadSuspended(*thread_it)) { - activateThread(*thread_it); - } - thread_it++; - } -} + scheduleCpuEvent(DeallocateThread, NoFault, tid, dummyInst, delay); -void -InOrderCPU::disableVirtProcElement(ThreadID tid, unsigned vpe) -{ - DPRINTF(InOrderCPU, "[vpe:%i]: Scheduling " - "Disabling of concurrent virtual processor execution", - vpe); + // Be sure to signal that there's some activity so the CPU doesn't + // deschedule itself. 
+ activityRec.activity(); - scheduleCpuEvent(DisableVPEs, NoFault, 0/*tid*/, dummyInst); + _status = Running; } void -InOrderCPU::disableVPEs(ThreadID tid, unsigned vpe) +InOrderCPU::deallocateThread(ThreadID tid) { - DPRINTF(InOrderCPU, "[vpe:%i]: Disabling Concurrent Execution of " - "virtual processors %i", vpe); + DPRINTF(InOrderCPU, "[tid:%i]: Calling deallocate thread.\n", tid); - unsigned base_vpe = TheISA::getVirtProcNum(tcBase(tid)); - - list::iterator thread_it = activeThreads.begin(); - - vector::iterator> removeList; + if (isThreadActive(tid)) { + DPRINTF(InOrderCPU,"[tid:%i]: Removing from active threads list\n", + tid); + list::iterator thread_it = + std::find(activeThreads.begin(), activeThreads.end(), tid); - while (thread_it != activeThreads.end()) { - if (base_vpe != vpe) { - removeList.push_back(thread_it); - } - thread_it++; - } + removePipelineStalls(*thread_it); - for (int i = 0; i < removeList.size(); i++) { - activeThreads.erase(removeList[i]); + activeThreads.erase(thread_it); } -} -void -InOrderCPU::enableMultiThreading(unsigned vpe) -{ - // Schedule event to take place at end of cycle - DPRINTF(InOrderCPU, "[vpe:%i]: Scheduling Enable Multithreading on " - "virtual processor %i", vpe); - - scheduleCpuEvent(EnableThreads, NoFault, 0/*tid*/, dummyInst); + // TODO: "Un"Load/Unmap register file state + } void -InOrderCPU::enableThreads(unsigned vpe) +InOrderCPU::removePipelineStalls(ThreadID tid) { - DPRINTF(InOrderCPU, "[vpe:%i]: Enabling Multithreading on " - "virtual processor %i", vpe); - - list::iterator thread_it = currentThreads.begin(); + DPRINTF(InOrderCPU,"[tid:%i]: Removing all pipeline stalls\n", + tid); - while (thread_it != currentThreads.end()) { - if (TheISA::getVirtProcNum(tcBase(*thread_it)) == vpe) { - if (!isThreadSuspended(*thread_it)) { - activateThread(*thread_it); - } - } - thread_it++; + for (int stNum = 0; stNum < NumStages ; stNum++) { + pipelineStage[stNum]->removeStalls(tid); } -} -void -InOrderCPU::disableMultiThreading(ThreadID tid, unsigned vpe) -{ - // Schedule event to take place at end of cycle - DPRINTF(InOrderCPU, "[tid:%i]: Scheduling Disable Multithreading on " - "virtual processor %i", tid, vpe); - scheduleCpuEvent(DisableThreads, NoFault, tid, dummyInst); } -void -InOrderCPU::disableThreads(ThreadID tid, unsigned vpe) +bool +InOrderCPU::isThreadSuspended(ThreadID tid) { - DPRINTF(InOrderCPU, "[tid:%i]: Disabling Multithreading on " - "virtual processor %i", tid, vpe); - - list::iterator thread_it = activeThreads.begin(); - - vector::iterator> removeList; - - while (thread_it != activeThreads.end()) { - if (TheISA::getVirtProcNum(tcBase(*thread_it)) == vpe) { - removeList.push_back(thread_it); - } - thread_it++; - } + list::iterator isSuspended = + std::find(suspendedThreads.begin(), suspendedThreads.end(), tid); - for (int i = 0; i < removeList.size(); i++) { - activeThreads.erase(removeList[i]); - } + return (isSuspended!= suspendedThreads.end()); } void @@ -958,6 +833,12 @@ InOrderCPU::activateNextReadyContext(int delay) _status = Running; } +void +InOrderCPU::haltContext(ThreadID tid, int delay) +{ + suspendContext(tid, delay); +} + void InOrderCPU::suspendContext(ThreadID tid, int delay) { @@ -973,24 +854,6 @@ InOrderCPU::suspendThread(ThreadID tid) suspendedThreads.push_back(tid); } -void -InOrderCPU::deallocateContext(ThreadID tid, int delay) -{ - scheduleCpuEvent(DeallocateThread, NoFault, tid, dummyInst, delay); -} - -void -InOrderCPU::deallocateThread(ThreadID tid) -{ - DPRINTF(InOrderCPU,"[tid:%i]: Deallocating ...", 
tid); - - removeFromCurrentThreads(tid); - - deactivateThread(tid); - - squashThreadInPipeline(tid); -} - void InOrderCPU::squashThreadInPipeline(ThreadID tid) { @@ -1000,45 +863,12 @@ InOrderCPU::squashThreadInPipeline(ThreadID tid) } } -void -InOrderCPU::haltContext(ThreadID tid, int delay) -{ - DPRINTF(InOrderCPU, "[tid:%i]: Halt context called.\n", tid); - - // Halt is same thing as deallocate for now - // @TODO: Differentiate between halt & deallocate in the CPU - // model - deallocateContext(tid, delay); -} - -void -InOrderCPU::insertThread(ThreadID tid) -{ - panic("Unimplemented Function\n."); -} - -void -InOrderCPU::removeThread(ThreadID tid) -{ - DPRINTF(InOrderCPU, "Removing Thread %i from CPU.\n", tid); - - /** Broadcast to CPU resources*/ -} - PipelineStage* InOrderCPU::getPipeStage(int stage_num) { return pipelineStage[stage_num]; } - -void -InOrderCPU::activateWhenReady(ThreadID tid) -{ - panic("Unimplemented Function\n."); -} - - uint64_t InOrderCPU::readPC(ThreadID tid) { diff --git a/src/cpu/inorder/cpu.hh b/src/cpu/inorder/cpu.hh index 854f5167c..c31481421 100644 --- a/src/cpu/inorder/cpu.hh +++ b/src/cpu/inorder/cpu.hh @@ -179,10 +179,6 @@ class InOrderCPU : public BaseCPU DeactivateThread, DeallocateThread, SuspendThread, - DisableThreads, - EnableThreads, - DisableVPEs, - EnableVPEs, Trap, InstGraduated, SquashFromMemStall, @@ -347,18 +343,6 @@ class InOrderCPU : public BaseCPU void trap(Fault fault, ThreadID tid, int delay = 0); void trapCPU(Fault fault, ThreadID tid); - /** squashFromMemStall() - sets up a squash event - * squashDueToMemStall() - squashes pipeline - */ - void squashFromMemStall(DynInstPtr inst, ThreadID tid, int delay = 0); - void squashDueToMemStall(int stage_num, InstSeqNum seq_num, ThreadID tid); - - /** Setup CPU to insert a thread's context */ - void insertThread(ThreadID tid); - - /** Remove all of a thread's context from CPU */ - void removeThread(ThreadID tid); - /** Add Thread to Active Threads List. */ void activateContext(ThreadID tid, int delay = 0); void activateThread(ThreadID tid); @@ -367,16 +351,28 @@ class InOrderCPU : public BaseCPU void activateNextReadyContext(int delay = 0); void activateNextReadyThread(); - /** Remove Thread from Active Threads List */ + /** Remove from Active Thread List */ + void deactivateContext(ThreadID tid, int delay = 0); + void deactivateThread(ThreadID tid); + + /** Suspend Thread, Remove from Active Threads List, Add to Suspend List */ + void haltContext(ThreadID tid, int delay = 0); void suspendContext(ThreadID tid, int delay = 0); void suspendThread(ThreadID tid); - /** Remove Thread from Active Threads List && - * Remove Thread Context from CPU. - */ + /** Remove Thread from Active Threads List, Remove Any Loaded Thread State */ void deallocateContext(ThreadID tid, int delay = 0); void deallocateThread(ThreadID tid); - void deactivateThread(ThreadID tid); + + /** squashFromMemStall() - sets up a squash event + * squashDueToMemStall() - squashes pipeline + */ + void squashFromMemStall(DynInstPtr inst, ThreadID tid, int delay = 0); + void squashDueToMemStall(int stage_num, InstSeqNum seq_num, ThreadID tid); + + void removePipelineStalls(ThreadID tid); + void squashThreadInPipeline(ThreadID tid); + void squashBehindMemStall(int stage_num, InstSeqNum seq_num, ThreadID tid); PipelineStage* getPipeStage(int stage_num); @@ -387,37 +383,6 @@ class InOrderCPU : public BaseCPU return 0; } - /** Remove Thread from Active Threads List && - * Remove Thread Context from CPU. 
- */ - void haltContext(ThreadID tid, int delay = 0); - - void removePipelineStalls(ThreadID tid); - - void squashThreadInPipeline(ThreadID tid); - - /// Notify the CPU to enable a virtual processor element. - virtual void enableVirtProcElement(unsigned vpe); - void enableVPEs(unsigned vpe); - - /// Notify the CPU to disable a virtual processor element. - virtual void disableVirtProcElement(ThreadID tid, unsigned vpe); - void disableVPEs(ThreadID tid, unsigned vpe); - - /// Notify the CPU that multithreading is enabled. - virtual void enableMultiThreading(unsigned vpe); - void enableThreads(unsigned vpe); - - /// Notify the CPU that multithreading is disabled. - virtual void disableMultiThreading(ThreadID tid, unsigned vpe); - void disableThreads(ThreadID tid, unsigned vpe); - - /** Activate a Thread When CPU Resources are Available. */ - void activateWhenReady(ThreadID tid); - - /** Add or Remove a Thread Context in the CPU. */ - void doContextSwitch(); - /** Update The Order In Which We Process Threads. */ void updateThreadPriority(); @@ -615,21 +580,15 @@ class InOrderCPU : public BaseCPU /** Active Threads List */ std::list activeThreads; - /** Current Threads List */ - std::list currentThreads; - /** Ready Threads List */ std::list readyThreads; /** Suspended Threads List */ std::list suspendedThreads; - /** Thread Status Functions (Unused Currently) */ - bool isThreadInCPU(ThreadID tid); + /** Thread Status Functions */ bool isThreadActive(ThreadID tid); bool isThreadSuspended(ThreadID tid); - void addToCurrentThreads(ThreadID tid); - void removeFromCurrentThreads(ThreadID tid); private: /** The activity recorder; used to tell if the CPU has any @@ -643,7 +602,8 @@ class InOrderCPU : public BaseCPU ThreadID numActiveThreads() { return activeThreads.size(); } /** Thread id of active thread - * Only used for SwitchOnCacheMiss model. Assumes only 1 thread active + * Only used for SwitchOnCacheMiss model. + * Assumes only 1 thread active */ ThreadID activeThreadId() { @@ -672,9 +632,6 @@ class InOrderCPU : public BaseCPU virtual void wakeup(); #endif - /** Gets a free thread id. Use if thread ids change across system. */ - ThreadID getFreeTid(); - // LL/SC debug functionality unsigned stCondFails; @@ -740,18 +697,9 @@ class InOrderCPU : public BaseCPU /** Per-Stage Instruction Tracing */ bool stageTracing; - /** Is there a context switch pending? */ - bool contextSwitch; - - /** Threads Scheduled to Enter CPU */ - std::list cpuWaitList; - /** The cycle that the CPU was last running, used for statistics. 
*/ Tick lastRunningCycle; - /** Number of Virtual Processors the CPU can process */ - unsigned numVirtProcs; - /** Update Thread , used for statistic purposes*/ inline void tickThreadStats(); diff --git a/src/cpu/inorder/inorder_dyn_inst.cc b/src/cpu/inorder/inorder_dyn_inst.cc index 5ab839615..79f8de05d 100644 --- a/src/cpu/inorder/inorder_dyn_inst.cc +++ b/src/cpu/inorder/inorder_dyn_inst.cc @@ -583,30 +583,6 @@ InOrderDynInst::deallocateContext(int thread_num) this->cpu->deallocateContext(thread_num); } -void -InOrderDynInst::enableVirtProcElement(unsigned vpe) -{ - this->cpu->enableVirtProcElement(vpe); -} - -void -InOrderDynInst::disableVirtProcElement(unsigned vpe) -{ - this->cpu->disableVirtProcElement(threadNumber, vpe); -} - -void -InOrderDynInst::enableMultiThreading(unsigned vpe) -{ - this->cpu->enableMultiThreading(vpe); -} - -void -InOrderDynInst::disableMultiThreading(unsigned vpe) -{ - this->cpu->disableMultiThreading(threadNumber, vpe); -} - template inline Fault InOrderDynInst::read(Addr addr, T &data, unsigned flags) diff --git a/src/cpu/inorder/inorder_dyn_inst.hh b/src/cpu/inorder/inorder_dyn_inst.hh index 522b4e8d7..b573c1029 100644 --- a/src/cpu/inorder/inorder_dyn_inst.hh +++ b/src/cpu/inorder/inorder_dyn_inst.hh @@ -515,12 +515,6 @@ class InOrderDynInst : public FastAlloc, public RefCounted //////////////////////////////////////////////////////////// virtual void deallocateContext(int thread_num); - virtual void enableVirtProcElement(unsigned vpe); - virtual void disableVirtProcElement(unsigned vpe); - - virtual void enableMultiThreading(unsigned vpe); - virtual void disableMultiThreading(unsigned vpe); - //////////////////////////////////////////////////////////// // // PROGRAM COUNTERS - PC/NPC/NPC -- cgit v1.2.3 From 96b493d3159f7e94b8e53edbe562e28076f2af95 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:26:47 -0500 Subject: inorder: ready/suspend status fns update/add in the use of isThreadReady & isThreadSuspended functions.Check in activateThread what list a thread is on so it can be managed accordingly. 
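A thread's state here is tracked purely by which of the three lists (activeThreads, readyThreads, suspendedThreads) it currently sits on, so the new status functions are just std::find membership tests. A minimal standalone sketch of that pattern, with hypothetical names and a plain int standing in for the simulator's ThreadID type (this is illustration only, not code from the patch):

#include <algorithm>
#include <iostream>
#include <list>

typedef int ThreadID;  // stand-in for the simulator's thread-id type

// Mirrors the isThreadActive/isThreadReady/isThreadSuspended checks:
// membership on a list *is* the thread's state.
static bool onList(const std::list<ThreadID> &l, ThreadID tid)
{
    return std::find(l.begin(), l.end(), tid) != l.end();
}

int main()
{
    std::list<ThreadID> activeThreads, readyThreads, suspendedThreads;

    suspendedThreads.push_back(0);   // tid 0 parked on a cache miss
    activeThreads.push_back(1);      // tid 1 currently owns the pipeline

    ThreadID tid = 0;
    if (onList(suspendedThreads, tid)) {
        // activateThread(): first take the thread off the suspended list,
        // then either run it or queue it behind the thread already running.
        suspendedThreads.remove(tid);
        if (activeThreads.empty())
            activeThreads.push_back(tid);
        else
            readyThreads.push_back(tid);
    }

    std::cout << "active=" << activeThreads.size()
              << " ready=" << readyThreads.size()
              << " suspended=" << suspendedThreads.size() << std::endl;
    return 0;
}

The patch itself expresses the same idea with std::find over the thread-id lists and erase() on the returned iterator inside activateThread().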
--- src/cpu/inorder/cpu.cc | 56 +++++++++++++++++++++++++++++++++++--------------- src/cpu/inorder/cpu.hh | 1 + 2 files changed, 41 insertions(+), 16 deletions(-) (limited to 'src') diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index 954309a74..ec6bb21ee 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -628,7 +628,7 @@ InOrderCPU::scheduleCpuEvent(CPUEventType c_event, Fault fault, resPool->scheduleEvent(c_event, inst, 0, 0, tid); } -inline bool +bool InOrderCPU::isThreadActive(ThreadID tid) { list::iterator isActive = @@ -637,6 +637,23 @@ InOrderCPU::isThreadActive(ThreadID tid) return (isActive != activeThreads.end()); } +bool +InOrderCPU::isThreadReady(ThreadID tid) +{ + list::iterator isReady = + std::find(readyThreads.begin(), readyThreads.end(), tid); + + return (isReady != readyThreads.end()); +} + +bool +InOrderCPU::isThreadSuspended(ThreadID tid) +{ + list::iterator isSuspended = + std::find(suspendedThreads.begin(), suspendedThreads.end(), tid); + + return (isSuspended != suspendedThreads.end()); +} void InOrderCPU::activateNextReadyThread() @@ -655,26 +672,40 @@ InOrderCPU::activateNextReadyThread() readyThreads.erase(ready_it); } else { DPRINTF(InOrderCPU, - "No Ready Threads to Activate.\n"); + "Attempting to activate new thread, but No Ready Threads to" + "activate.\n"); } } void InOrderCPU::activateThread(ThreadID tid) { + if (isThreadSuspended(tid)) { + DPRINTF(InOrderCPU, + "Removing [tid:%i] from suspended threads list.\n", tid); + + list::iterator susp_it = + std::find(suspendedThreads.begin(), suspendedThreads.end(), + tid); + suspendedThreads.erase(susp_it); + } + if (threadModel == SwitchOnCacheMiss && numActiveThreads() == 1) { DPRINTF(InOrderCPU, - "Ignoring Activation of [tid:%i]. Placing on " - "ready list\n", tid); + "Ignoring activation of [tid:%i], since [tid:%i] is " + "already running.\n", tid, activeThreadId()); + + DPRINTF(InOrderCPU,"Placing [tid:%i] ready threads list\n", + tid); readyThreads.push_back(tid); - } else if (!isThreadActive(tid)) { + } else if (!isThreadActive(tid)) { DPRINTF(InOrderCPU, - "Adding Thread %i to active threads list in CPU.\n", tid); + "Adding [tid:%i] to active threads list.\n", tid); activeThreads.push_back(tid); - + wakeCPU(); } } @@ -710,6 +741,8 @@ InOrderCPU::deactivateThread(ThreadID tid) activeThreads.erase(thread_it); } + + assert(!isThreadActive(tid)); } void @@ -758,15 +791,6 @@ InOrderCPU::removePipelineStalls(ThreadID tid) } -bool -InOrderCPU::isThreadSuspended(ThreadID tid) -{ - list::iterator isSuspended = - std::find(suspendedThreads.begin(), suspendedThreads.end(), tid); - - return (isSuspended!= suspendedThreads.end()); -} - void InOrderCPU::updateThreadPriority() { diff --git a/src/cpu/inorder/cpu.hh b/src/cpu/inorder/cpu.hh index c31481421..f4f7cb390 100644 --- a/src/cpu/inorder/cpu.hh +++ b/src/cpu/inorder/cpu.hh @@ -588,6 +588,7 @@ class InOrderCPU : public BaseCPU /** Thread Status Functions */ bool isThreadActive(ThreadID tid); + bool isThreadReady(ThreadID tid); bool isThreadSuspended(ThreadID tid); private: -- cgit v1.2.3 From 4ea296e29686154656c380982f987d7b6e1774f0 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:26:54 -0500 Subject: inorder: fetch thread bug dont check total # of threads but instead all active threads --- src/cpu/inorder/cpu.hh | 2 +- src/cpu/inorder/first_stage.cc | 14 ++++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/cpu/inorder/cpu.hh b/src/cpu/inorder/cpu.hh index 
f4f7cb390..7ac433723 100644 --- a/src/cpu/inorder/cpu.hh +++ b/src/cpu/inorder/cpu.hh @@ -611,7 +611,7 @@ class InOrderCPU : public BaseCPU if (numActiveThreads() > 0) return activeThreads.front(); else - return -1; + return InvalidThreadID; } diff --git a/src/cpu/inorder/first_stage.cc b/src/cpu/inorder/first_stage.cc index 1427ca46a..75e13e559 100644 --- a/src/cpu/inorder/first_stage.cc +++ b/src/cpu/inorder/first_stage.cc @@ -205,11 +205,12 @@ FirstStage::processInsts(ThreadID tid) ThreadID FirstStage::getFetchingThread(FetchPriority &fetch_priority) { - if (numThreads > 1) { - switch (fetch_priority) { + ThreadID num_active_threads = cpu->numActiveThreads(); + if (num_active_threads > 1) { + switch (fetch_priority) { case SingleThread: - return 0; + return cpu->activeThreadId(); case RoundRobin: return roundRobin(); @@ -217,7 +218,7 @@ FirstStage::getFetchingThread(FetchPriority &fetch_priority) default: return InvalidThreadID; } - } else { + } else if (num_active_threads == 1) { ThreadID tid = *activeThreads->begin(); if (stageStatus[tid] == Running || @@ -226,8 +227,9 @@ FirstStage::getFetchingThread(FetchPriority &fetch_priority) } else { return InvalidThreadID; } - } - + } else { + return InvalidThreadID; + } } ThreadID -- cgit v1.2.3 From 4dbc2f17180d3d8c82d5414daa55b102de9755e5 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:27:02 -0500 Subject: inorder: suspend in respool give resources their own specific activity to do for a "suspend" event instead of defaulting to deactivating the thread for a suspend thread event. This really matters for the fetch sequence unit which wants to remove the thread from fetching while other units want to ignore a thread suspension. If you deactivate a thread in a resource then you may lose some of the allotted bandwidth that the thread is taking up... --- src/cpu/inorder/resource.hh | 4 +++ src/cpu/inorder/resource_pool.cc | 48 +++++++++++++++++++++++++---- src/cpu/inorder/resource_pool.hh | 3 ++ src/cpu/inorder/resources/cache_unit.cc | 14 +++++++-- src/cpu/inorder/resources/fetch_seq_unit.cc | 6 ++++ src/cpu/inorder/resources/fetch_seq_unit.hh | 1 + 6 files changed, 68 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/cpu/inorder/resource.hh b/src/cpu/inorder/resource.hh index f7c4b8fcd..4ae4db818 100644 --- a/src/cpu/inorder/resource.hh +++ b/src/cpu/inorder/resource.hh @@ -93,6 +93,10 @@ class Resource { */ virtual void deactivateThread(ThreadID tid); + /** Resources that care about thread activation override this. 
*/ + virtual void suspendThread(ThreadID tid) { } + + /** Resources that care when an instruction has been graduated * can override this */ diff --git a/src/cpu/inorder/resource_pool.cc b/src/cpu/inorder/resource_pool.cc index 97ba4d087..45a4a9e60 100644 --- a/src/cpu/inorder/resource_pool.cc +++ b/src/cpu/inorder/resource_pool.cc @@ -226,7 +226,7 @@ ResourcePool::scheduleEvent(InOrderCPU::CPUEventType e_type, DynInstPtr inst, } break; - case InOrderCPU::SuspendThread: + case InOrderCPU::DeactivateThread: case InOrderCPU::DeallocateThread: { @@ -246,6 +246,23 @@ ResourcePool::scheduleEvent(InOrderCPU::CPUEventType e_type, DynInstPtr inst, } break; + case InOrderCPU::SuspendThread: + { + + DPRINTF(Resource, "Scheduling Suspend Thread Resource Pool Event for tick %i.\n", + curTick + delay); + ResPoolEvent *res_pool_event = new ResPoolEvent(this, + e_type, + inst, + inst->squashingStage, + inst->bdelaySeqNum, + tid); + + mainEventQueue.schedule(res_pool_event, curTick + cpu->ticks(delay)); + + } + break; + case ResourcePool::InstGraduated: { DPRINTF(Resource, "Scheduling Inst-Graduated Resource Pool " @@ -309,8 +326,9 @@ void ResourcePool::squashAll(DynInstPtr inst, int stage_num, InstSeqNum done_seq_num, ThreadID tid) { - DPRINTF(Resource, "[tid:%i] Stage %i squashing all instructions above " - "[sn:%i].\n", tid, stage_num, done_seq_num); + DPRINTF(Resource, "[tid:%i] Broadcasting Squash All Event " + " starting w/stage %i for all instructions above [sn:%i].\n", + tid, stage_num, done_seq_num); int num_resources = resources.size(); @@ -323,8 +341,9 @@ void ResourcePool::squashDueToMemStall(DynInstPtr inst, int stage_num, InstSeqNum done_seq_num, ThreadID tid) { - DPRINTF(Resource, "[tid:%i] Stage %i squashing all instructions above " - "[sn:%i].\n", stage_num, tid, done_seq_num); + DPRINTF(Resource, "[tid:%i] Broadcasting SquashDueToMemStall Event" + " starting w/stage %i for all instructions above [sn:%i].\n", + tid, stage_num, done_seq_num); int num_resources = resources.size(); @@ -370,6 +389,19 @@ ResourcePool::deactivateAll(ThreadID tid) } } +void +ResourcePool::suspendAll(ThreadID tid) +{ + DPRINTF(Resource, "[tid:%i] Broadcasting Thread Suspension to all resources.\n", + tid); + + int num_resources = resources.size(); + + for (int idx = 0; idx < num_resources; idx++) { + resources[idx]->suspendThread(tid); + } +} + void ResourcePool::instGraduated(InstSeqNum seq_num, ThreadID tid) { @@ -409,11 +441,15 @@ ResourcePool::ResPoolEvent::process() resPool->activateAll(tid); break; - case InOrderCPU::SuspendThread: + case InOrderCPU::DeactivateThread: case InOrderCPU::DeallocateThread: resPool->deactivateAll(tid); break; + case InOrderCPU::SuspendThread: + resPool->suspendAll(tid); + break; + case ResourcePool::InstGraduated: resPool->instGraduated(seqNum, tid); break; diff --git a/src/cpu/inorder/resource_pool.hh b/src/cpu/inorder/resource_pool.hh index 61e691f35..ae63c4c59 100644 --- a/src/cpu/inorder/resource_pool.hh +++ b/src/cpu/inorder/resource_pool.hh @@ -172,6 +172,9 @@ class ResourcePool { /** De-Activate Thread in all resources */ void deactivateAll(ThreadID tid); + /** De-Activate Thread in all resources */ + void suspendAll(ThreadID tid); + /** Broadcast graduation to all resources */ void instGraduated(InstSeqNum seq_num, ThreadID tid); diff --git a/src/cpu/inorder/resources/cache_unit.cc b/src/cpu/inorder/resources/cache_unit.cc index 570d27fbe..8f92db3e4 100644 --- a/src/cpu/inorder/resources/cache_unit.cc +++ b/src/cpu/inorder/resources/cache_unit.cc @@ -158,9 +158,9 @@ 
CacheUnit::getSlot(DynInstPtr inst) return new_slot; } else { DPRINTF(InOrderCachePort, - "Denying request because there is an outstanding" + "[tid:%i] Denying request because there is an outstanding" " request to/for addr. %08p. by [sn:%i] @ tick %i\n", - req_addr, addrMap[req_addr], inst->memTime); + inst->readTid(), req_addr, addrMap[req_addr], inst->memTime); return -1; } } @@ -702,6 +702,13 @@ CacheUnit::processCacheCompletion(PacketPtr pkt) cache_req->setMemAccPending(false); cache_req->setMemAccCompleted(); + if (cache_req->isMemStall() && + cpu->threadModel == InOrderCPU::SwitchOnCacheMiss) { + DPRINTF(InOrderCachePort, "[tid:%u] Waking up from Cache Miss.\n"); + + cpu->activateContext(tid); + } + // Wake up the CPU (if it went to sleep and was waiting on this // completion event). cpu->wakeCPU(); @@ -784,6 +791,9 @@ CacheUnit::squashDueToMemStall(DynInstPtr inst, int stage_num, // thread then you need to reevaluate this code // NOTE: squash should originate from // pipeline_stage.cc:processInstSchedule + DPRINTF(InOrderCachePort, "Squashing above [sn:%u]\n", + squash_seq_num + 1); + squash(inst, stage_num, squash_seq_num + 1, tid); } diff --git a/src/cpu/inorder/resources/fetch_seq_unit.cc b/src/cpu/inorder/resources/fetch_seq_unit.cc index 1d0b92075..e0b9ea1f9 100644 --- a/src/cpu/inorder/resources/fetch_seq_unit.cc +++ b/src/cpu/inorder/resources/fetch_seq_unit.cc @@ -336,3 +336,9 @@ FetchSeqUnit::deactivateThread(ThreadID tid) if (thread_it != cpu->fetchPriorityList.end()) cpu->fetchPriorityList.erase(thread_it); } + +void +FetchSeqUnit::suspendThread(ThreadID tid) +{ + deactivateThread(tid); +} diff --git a/src/cpu/inorder/resources/fetch_seq_unit.hh b/src/cpu/inorder/resources/fetch_seq_unit.hh index a4495564b..fdbc4521f 100644 --- a/src/cpu/inorder/resources/fetch_seq_unit.hh +++ b/src/cpu/inorder/resources/fetch_seq_unit.hh @@ -59,6 +59,7 @@ class FetchSeqUnit : public Resource { virtual void init(); virtual void activateThread(ThreadID tid); virtual void deactivateThread(ThreadID tid); + virtual void suspendThread(ThreadID tid); virtual void execute(int slot_num); /** Override default Resource squash sequence. This actually, -- cgit v1.2.3 From 611a8642c2d50989da15e1ddd9dc87c036e8ab99 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:27:12 -0500 Subject: inorder: mem. mgmt. 
update update address List and address Map to take into account multiple threads --- src/cpu/inorder/resources/cache_unit.cc | 18 +++++++++++------- src/cpu/inorder/resources/cache_unit.hh | 4 ++-- 2 files changed, 13 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/cpu/inorder/resources/cache_unit.cc b/src/cpu/inorder/resources/cache_unit.cc index 8f92db3e4..3de5c518a 100644 --- a/src/cpu/inorder/resources/cache_unit.cc +++ b/src/cpu/inorder/resources/cache_unit.cc @@ -131,6 +131,8 @@ CacheUnit::init() int CacheUnit::getSlot(DynInstPtr inst) { + ThreadID tid = inst->readTid(); + if (tlbBlocked[inst->threadNumber]) { return -1; } @@ -142,7 +144,7 @@ CacheUnit::getSlot(DynInstPtr inst) Addr req_addr = inst->getMemAddr(); if (resName == "icache_port" || - find(addrList.begin(), addrList.end(), req_addr) == addrList.end()) { + find(addrList[tid].begin(), addrList[tid].end(), req_addr) == addrList[tid].end()) { int new_slot = Resource::getSlot(inst); @@ -150,8 +152,8 @@ CacheUnit::getSlot(DynInstPtr inst) return -1; inst->memTime = curTick; - addrList.push_back(req_addr); - addrMap[req_addr] = inst->seqNum; + addrList[tid].push_back(req_addr); + addrMap[tid][req_addr] = inst->seqNum; DPRINTF(InOrderCachePort, "[tid:%i]: [sn:%i]: Address %08p added to dependency list\n", inst->readTid(), inst->seqNum, req_addr); @@ -160,7 +162,7 @@ CacheUnit::getSlot(DynInstPtr inst) DPRINTF(InOrderCachePort, "[tid:%i] Denying request because there is an outstanding" " request to/for addr. %08p. by [sn:%i] @ tick %i\n", - inst->readTid(), req_addr, addrMap[req_addr], inst->memTime); + inst->readTid(), req_addr, addrMap[tid][req_addr], inst->memTime); return -1; } } @@ -168,15 +170,17 @@ CacheUnit::getSlot(DynInstPtr inst) void CacheUnit::freeSlot(int slot_num) { - vector::iterator vect_it = find(addrList.begin(), addrList.end(), + ThreadID tid = reqMap[slot_num]->inst->readTid(); + + vector::iterator vect_it = find(addrList[tid].begin(), addrList[tid].end(), reqMap[slot_num]->inst->getMemAddr()); - assert(vect_it != addrList.end()); + assert(vect_it != addrList[tid].end()); DPRINTF(InOrderCachePort, "[tid:%i]: Address %08p removed from dependency list\n", reqMap[slot_num]->inst->readTid(), (*vect_it)); - addrList.erase(vect_it); + addrList[tid].erase(vect_it); Resource::freeSlot(slot_num); } diff --git a/src/cpu/inorder/resources/cache_unit.hh b/src/cpu/inorder/resources/cache_unit.hh index a6b07ebd9..26f6859ed 100644 --- a/src/cpu/inorder/resources/cache_unit.hh +++ b/src/cpu/inorder/resources/cache_unit.hh @@ -198,9 +198,9 @@ class CacheUnit : public Resource bool cacheBlocked; - std::vector addrList; + std::vector addrList[ThePipeline::MaxThreads]; - std::map addrMap; + std::map addrMap[ThePipeline::MaxThreads]; public: int cacheBlkSize; -- cgit v1.2.3 From 3eb04b4ad73cb66e86d09ffd5989a93d9f62b299 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:27:25 -0500 Subject: inorder: add threadmodel flag this prints out messages relative to what threading model is being used (smt, switch-on-miss, single, etc.) 
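The flag is only declared here; it is exercised by the DPRINTF(ThreadModel, ...) call sites added in the following patches, e.g. DPRINTF(ThreadModel, "Suspending [tid:%i] due to cache miss.\n", tid). As a rough, self-contained illustration of how a named trace flag gates debug output (hypothetical macro and registry for this sketch only, not the real gem5 tracing code, whose flags are generated from the TraceFlag() declarations in the SConscript):

#include <cstdio>
#include <map>
#include <string>

// Hypothetical flag registry standing in for the generated trace flags.
static std::map<std::string, bool> traceFlags = { { "ThreadModel", true } };

// Hypothetical DPRINTF-like macro: print only if the named flag is enabled.
#define SKETCH_DPRINTF(flag, ...)            \
    do {                                     \
        if (traceFlags[#flag])               \
            std::printf(__VA_ARGS__);        \
    } while (0)

int main()
{
    int tid = 1;
    SKETCH_DPRINTF(ThreadModel, "Suspending [tid:%i] due to cache miss.\n",
                   tid);
    SKETCH_DPRINTF(InOrderStage, "never printed: flag not enabled\n");
    return 0;
}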
--- src/cpu/inorder/SConscript | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/cpu/inorder/SConscript b/src/cpu/inorder/SConscript index 82a1028c2..afc6a29e4 100644 --- a/src/cpu/inorder/SConscript +++ b/src/cpu/inorder/SConscript @@ -52,12 +52,14 @@ if 'InOrderCPU' in env['CPU_MODELS']: TraceFlag('InOrderUseDef') TraceFlag('InOrderMDU') TraceFlag('InOrderGraduation') + TraceFlag('ThreadModel') TraceFlag('RefCount') CompoundFlag('InOrderCPUAll', [ 'InOrderStage', 'InOrderStall', 'InOrderCPU', 'InOrderMDU', 'InOrderAGEN', 'InOrderFetchSeq', 'InOrderTLB', 'InOrderBPred', 'InOrderDecode', 'InOrderExecute', 'InOrderInstBuffer', 'InOrderUseDef', - 'InOrderGraduation', 'InOrderCachePort', 'RegDepMap', 'Resource']) + 'InOrderGraduation', 'InOrderCachePort', 'RegDepMap', 'Resource', + 'ThreadModel']) Source('pipeline_traits.cc') Source('inorder_dyn_inst.cc') -- cgit v1.2.3 From 90d3b45a566847fe15095b92238e32973ad9cc0e Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:27:38 -0500 Subject: inorder: ready thread wakeup allow a thread to wakeup and be activated after it has been in suspended state and another thread is switched out. Need to give pipeline stages a "activateThread" function so that can get to their suspended instruction when the time is right. --- src/cpu/inorder/cpu.cc | 14 +++++++++++++- src/cpu/inorder/cpu.hh | 3 ++- src/cpu/inorder/pipeline_stage.cc | 30 ++++++++++++++++++++++++++++++ src/cpu/inorder/pipeline_stage.hh | 2 ++ src/cpu/inorder/resources/cache_unit.cc | 5 ++++- 5 files changed, 51 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index ec6bb21ee..501150386 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -674,6 +674,8 @@ InOrderCPU::activateNextReadyThread() DPRINTF(InOrderCPU, "Attempting to activate new thread, but No Ready Threads to" "activate.\n"); + DPRINTF(InOrderCPU, + "Unable to switch to next active thread.\n"); } } @@ -696,7 +698,7 @@ InOrderCPU::activateThread(ThreadID tid) "Ignoring activation of [tid:%i], since [tid:%i] is " "already running.\n", tid, activeThreadId()); - DPRINTF(InOrderCPU,"Placing [tid:%i] ready threads list\n", + DPRINTF(InOrderCPU,"Placing [tid:%i] on ready threads list\n", tid); readyThreads.push_back(tid); @@ -706,10 +708,20 @@ InOrderCPU::activateThread(ThreadID tid) "Adding [tid:%i] to active threads list.\n", tid); activeThreads.push_back(tid); + activateThreadInPipeline(tid); + wakeCPU(); } } +void +InOrderCPU::activateThreadInPipeline(ThreadID tid) +{ + for (int stNum=0; stNum < NumStages; stNum++) { + pipelineStage[stNum]->activateThread(tid); + } +} + void InOrderCPU::deactivateContext(ThreadID tid, int delay) { diff --git a/src/cpu/inorder/cpu.hh b/src/cpu/inorder/cpu.hh index 7ac433723..1e514e1ed 100644 --- a/src/cpu/inorder/cpu.hh +++ b/src/cpu/inorder/cpu.hh @@ -346,7 +346,8 @@ class InOrderCPU : public BaseCPU /** Add Thread to Active Threads List. */ void activateContext(ThreadID tid, int delay = 0); void activateThread(ThreadID tid); - + void activateThreadInPipeline(ThreadID tid); + /** Add Thread to Active Threads List. 
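// The activation path above makes two decisions worth noting: a thread that
// cannot run yet, because another thread still owns the pipeline, is parked on
// a ready list rather than dropped, and a thread that can run is announced to
// every pipeline stage so a stage holding its switched-out instruction can
// resume it. A condensed standalone sketch of that flow; the lists and the
// Stage interface here are stand-ins, not the simulator's own classes.
#include <list>

typedef short ThreadID;
static const int NumStages = 5;                  // assumed pipeline depth

struct Stage {
    virtual void activateThread(ThreadID tid) = 0;
    virtual ~Stage() {}
};

std::list<ThreadID> activeThreads;
std::list<ThreadID> readyThreads;
Stage *pipelineStage[NumStages];

void activateThread(ThreadID tid, bool anotherThreadRunning)
{
    if (anotherThreadRunning) {
        readyThreads.push_back(tid);             // run it at the next switch point
        return;
    }
    activeThreads.push_back(tid);
    for (int stNum = 0; stNum < NumStages; stNum++)
        pipelineStage[stNum]->activateThread(tid);   // activateThreadInPipeline()
    // ...wakeCPU() and the statistics updates follow in the patch itself
}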
*/ void activateNextReadyContext(int delay = 0); void activateNextReadyThread(); diff --git a/src/cpu/inorder/pipeline_stage.cc b/src/cpu/inorder/pipeline_stage.cc index 30a3733b0..ef91f206b 100644 --- a/src/cpu/inorder/pipeline_stage.cc +++ b/src/cpu/inorder/pipeline_stage.cc @@ -558,6 +558,28 @@ PipelineStage::updateStatus() } } +void +PipelineStage::activateThread(ThreadID tid) +{ + if (cpu->threadModel == InOrderCPU::SwitchOnCacheMiss) { + if (!switchedOutValid[tid]) { + DPRINTF(InOrderStage, "[tid:%i] No instruction available in " + "switch out buffer.\n", tid); + } else { + DynInstPtr inst = switchedOutBuffer[tid]; + + DPRINTF(InOrderStage,"[tid:%i]: Re-Inserting [sn:%lli] PC:%#x into stage skidBuffer %i\n", + tid, inst->seqNum, inst->readPC(), inst->threadNumber); + + skidBuffer[tid].push(inst); + + switchedOutBuffer[tid] = NULL; + + switchedOutValid[tid] = false; + } + } + +} void @@ -945,6 +967,11 @@ PipelineStage::processInstSchedule(DynInstPtr inst) if (req->isMemStall() && cpu->threadModel == InOrderCPU::SwitchOnCacheMiss) { // Save Stalling Instruction + DPRINTF(ThreadModel, "[tid:%i] Detected cache miss.\n", tid); + + DPRINTF(InOrderStage, "Inserting [tid:%i][sn:%i] into switch out buffer.\n", + tid, inst->seqNum); + switchedOutBuffer[tid] = inst; switchedOutValid[tid] = true; @@ -956,9 +983,12 @@ PipelineStage::processInstSchedule(DynInstPtr inst) // Switch On Cache Miss //===================== // Suspend Thread at end of cycle + DPRINTF(ThreadModel, "Suspending [tid:%i] due to cache miss.\n", tid); cpu->suspendContext(tid); // Activate Next Ready Thread at end of cycle + DPRINTF(ThreadModel, "Attempting to activate next ready thread due to" + " cache miss.\n"); cpu->activateNextReadyContext(); } diff --git a/src/cpu/inorder/pipeline_stage.hh b/src/cpu/inorder/pipeline_stage.hh index f10906e4c..dfe1ac7c3 100644 --- a/src/cpu/inorder/pipeline_stage.hh +++ b/src/cpu/inorder/pipeline_stage.hh @@ -235,6 +235,8 @@ class PipelineStage public: + virtual void activateThread(ThreadID tid); + /** Squashes if there is a PC-relative branch that was predicted * incorrectly. Sends squash information back to fetch. 
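// Taken together, the hunks above are the hand-off for switch-on-cache-miss:
// the stage that detects the stalling memory instruction parks it, gives up
// the pipeline, and pushes the same instruction back into its skid buffer when
// the thread is activated again. Reduced to the bookkeeping alone (only the
// member names come from the patch; the surrounding types are illustrative):
#include <queue>

struct DynInst { long long seqNum; };
typedef DynInst *DynInstPtr;
static const int MaxThreads = 4;

std::queue<DynInstPtr> skidBuffer[MaxThreads];
DynInstPtr switchedOutBuffer[MaxThreads] = { 0 };
bool switchedOutValid[MaxThreads] = { false };

// processInstSchedule() path: a cache miss was detected this cycle.
void parkStallingInst(int tid, DynInstPtr inst)
{
    switchedOutBuffer[tid] = inst;
    switchedOutValid[tid] = true;
    // ...suspendContext(tid) and activateNextReadyContext() are then scheduled
}

// activateThread() path: the thread is switched back in after the miss.
void resumeParkedInst(int tid)
{
    if (!switchedOutValid[tid])
        return;                                  // nothing was parked for this thread
    skidBuffer[tid].push(switchedOutBuffer[tid]);
    switchedOutBuffer[tid] = 0;
    switchedOutValid[tid] = false;
}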
*/ diff --git a/src/cpu/inorder/resources/cache_unit.cc b/src/cpu/inorder/resources/cache_unit.cc index 3de5c518a..2cf6c3195 100644 --- a/src/cpu/inorder/resources/cache_unit.cc +++ b/src/cpu/inorder/resources/cache_unit.cc @@ -708,9 +708,12 @@ CacheUnit::processCacheCompletion(PacketPtr pkt) if (cache_req->isMemStall() && cpu->threadModel == InOrderCPU::SwitchOnCacheMiss) { - DPRINTF(InOrderCachePort, "[tid:%u] Waking up from Cache Miss.\n"); + DPRINTF(InOrderCachePort, "[tid:%u] Waking up from Cache Miss.\n", tid); cpu->activateContext(tid); + + DPRINTF(ThreadModel, "Activating [tid:%i] after return from cache" + "miss.\n", tid); } // Wake up the CPU (if it went to sleep and was waiting on this -- cgit v1.2.3 From aacc5cb205c17a91545a5d8209f5c4bda85543a9 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:27:49 -0500 Subject: inorder: add updatePC event to resPool this will be used for when a thread comes back from a cache miss, it needs to update the PCs because the inst might of been a branch or delayslot in which the next PC isnt always a straight addition --- src/cpu/inorder/pipeline_stage.cc | 7 +++++- src/cpu/inorder/resource.hh | 4 ++++ src/cpu/inorder/resource_pool.cc | 34 +++++++++++++++++++++++++++++ src/cpu/inorder/resource_pool.hh | 4 ++++ src/cpu/inorder/resources/cache_unit.cc | 6 ++--- src/cpu/inorder/resources/fetch_seq_unit.cc | 14 ++++++++++++ src/cpu/inorder/resources/fetch_seq_unit.hh | 2 ++ 7 files changed, 67 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/cpu/inorder/pipeline_stage.cc b/src/cpu/inorder/pipeline_stage.cc index ef91f206b..620951e34 100644 --- a/src/cpu/inorder/pipeline_stage.cc +++ b/src/cpu/inorder/pipeline_stage.cc @@ -571,10 +571,15 @@ PipelineStage::activateThread(ThreadID tid) DPRINTF(InOrderStage,"[tid:%i]: Re-Inserting [sn:%lli] PC:%#x into stage skidBuffer %i\n", tid, inst->seqNum, inst->readPC(), inst->threadNumber); + // Make instruction available for pipeline processing skidBuffer[tid].push(inst); - switchedOutBuffer[tid] = NULL; + // Update PC so that we start fetching after this instruction to prevent + // "double"-execution of instructions + cpu->resPool->scheduleEvent((InOrderCPU::CPUEventType)ResourcePool::UpdateAfterContextSwitch, inst, 0, 0, tid); + // Clear switchout buffer + switchedOutBuffer[tid] = NULL; switchedOutValid[tid] = false; } } diff --git a/src/cpu/inorder/resource.hh b/src/cpu/inorder/resource.hh index 4ae4db818..383340df2 100644 --- a/src/cpu/inorder/resource.hh +++ b/src/cpu/inorder/resource.hh @@ -96,6 +96,10 @@ class Resource { /** Resources that care about thread activation override this. */ virtual void suspendThread(ThreadID tid) { } + /** Will be called the cycle before a context switch. 
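// The other half of the hand-off is above: when the outstanding miss finally
// completes, the thread that was switched out on it is re-activated, and while
// its parked instruction is re-inserted the stage schedules an
// UpdateAfterContextSwitch event so fetch can recompute its PCs first (the
// commit message's point about branches and delay slots). A condensed sketch
// of that chain; the two hooks below are placeholders for cpu->activateContext()
// and the resource-pool event, and the enum is abridged.
#include <cstdio>

enum ThreadModel { SMT, SwitchOnCacheMiss };

void activateContext(int tid)             { std::printf("activate tid %d\n", tid); }
void schedulePCUpdateAfterSwitch(int tid) { std::printf("fix PCs for tid %d\n", tid); }

void onCacheCompletion(int tid, bool reqWasMemStall, ThreadModel model)
{
    if (reqWasMemStall && model == SwitchOnCacheMiss) {
        // This miss is the one the thread was switched out on: bring the
        // thread back and let the fetch sequencer fix PC/NPC/NNPC before it
        // issues anything new.
        activateContext(tid);
        schedulePCUpdateAfterSwitch(tid);
    }
    // ...the CPU is then woken in case it was idling on this completion.
}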
Any bookkeeping + * that needs to be kept for that, can be done here + */ + virtual void updateAfterContextSwitch(DynInstPtr inst, ThreadID tid) { } /** Resources that care when an instruction has been graduated * can override this diff --git a/src/cpu/inorder/resource_pool.cc b/src/cpu/inorder/resource_pool.cc index 45a4a9e60..20f112a66 100644 --- a/src/cpu/inorder/resource_pool.cc +++ b/src/cpu/inorder/resource_pool.cc @@ -201,6 +201,9 @@ ResourcePool::slotsInUse(int res_idx) return resources[res_idx]->slotsInUse(); } +//@todo: split this function and call this version schedulePoolEvent +// and use this scheduleEvent for scheduling a specific event on +// a resource void ResourcePool::scheduleEvent(InOrderCPU::CPUEventType e_type, DynInstPtr inst, int delay, int res_idx, ThreadID tid) @@ -310,6 +313,20 @@ ResourcePool::scheduleEvent(InOrderCPU::CPUEventType e_type, DynInstPtr inst, } break; + case ResourcePool::UpdateAfterContextSwitch: + { + DPRINTF(Resource, "Scheduling UpdatePC Resource Pool Event for tick %i.\n", + curTick + delay); + ResPoolEvent *res_pool_event = new ResPoolEvent(this,e_type, + inst, + inst->squashingStage, + inst->seqNum, + inst->readTid()); + mainEventQueue.schedule(res_pool_event, curTick + cpu->ticks(delay)); + + } + break; + default: DPRINTF(Resource, "Ignoring Unrecognized CPU Event (%s).\n", InOrderCPU::eventNames[e_type]); @@ -415,6 +432,19 @@ ResourcePool::instGraduated(InstSeqNum seq_num, ThreadID tid) } } +void +ResourcePool::updateAfterContextSwitch(DynInstPtr inst, ThreadID tid) +{ + DPRINTF(Resource, "[tid:%i] Broadcasting Update PC to all resources.\n", + tid); + + int num_resources = resources.size(); + + for (int idx = 0; idx < num_resources; idx++) { + resources[idx]->updateAfterContextSwitch(inst, tid); + } +} + ResourcePool::ResPoolEvent::ResPoolEvent(ResourcePool *_resPool) : Event((Event::Priority)((unsigned)CPU_Tick_Pri+5)), resPool(_resPool), eventType((InOrderCPU::CPUEventType) Default) @@ -462,6 +492,10 @@ ResourcePool::ResPoolEvent::process() resPool->squashDueToMemStall(inst, stageNum, seqNum, tid); break; + case ResourcePool::UpdateAfterContextSwitch: + resPool->updateAfterContextSwitch(inst, tid); + break; + default: fatal("Unrecognized Event Type"); } diff --git a/src/cpu/inorder/resource_pool.hh b/src/cpu/inorder/resource_pool.hh index ae63c4c59..3f62d2caa 100644 --- a/src/cpu/inorder/resource_pool.hh +++ b/src/cpu/inorder/resource_pool.hh @@ -63,6 +63,7 @@ class ResourcePool { enum ResPoolEventType { InstGraduated = InOrderCPU::NumCPUEvents, SquashAll, + UpdateAfterContextSwitch, Default }; @@ -175,6 +176,9 @@ class ResourcePool { /** De-Activate Thread in all resources */ void suspendAll(ThreadID tid); + /** Broadcast Context Switch Update to all resources */ + void updateAfterContextSwitch(DynInstPtr inst, ThreadID tid); + /** Broadcast graduation to all resources */ void instGraduated(InstSeqNum seq_num, ThreadID tid); diff --git a/src/cpu/inorder/resources/cache_unit.cc b/src/cpu/inorder/resources/cache_unit.cc index 2cf6c3195..4f9ed3eca 100644 --- a/src/cpu/inorder/resources/cache_unit.cc +++ b/src/cpu/inorder/resources/cache_unit.cc @@ -49,14 +49,14 @@ using namespace ThePipeline; Tick CacheUnit::CachePort::recvAtomic(PacketPtr pkt) { - panic("DefaultFetch doesn't expect recvAtomic callback!"); + panic("CacheUnit::CachePort doesn't expect recvAtomic callback!"); return curTick; } void CacheUnit::CachePort::recvFunctional(PacketPtr pkt) { - panic("DefaultFetch doesn't expect recvFunctional callback!"); + 
panic("CacheUnit::CachePort doesn't expect recvFunctional callback!"); } void @@ -65,7 +65,7 @@ CacheUnit::CachePort::recvStatusChange(Status status) if (status == RangeChange) return; - panic("DefaultFetch doesn't expect recvStatusChange callback!"); + panic("CacheUnit::CachePort doesn't expect recvStatusChange callback!"); } bool diff --git a/src/cpu/inorder/resources/fetch_seq_unit.cc b/src/cpu/inorder/resources/fetch_seq_unit.cc index e0b9ea1f9..c217f972e 100644 --- a/src/cpu/inorder/resources/fetch_seq_unit.cc +++ b/src/cpu/inorder/resources/fetch_seq_unit.cc @@ -342,3 +342,17 @@ FetchSeqUnit::suspendThread(ThreadID tid) { deactivateThread(tid); } + +void +FetchSeqUnit::updateAfterContextSwitch(DynInstPtr inst, ThreadID tid) +{ + pcValid[tid] = true; + + PC[tid] = inst->readNextPC(); + nextPC[tid] = inst->readNextNPC(); + nextNPC[tid] = inst->readNextNPC() + instSize; + + + DPRINTF(InOrderFetchSeq, "[tid:%i]: Updating PC:%08p NPC:%08p NNPC:%08p.\n", + tid, PC[tid], nextPC[tid], nextNPC[tid]); +} diff --git a/src/cpu/inorder/resources/fetch_seq_unit.hh b/src/cpu/inorder/resources/fetch_seq_unit.hh index fdbc4521f..3283e0330 100644 --- a/src/cpu/inorder/resources/fetch_seq_unit.hh +++ b/src/cpu/inorder/resources/fetch_seq_unit.hh @@ -61,6 +61,8 @@ class FetchSeqUnit : public Resource { virtual void deactivateThread(ThreadID tid); virtual void suspendThread(ThreadID tid); virtual void execute(int slot_num); + void updateAfterContextSwitch(DynInstPtr inst, ThreadID tid); + /** Override default Resource squash sequence. This actually, * looks in the global communication buffer to get squash -- cgit v1.2.3 From 069b38c0d546708491d0da84668ba32f82ca7cb8 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:27:58 -0500 Subject: inorder: track last branch committed when threads are switching in/out the CPU, we need to keep track of special cases like branches. Add appropriate variables in ThreadState t track this and then use these variables when updating pc after context switch --- src/cpu/inorder/cpu.cc | 27 +++++++++++++++++---------- src/cpu/inorder/pipeline_stage.cc | 9 ++++++--- src/cpu/inorder/resources/fetch_seq_unit.cc | 26 +++++++++++++++++++------- src/cpu/inorder/thread_state.hh | 13 +++++++++---- 4 files changed, 51 insertions(+), 24 deletions(-) (limited to 'src') diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index 501150386..8d41a18b4 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -709,7 +709,9 @@ InOrderCPU::activateThread(ThreadID tid) activeThreads.push_back(tid); activateThreadInPipeline(tid); - + + thread[tid]->lastActivate = curTick; + wakeCPU(); } } @@ -888,6 +890,7 @@ InOrderCPU::suspendThread(ThreadID tid) DPRINTF(InOrderCPU, "[tid: %i]: Placing on Suspended Threads List...\n", tid); deactivateThread(tid); suspendedThreads.push_back(tid); + thread[tid]->lastSuspend = curTick; } void @@ -1063,15 +1066,22 @@ void InOrderCPU::instDone(DynInstPtr inst, ThreadID tid) { // Set the CPU's PCs - This contributes to the precise state of the CPU - // which can be used when restoring a thread to the CPU after a fork or - // after an exception - // ================= - // @TODO: Set-Up Grad-Info/Committed-Info to let ThreadState know if - // it's a branch or not + // which can be used when restoring a thread to the CPU after after any + // type of context switching activity (fork, exception, etc.) 
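// The resource pool's role in this is a plain broadcast, as shown above: the
// UpdateAfterContextSwitch pool event fires once and every resource gets a
// chance to do its own bookkeeping (the fetch sequencer rewrites its PCs, the
// use-def unit samples its per-switch statistics, most resources ignore it).
// The shape of that dispatch, with a stand-in Resource interface:
#include <vector>

struct DynInst;                                  // opaque in this sketch
typedef DynInst *DynInstPtr;
typedef short ThreadID;

struct Resource {
    // Default is a no-op; only resources that care override it.
    virtual void updateAfterContextSwitch(DynInstPtr inst, ThreadID tid) {}
    virtual ~Resource() {}
};

struct ResourcePool {
    std::vector<Resource *> resources;

    // Body of the pool event scheduled when a thread is being switched in.
    void updateAfterContextSwitch(DynInstPtr inst, ThreadID tid)
    {
        for (size_t idx = 0; idx < resources.size(); idx++)
            resources[idx]->updateAfterContextSwitch(inst, tid);
    }
};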
setPC(inst->readPC(), tid); setNextPC(inst->readNextPC(), tid); setNextNPC(inst->readNextNPC(), tid); + if (inst->isControl()) { + thread[tid]->lastGradIsBranch = true; + thread[tid]->lastBranchPC = inst->readPC(); + thread[tid]->lastBranchNextPC = inst->readNextPC(); + thread[tid]->lastBranchNextNPC = inst->readNextNPC(); + } else { + thread[tid]->lastGradIsBranch = false; + } + + // Finalize Trace Data For Instruction if (inst->traceData) { //inst->traceData->setCycle(curTick); @@ -1082,9 +1092,6 @@ InOrderCPU::instDone(DynInstPtr inst, ThreadID tid) inst->traceData = NULL; } - // Set Last Graduated Instruction In Thread State - //thread[tid]->lastGradInst = inst; - // Increment thread-state's instruction count thread[tid]->numInst++; @@ -1108,7 +1115,7 @@ InOrderCPU::instDone(DynInstPtr inst, ThreadID tid) // Broadcast to other resources an instruction // has been completed resPool->scheduleEvent((CPUEventType)ResourcePool::InstGraduated, inst, - tid); + 0, 0, tid); // Finally, remove instruction from CPU removeInst(inst); diff --git a/src/cpu/inorder/pipeline_stage.cc b/src/cpu/inorder/pipeline_stage.cc index 620951e34..55ee3ad12 100644 --- a/src/cpu/inorder/pipeline_stage.cc +++ b/src/cpu/inorder/pipeline_stage.cc @@ -568,15 +568,18 @@ PipelineStage::activateThread(ThreadID tid) } else { DynInstPtr inst = switchedOutBuffer[tid]; - DPRINTF(InOrderStage,"[tid:%i]: Re-Inserting [sn:%lli] PC:%#x into stage skidBuffer %i\n", - tid, inst->seqNum, inst->readPC(), inst->threadNumber); + DPRINTF(InOrderStage,"[tid:%i]: Re-Inserting [sn:%lli] PC:%#x into " + "stage skidBuffer %i\n", tid, inst->seqNum, + inst->readPC(), inst->threadNumber); // Make instruction available for pipeline processing skidBuffer[tid].push(inst); // Update PC so that we start fetching after this instruction to prevent // "double"-execution of instructions - cpu->resPool->scheduleEvent((InOrderCPU::CPUEventType)ResourcePool::UpdateAfterContextSwitch, inst, 0, 0, tid); + cpu->resPool->scheduleEvent((InOrderCPU::CPUEventType) + ResourcePool::UpdateAfterContextSwitch, + inst, 0, 0, tid); // Clear switchout buffer switchedOutBuffer[tid] = NULL; diff --git a/src/cpu/inorder/resources/fetch_seq_unit.cc b/src/cpu/inorder/resources/fetch_seq_unit.cc index c217f972e..ba86a91f0 100644 --- a/src/cpu/inorder/resources/fetch_seq_unit.cc +++ b/src/cpu/inorder/resources/fetch_seq_unit.cc @@ -348,11 +348,23 @@ FetchSeqUnit::updateAfterContextSwitch(DynInstPtr inst, ThreadID tid) { pcValid[tid] = true; - PC[tid] = inst->readNextPC(); - nextPC[tid] = inst->readNextNPC(); - nextNPC[tid] = inst->readNextNPC() + instSize; - - - DPRINTF(InOrderFetchSeq, "[tid:%i]: Updating PC:%08p NPC:%08p NNPC:%08p.\n", - tid, PC[tid], nextPC[tid], nextNPC[tid]); + if (cpu->thread[tid]->lastGradIsBranch) { + /** This function assumes that the instruction causing the context + * switch was right after the branch. Thus, if it's not, then + * we are updating incorrectly here + */ + assert(cpu->thread[tid]->lastBranchNextPC == inst->readPC()); + + PC[tid] = cpu->thread[tid]->lastBranchNextNPC; + nextPC[tid] = PC[tid] + instSize; + nextNPC[tid] = nextPC[tid] + instSize; + } else { + PC[tid] = inst->readNextPC(); + nextPC[tid] = inst->readNextNPC(); + nextNPC[tid] = inst->readNextNPC() + instSize; + } + + DPRINTF(InOrderFetchSeq, "[tid:%i]: Updating PCs due to Context Switch." 
+ "Assigning PC:%08p NPC:%08p NNPC:%08p.\n", tid, PC[tid], + nextPC[tid], nextNPC[tid]); } diff --git a/src/cpu/inorder/thread_state.hh b/src/cpu/inorder/thread_state.hh index 422df30aa..0a171a99f 100644 --- a/src/cpu/inorder/thread_state.hh +++ b/src/cpu/inorder/thread_state.hh @@ -79,14 +79,14 @@ class InOrderThreadState : public ThreadState { #if FULL_SYSTEM InOrderThreadState(InOrderCPU *_cpu, ThreadID _thread_num) : ThreadState(reinterpret_cast(_cpu), _thread_num), - cpu(_cpu), inSyscall(0), trapPending(0) + cpu(_cpu), inSyscall(0), trapPending(0), lastGradIsBranch(false) { } #else InOrderThreadState(InOrderCPU *_cpu, ThreadID _thread_num, Process *_process) : ThreadState(reinterpret_cast(_cpu), _thread_num, _process), - cpu(_cpu), inSyscall(0), trapPending(0) + cpu(_cpu), inSyscall(0), trapPending(0), lastGradIsBranch(false) { } #endif @@ -105,10 +105,15 @@ class InOrderThreadState : public ThreadState { /** Returns a pointer to the TC of this thread. */ ThreadContext *getTC() { return tc; } + /** Return the thread id */ int readTid() { return threadId(); } - /** Pointer to the last graduated instruction in the thread */ - //DynInstPtr lastGradInst; + + /** Is last instruction graduated a branch? */ + bool lastGradIsBranch; + Addr lastBranchPC; + Addr lastBranchNextPC; + Addr lastBranchNextNPC; }; #endif // __CPU_INORDER_THREAD_STATE_HH__ -- cgit v1.2.3 From 5e0b8337ed9c8aa975cd44df5565c2c3dde0c267 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:28:05 -0500 Subject: inorder: add/remove halt/deallocate context respectively Halt is called from the exit() system call while deallocate is unused. So to clear up things, just use halt and remove deallocate. --- src/cpu/inorder/cpu.cc | 65 ++++++++++++++-------------------------- src/cpu/inorder/cpu.hh | 15 ++++++---- src/cpu/inorder/resource_pool.cc | 7 +++-- 3 files changed, 38 insertions(+), 49 deletions(-) (limited to 'src') diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index 8d41a18b4..5db86b258 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -98,7 +98,7 @@ std::string InOrderCPU::eventNames[NumCPUEvents] = "ActivateThread", "ActivateNextReadyThread", "DeactivateThread", - "DeallocateThread", + "HaltThread", "SuspendThread", "Trap", "InstGraduated", @@ -123,8 +123,8 @@ InOrderCPU::CPUEvent::process() cpu->deactivateThread(tid); break; - case DeallocateThread: - cpu->deallocateThread(tid); + case HaltThread: + cpu->haltThread(tid); break; case SuspendThread: @@ -140,8 +140,7 @@ InOrderCPU::CPUEvent::process() break; default: - fatal("Unrecognized Event Type %d", cpuEventType); - + fatal("Unrecognized Event Type %s", eventNames[cpuEventType]); } cpu->cpuEventRemoveList.push(this); @@ -759,40 +758,6 @@ InOrderCPU::deactivateThread(ThreadID tid) assert(!isThreadActive(tid)); } -void -InOrderCPU::deallocateContext(ThreadID tid, int delay) -{ - DPRINTF(InOrderCPU,"[tid:%i]: Deallocating ...\n", tid); - - scheduleCpuEvent(DeallocateThread, NoFault, tid, dummyInst, delay); - - // Be sure to signal that there's some activity so the CPU doesn't - // deschedule itself. 
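// Why fetch needs the last-graduated-branch record is visible in
// updateAfterContextSwitch() above: if the instruction being restarted sits in
// a branch shadow (a delay slot, say), the next fetch address comes from the
// branch, not from a straight PC increment. A standalone rendering of that
// decision; instSize and the field names mirror the patch, the rest is
// scaffolding.
#include <cassert>
#include <stdint.h>

typedef uint64_t Addr;
static const int instSize = 4;                   // fixed-width ISA assumed

struct ThreadState {
    bool lastGradIsBranch;
    Addr lastBranchPC, lastBranchNextPC, lastBranchNextNPC;
};

struct ParkedInst {                              // stand-in for the restarted DynInst
    Addr pc, nextPC, nextNPC;
};

void updatePCsAfterSwitch(const ThreadState &ts, const ParkedInst &inst,
                          Addr &PC, Addr &nextPC, Addr &nextNPC)
{
    if (ts.lastGradIsBranch) {
        // The restarted instruction is the one right behind the branch, so
        // fetch resumes in the branch's resolved target stream.
        assert(ts.lastBranchNextPC == inst.pc);
        PC = ts.lastBranchNextNPC;
        nextPC = PC + instSize;
        nextNPC = nextPC + instSize;
    } else {
        PC = inst.nextPC;
        nextPC = inst.nextNPC;
        nextNPC = inst.nextNPC + instSize;
    }
}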
- activityRec.activity(); - - _status = Running; -} - -void -InOrderCPU::deallocateThread(ThreadID tid) -{ - DPRINTF(InOrderCPU, "[tid:%i]: Calling deallocate thread.\n", tid); - - if (isThreadActive(tid)) { - DPRINTF(InOrderCPU,"[tid:%i]: Removing from active threads list\n", - tid); - list::iterator thread_it = - std::find(activeThreads.begin(), activeThreads.end(), tid); - - removePipelineStalls(*thread_it); - - activeThreads.erase(thread_it); - } - - // TODO: "Un"Load/Unmap register file state - -} - void InOrderCPU::removePipelineStalls(ThreadID tid) { @@ -874,20 +839,36 @@ InOrderCPU::activateNextReadyContext(int delay) void InOrderCPU::haltContext(ThreadID tid, int delay) { - suspendContext(tid, delay); + DPRINTF(InOrderCPU, "[tid:%i]: Calling Halt Context...\n", tid); + + scheduleCpuEvent(HaltThread, NoFault, tid, dummyInst, delay); + + activityRec.activity(); +} + +void +InOrderCPU::haltThread(ThreadID tid) +{ + DPRINTF(InOrderCPU, "[tid:%i]: Placing on Halted Threads List...\n", tid); + deactivateThread(tid); + squashThreadInPipeline(tid); + haltedThreads.push_back(tid); + + if (threadModel == SwitchOnCacheMiss) { + activateNextReadyContext(); + } } void InOrderCPU::suspendContext(ThreadID tid, int delay) { scheduleCpuEvent(SuspendThread, NoFault, tid, dummyInst, delay); - //_status = Idle; } void InOrderCPU::suspendThread(ThreadID tid) { - DPRINTF(InOrderCPU, "[tid: %i]: Placing on Suspended Threads List...\n", tid); + DPRINTF(InOrderCPU, "[tid:%i]: Placing on Suspended Threads List...\n", tid); deactivateThread(tid); suspendedThreads.push_back(tid); thread[tid]->lastSuspend = curTick; diff --git a/src/cpu/inorder/cpu.hh b/src/cpu/inorder/cpu.hh index 1e514e1ed..70013c0f5 100644 --- a/src/cpu/inorder/cpu.hh +++ b/src/cpu/inorder/cpu.hh @@ -177,7 +177,7 @@ class InOrderCPU : public BaseCPU ActivateThread, ActivateNextReadyThread, DeactivateThread, - DeallocateThread, + HaltThread, SuspendThread, Trap, InstGraduated, @@ -357,16 +357,18 @@ class InOrderCPU : public BaseCPU void deactivateThread(ThreadID tid); /** Suspend Thread, Remove from Active Threads List, Add to Suspend List */ - void haltContext(ThreadID tid, int delay = 0); void suspendContext(ThreadID tid, int delay = 0); void suspendThread(ThreadID tid); - /** Remove Thread from Active Threads List, Remove Any Loaded Thread State */ - void deallocateContext(ThreadID tid, int delay = 0); - void deallocateThread(ThreadID tid); + /** Halt Thread, Remove from Active Thread List, Place Thread on Halted + * Threads List + */ + void haltContext(ThreadID tid, int delay = 0); + void haltThread(ThreadID tid); /** squashFromMemStall() - sets up a squash event * squashDueToMemStall() - squashes pipeline + * @note: maybe squashContext/squashThread would be better? 
*/ void squashFromMemStall(DynInstPtr inst, ThreadID tid, int delay = 0); void squashDueToMemStall(int stage_num, InstSeqNum seq_num, ThreadID tid); @@ -587,6 +589,9 @@ class InOrderCPU : public BaseCPU /** Suspended Threads List */ std::list suspendedThreads; + /** Halted Threads List */ + std::list haltedThreads; + /** Thread Status Functions */ bool isThreadActive(ThreadID tid); bool isThreadReady(ThreadID tid); diff --git a/src/cpu/inorder/resource_pool.cc b/src/cpu/inorder/resource_pool.cc index 20f112a66..3750d18d6 100644 --- a/src/cpu/inorder/resource_pool.cc +++ b/src/cpu/inorder/resource_pool.cc @@ -204,6 +204,9 @@ ResourcePool::slotsInUse(int res_idx) //@todo: split this function and call this version schedulePoolEvent // and use this scheduleEvent for scheduling a specific event on // a resource +//@todo: For arguments that arent being used in a ResPoolEvent, a dummyParam +// or some typedef can be used to signify what's important info +// to the event construction void ResourcePool::scheduleEvent(InOrderCPU::CPUEventType e_type, DynInstPtr inst, int delay, int res_idx, ThreadID tid) @@ -229,8 +232,8 @@ ResourcePool::scheduleEvent(InOrderCPU::CPUEventType e_type, DynInstPtr inst, } break; + case InOrderCPU::HaltThread: case InOrderCPU::DeactivateThread: - case InOrderCPU::DeallocateThread: { DPRINTF(Resource, "Scheduling Deactivate Thread Resource Pool " @@ -472,7 +475,7 @@ ResourcePool::ResPoolEvent::process() break; case InOrderCPU::DeactivateThread: - case InOrderCPU::DeallocateThread: + case InOrderCPU::HaltThread: resPool->deactivateAll(tid); break; -- cgit v1.2.3 From b4e0ef78379dd5bab0ee6ec824bca3f51dd484c6 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:28:12 -0500 Subject: inorder: set thread status' set Active/Suspended/Halted status for threads. useful for system when determining if/when to exit simulation --- src/cpu/inorder/cpu.cc | 12 ++++++++++-- src/cpu/inorder/thread_context.hh | 1 - 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index 5db86b258..d8fea79d9 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -711,6 +711,8 @@ InOrderCPU::activateThread(ThreadID tid) thread[tid]->lastActivate = curTick; + tcBase(tid)->setStatus(ThreadContext::Active); + wakeCPU(); } } @@ -750,9 +752,11 @@ InOrderCPU::deactivateThread(ThreadID tid) removePipelineStalls(*thread_it); - //@TODO: change stage status' to Idle? - activeThreads.erase(thread_it); + + // Ideally, this should be triggered from the + // suspendContext/Thread functions + tcBase(tid)->setStatus(ThreadContext::Suspended); } assert(!isThreadActive(tid)); @@ -854,6 +858,8 @@ InOrderCPU::haltThread(ThreadID tid) squashThreadInPipeline(tid); haltedThreads.push_back(tid); + tcBase(tid)->setStatus(ThreadContext::Halted); + if (threadModel == SwitchOnCacheMiss) { activateNextReadyContext(); } @@ -872,6 +878,8 @@ InOrderCPU::suspendThread(ThreadID tid) deactivateThread(tid); suspendedThreads.push_back(tid); thread[tid]->lastSuspend = curTick; + + tcBase(tid)->setStatus(ThreadContext::Suspended); } void diff --git a/src/cpu/inorder/thread_context.hh b/src/cpu/inorder/thread_context.hh index 820f3077f..6dd5f192f 100644 --- a/src/cpu/inorder/thread_context.hh +++ b/src/cpu/inorder/thread_context.hh @@ -64,7 +64,6 @@ class InOrderThreadContext : public ThreadContext /** Pointer to the thread state that this TC corrseponds to. */ InOrderThreadState *thread; - /** Returns a pointer to the ITB. 
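// With deallocate folded away, the thread lifecycle above comes down to three
// externally visible states that the CPU now keeps in sync with its internal
// lists: activate marks the context Active, suspend (a switched-out thread
// that may return, e.g. one waiting on a miss) marks it Suspended, and halt
// (reached from the exit() syscall) squashes the thread's in-flight work and
// marks it Halted. A toy version of that bookkeeping; the list handling is
// simplified and the fixed thread count is an assumption.
#include <algorithm>
#include <list>

typedef short ThreadID;
enum ThreadStatus { Active, Suspended, Halted };

static const int MaxThreads = 4;
std::list<ThreadID> activeThreads, suspendedThreads, haltedThreads;
ThreadStatus ctxStatus[MaxThreads];

static void dropFromActive(ThreadID tid)
{
    std::list<ThreadID>::iterator it =
        std::find(activeThreads.begin(), activeThreads.end(), tid);
    if (it != activeThreads.end())
        activeThreads.erase(it);
}

void suspendThread(ThreadID tid)                 // may be re-activated later
{
    dropFromActive(tid);
    suspendedThreads.push_back(tid);
    ctxStatus[tid] = Suspended;
}

void haltThread(ThreadID tid)                    // terminal; pipeline state squashed
{
    dropFromActive(tid);
    haltedThreads.push_back(tid);
    ctxStatus[tid] = Halted;
    // ...under SwitchOnCacheMiss the next ready thread is activated in its place
}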
*/ /** @TODO: PERF: Should we bind this to a pointer in constructor? */ TheISA::TLB *getITBPtr() { return cpu->getITBPtr(); } -- cgit v1.2.3 From 4d749472e3cb97ff0421fbf5cbc53d9c89ecfa45 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:28:31 -0500 Subject: inorder: enforce stage bandwidth each stage keeps track of insts_processed on a per_thread basis but we should be keeping that on a total basis inorder to enforce stage width limits --- src/cpu/inorder/first_stage.cc | 11 ++++++++--- src/cpu/inorder/pipeline_stage.cc | 22 ++++++++++++++-------- src/cpu/inorder/pipeline_stage.hh | 7 ++++++- 3 files changed, 28 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/cpu/inorder/first_stage.cc b/src/cpu/inorder/first_stage.cc index 75e13e559..27831469e 100644 --- a/src/cpu/inorder/first_stage.cc +++ b/src/cpu/inorder/first_stage.cc @@ -175,9 +175,14 @@ FirstStage::processInsts(ThreadID tid) ThePipeline::createFrontEndSchedule(inst); } - // Don't let instruction pass to next stage if it hasnt completed - // all of it's requests for this stage. - all_reqs_completed = processInstSchedule(inst); + int reqs_processed = 0; + all_reqs_completed = processInstSchedule(inst, reqs_processed); + + // If the instruction isnt squashed & we've completed one request + // Then we can officially count this instruction toward the stage's + // bandwidth count + if (reqs_processed > 0) + instsProcessed++; if (!all_reqs_completed) { if (new_inst) { diff --git a/src/cpu/inorder/pipeline_stage.cc b/src/cpu/inorder/pipeline_stage.cc index 55ee3ad12..79f1ff915 100644 --- a/src/cpu/inorder/pipeline_stage.cc +++ b/src/cpu/inorder/pipeline_stage.cc @@ -726,9 +726,11 @@ PipelineStage::tick() nextStage->size = 0; toNextStageIndex = 0; - + sortInsts(); + instsProcessed = 0; + processStage(status_change); if (status_change) { @@ -873,10 +875,8 @@ PipelineStage::processInsts(ThreadID tid) DynInstPtr inst; bool last_req_completed = true; - int insts_processed = 0; - while (insts_available > 0 && - insts_processed < stageWidth && + instsProcessed < stageWidth && (!nextStageValid || canSendInstToStage(stageNum+1)) && last_req_completed) { assert(!insts_to_stage.empty()); @@ -901,8 +901,14 @@ PipelineStage::processInsts(ThreadID tid) continue; } + int reqs_processed = 0; + last_req_completed = processInstSchedule(inst, reqs_processed); - last_req_completed = processInstSchedule(inst); + // If the instruction isnt squashed & we've completed one request + // Then we can officially count this instruction toward the stage's + // bandwidth count + if (reqs_processed > 0) + instsProcessed++; // Don't let instruction pass to next stage if it hasnt completed // all of it's requests for this stage. @@ -916,8 +922,6 @@ PipelineStage::processInsts(ThreadID tid) break; } - insts_processed++; - insts_to_stage.pop(); //++stageProcessedInsts; @@ -938,7 +942,7 @@ PipelineStage::processInsts(ThreadID tid) } bool -PipelineStage::processInstSchedule(DynInstPtr inst) +PipelineStage::processInstSchedule(DynInstPtr inst,int &reqs_processed) { bool last_req_completed = true; ThreadID tid = inst->readTid(); @@ -966,6 +970,8 @@ PipelineStage::processInstSchedule(DynInstPtr inst) panic("%i: encountered %s fault!\n", curTick, req->fault->name()); } + + reqs_processed++; } else { DPRINTF(InOrderStage, "[tid:%i]: [sn:%i] request to %s failed." 
"\n", tid, inst->seqNum, cpu->resPool->name(res_num)); diff --git a/src/cpu/inorder/pipeline_stage.hh b/src/cpu/inorder/pipeline_stage.hh index dfe1ac7c3..920734e6a 100644 --- a/src/cpu/inorder/pipeline_stage.hh +++ b/src/cpu/inorder/pipeline_stage.hh @@ -178,7 +178,7 @@ class PipelineStage virtual void processInsts(ThreadID tid); /** Process all resources on an instruction's resource schedule */ - virtual bool processInstSchedule(DynInstPtr inst); + virtual bool processInstSchedule(DynInstPtr inst, int &reqs_processed); /** Is there room in the next stage buffer for this instruction? */ virtual bool canSendInstToStage(unsigned stage_num); @@ -270,6 +270,11 @@ class PipelineStage std::vector switchedOutBuffer; std::vector switchedOutValid; + /** Instructions that we've processed this tick + * NOTE: "Processed" means completed at least 1 instruction request + */ + unsigned instsProcessed; + /** Queue of all instructions coming from previous stage on this cycle. */ std::queue insts[ThePipeline::MaxThreads]; -- cgit v1.2.3 From ffa9ecb1fa71f1fe89a65975b2c558e312bbfbc8 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:28:51 -0500 Subject: inorder: pipeline stage stats add idle/run/utilization stats for each pipeline stage --- src/cpu/inorder/cpu.cc | 15 ++++++++-- src/cpu/inorder/first_stage.cc | 14 ++++++--- src/cpu/inorder/pipeline_stage.cc | 61 ++++++++++++++------------------------- src/cpu/inorder/pipeline_stage.hh | 27 +++++++---------- 4 files changed, 55 insertions(+), 62 deletions(-) (limited to 'src') diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index d8fea79d9..4cc9b9f22 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -346,6 +346,11 @@ InOrderCPU::regStats() .prereq(maxResReqCount); #endif + /* Register for each Pipeline Stage */ + for (int stage_num=0; stage_num < ThePipeline::NumStages; stage_num++) { + pipelineStage[stage_num]->regStats(); + } + /* Register any of the InOrderCPU's stats here.*/ timesIdled .name(name() + ".timesIdled") @@ -1289,8 +1294,14 @@ InOrderCPU::wakeCPU() DPRINTF(Activity, "Waking up CPU\n"); - //@todo: figure out how to count idleCycles correctly - //idleCycles += (curTick - 1) - lastRunningCycle; + Tick extra_cycles = tickToCycles((curTick - 1) - lastRunningCycle); + + idleCycles += extra_cycles; + for (int stage_num = 0; stage_num < NumStages; stage_num++) { + pipelineStage[stage_num]->idleCycles += extra_cycles; + } + + numCycles += extra_cycles; mainEventQueue.schedule(&tickEvent, curTick); } diff --git a/src/cpu/inorder/first_stage.cc b/src/cpu/inorder/first_stage.cc index 27831469e..c653d152b 100644 --- a/src/cpu/inorder/first_stage.cc +++ b/src/cpu/inorder/first_stage.cc @@ -118,9 +118,9 @@ FirstStage::processStage(bool &status_change) status_change = checkSignalsAndUpdate(tid) || status_change; } - for (int threadFetched = 0; threadFetched < numFetchingThreads; - threadFetched++) { - + for (int insts_fetched = 0; + insts_fetched < stageWidth && canSendInstToStage(1); + insts_fetched++) { ThreadID tid = getFetchingThread(fetchPolicy); if (tid >= 0) { @@ -130,6 +130,13 @@ FirstStage::processStage(bool &status_change) DPRINTF(InOrderStage, "No more threads to fetch from.\n"); } } + + if (instsProcessed > 0) { + ++runCycles; + } else { + ++idleCycles; + } + } //@TODO: Note in documentation, that when you make a pipeline stage change, @@ -197,7 +204,6 @@ FirstStage::processInsts(ThreadID tid) } sendInstToNextStage(inst); - //++stageProcessedInsts; } // Record that stage has written to the time 
buffer for activity diff --git a/src/cpu/inorder/pipeline_stage.cc b/src/cpu/inorder/pipeline_stage.cc index 79f1ff915..e601edfcc 100644 --- a/src/cpu/inorder/pipeline_stage.cc +++ b/src/cpu/inorder/pipeline_stage.cc @@ -72,41 +72,27 @@ PipelineStage::init(Params *params) std::string PipelineStage::name() const { - return cpu->name() + ".stage-" + to_string(stageNum); + return cpu->name() + ".stage-" + to_string(stageNum); } void PipelineStage::regStats() { -/* stageIdleCycles - .name(name() + ".IdleCycles") - .desc("Number of cycles stage is idle") - .prereq(stageIdleCycles); - stageBlockedCycles - .name(name() + ".BlockedCycles") - .desc("Number of cycles stage is blocked") - .prereq(stageBlockedCycles); - stageRunCycles - .name(name() + ".RunCycles") - .desc("Number of cycles stage is running") - .prereq(stageRunCycles); - stageUnblockCycles - .name(name() + ".UnblockCycles") - .desc("Number of cycles stage is unblocking") - .prereq(stageUnblockCycles); - stageSquashCycles - .name(name() + ".SquashCycles") - .desc("Number of cycles stage is squashing") - .prereq(stageSquashCycles); - stageProcessedInsts - .name(name() + ".ProcessedInsts") - .desc("Number of instructions handled by stage") - .prereq(stageProcessedInsts); - stageSquashedInsts - .name(name() + ".SquashedInsts") - .desc("Number of squashed instructions handled by stage") - .prereq(stageSquashedInsts);*/ + idleCycles + .name(name() + ".idleCycles") + .desc("Number of cycles 0 instructions are processed."); + + runCycles + .name(name() + ".runCycles") + .desc("Number of cycles 1+ instructions are processed."); + + utilization + .name(name() + ".utilization") + .desc("Percentage of cycles stage was utilized (processing insts).") + .precision(6); + utilization = (runCycles / cpu->numCycles) * 100; + } @@ -803,6 +789,12 @@ PipelineStage::processStage(bool &status_change) nextStage->size, stageNum + 1); } + if (instsProcessed > 0) { + ++runCycles; + } else { + ++idleCycles; + } + DPRINTF(InOrderStage, "%i left in stage %i incoming buffer.\n", skidSize(), stageNum); @@ -820,12 +812,6 @@ PipelineStage::processThread(bool &status_change, ThreadID tid) // continue trying to empty skid buffer // check if stall conditions have passed - if (stageStatus[tid] == Blocked) { - ;//++stageBlockedCycles; - } else if (stageStatus[tid] == Squashing) { - ;//++stageSquashCycles; - } - // Stage should try to process as many instructions as its bandwidth // will allow, as long as it is not currently blocked. if (stageStatus[tid] == Running || @@ -867,8 +853,6 @@ PipelineStage::processInsts(ThreadID tid) if (insts_available == 0) { DPRINTF(InOrderStage, "[tid:%u]: Nothing to do, breaking out" " early.\n",tid); - // Should I change the status to idle? - //++stageIdleCycles; return; } @@ -892,8 +876,6 @@ PipelineStage::processInsts(ThreadID tid) "squashed, skipping.\n", tid, inst->seqNum, inst->readPC()); - //++stageSquashedInsts; - insts_to_stage.pop(); --insts_available; @@ -924,7 +906,6 @@ PipelineStage::processInsts(ThreadID tid) insts_to_stage.pop(); - //++stageProcessedInsts; --insts_available; } diff --git a/src/cpu/inorder/pipeline_stage.hh b/src/cpu/inorder/pipeline_stage.hh index 920734e6a..be3a1093c 100644 --- a/src/cpu/inorder/pipeline_stage.hh +++ b/src/cpu/inorder/pipeline_stage.hh @@ -353,24 +353,19 @@ class PipelineStage std::vector resources; }; - /** Tracks which stages are telling decode to stall. 
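// The bandwidth fix above hinges on one rule: an instruction counts against
// the stage's width only once at least one of its resource requests completed
// this cycle, and the counter is a single per-stage total instead of a
// per-thread one. The same counter then drives the new run/idle statistics.
// An outline with the request handling stubbed out; stageWidth and the stub
// are assumptions, the control flow follows the patch.
#include <stdint.h>

static const unsigned stageWidth = 2;            // assumed issue width

unsigned instsProcessed;                         // shared across threads
uint64_t runCycles = 0, idleCycles = 0;

// Stub for processInstSchedule(inst, reqs_processed): says how many resource
// requests completed and whether the instruction finished all of them.
bool processInstSchedule(unsigned &reqs_processed)
{
    reqs_processed = 1;
    return true;
}

void stageTick()
{
    instsProcessed = 0;                          // reset at the top of tick()

    while (instsProcessed < stageWidth /* && insts available && room downstream */) {
        unsigned reqs_processed = 0;
        bool all_reqs_completed = processInstSchedule(reqs_processed);

        if (reqs_processed > 0)
            instsProcessed++;                    // made progress: consumes bandwidth

        if (!all_reqs_completed)
            break;                               // stalled here; retry next cycle
        // ...otherwise the instruction is handed to the next stage
    }

    if (instsProcessed > 0)
        ++runCycles;                             // utilization = runCycles / numCycles
    else
        ++idleCycles;
}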
*/ + /** Tracks stage/resource stalls */ Stalls stalls[ThePipeline::MaxThreads]; - //@TODO: Use Stats for the pipeline stages - /** Stat for total number of idle cycles. */ - //Stats::Scalar stageIdleCycles; - /** Stat for total number of blocked cycles. */ - //Stats::Scalar stageBlockedCycles; - /** Stat for total number of normal running cycles. */ - //Stats::Scalar stageRunCycles; - /** Stat for total number of unblocking cycles. */ - //Stats::Scalar stageUnblockCycles; - /** Stat for total number of squashing cycles. */ - //Stats::Scalar stageSquashCycles; - /** Stat for total number of staged instructions. */ - //Stats::Scalar stageProcessedInsts; - /** Stat for total number of squashed instructions. */ - //Stats::Scalar stageSquashedInsts; + /** Number of cycles 0 instruction(s) are processed. */ + Stats::Scalar idleCycles; + + /** Number of cycles 1+ instructions are processed. */ + Stats::Scalar runCycles; + + /** Percentage of cycles 1+ instructions are processed. */ + Stats::Formula utilization; + + }; #endif -- cgit v1.2.3 From 0b29c2d057d2d6f4f8b9b7853da91bcb283e805c Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:28:59 -0500 Subject: inorder: ctxt switch stats - m5 line enforcement on use_def.cc,hh --- src/cpu/inorder/cpu.cc | 26 +++- src/cpu/inorder/cpu.hh | 5 + src/cpu/inorder/pipeline_stage.cc | 3 + src/cpu/inorder/resources/graduation_unit.hh | 2 - src/cpu/inorder/resources/use_def.cc | 216 +++++++++++++++++++-------- src/cpu/inorder/resources/use_def.hh | 12 +- 6 files changed, 196 insertions(+), 68 deletions(-) (limited to 'src') diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index 4cc9b9f22..b69fe2e3b 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -189,7 +189,8 @@ InOrderCPU::InOrderCPU(Params *params) #endif // DEBUG switchCount(0), deferRegistration(false/*params->deferRegistration*/), - stageTracing(params->stageTracing) + stageTracing(params->stageTracing), + instsPerSwitch(0) { ThreadID active_threads; cpu_params = params; @@ -352,6 +353,15 @@ InOrderCPU::regStats() } /* Register any of the InOrderCPU's stats here.*/ + instsPerCtxtSwitch + .name(name() + ".instsPerContextSwitch") + .desc("Instructions Committed Per Context Switch") + .prereq(instsPerCtxtSwitch); + + numCtxtSwitches + .name(name() + ".contextSwitches") + .desc("Number of context switches"); + timesIdled .name(name() + ".timesIdled") .desc("Number of times that the entire CPU went into an idle state and" @@ -719,6 +729,8 @@ InOrderCPU::activateThread(ThreadID tid) tcBase(tid)->setStatus(ThreadContext::Active); wakeCPU(); + + numCtxtSwitches++; } } @@ -1056,6 +1068,15 @@ InOrderCPU::addInst(DynInstPtr &inst) return --(instList[tid].end()); } +void +InOrderCPU::updateContextSwitchStats() +{ + // Set Average Stat Here, then reset to 0 + instsPerCtxtSwitch = instsPerSwitch; + instsPerSwitch = 0; +} + + void InOrderCPU::instDone(DynInstPtr inst, ThreadID tid) { @@ -1086,6 +1107,9 @@ InOrderCPU::instDone(DynInstPtr inst, ThreadID tid) inst->traceData = NULL; } + // Increment active thread's instruction count + instsPerSwitch++; + // Increment thread-state's instruction count thread[tid]->numInst++; diff --git a/src/cpu/inorder/cpu.hh b/src/cpu/inorder/cpu.hh index 70013c0f5..6f1f3ee3f 100644 --- a/src/cpu/inorder/cpu.hh +++ b/src/cpu/inorder/cpu.hh @@ -707,6 +707,11 @@ class InOrderCPU : public BaseCPU /** The cycle that the CPU was last running, used for statistics. 
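// The context-switch statistics above all follow the same sample-and-reset
// pattern: a plain counter accumulates while one thread runs (instructions
// graduated here, unique registers touched in the use-def unit below), and the
// moment the next thread is switched in the value is fed to an averaging stat
// and cleared. Stripped of the Stats framework (the AverageStat below is a
// stand-in, not Stats::Average itself), the pattern is just:
struct AverageStat {                             // running mean of the samples
    double sum;
    unsigned long samples;
    AverageStat() : sum(0), samples(0) {}
    void sample(double v) { sum += v; samples++; }
    double mean() const { return samples ? sum / samples : 0; }
};

unsigned instsPerSwitch = 0;        // accumulates while the current thread runs
AverageStat instsPerCtxtSwitch;     // reported as <cpu>.instsPerContextSwitch
unsigned long numCtxtSwitches = 0;

void onInstGraduated()              // instDone()
{
    instsPerSwitch++;
}

void onThreadSwitchedIn()           // activateThread() -> updateContextSwitchStats()
{
    numCtxtSwitches++;
    instsPerCtxtSwitch.sample(instsPerSwitch);
    instsPerSwitch = 0;             // start a fresh interval for the new thread
}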
*/ Tick lastRunningCycle; + void updateContextSwitchStats(); + unsigned instsPerSwitch; + Stats::Average instsPerCtxtSwitch; + Stats::Scalar numCtxtSwitches; + /** Update Thread , used for statistic purposes*/ inline void tickThreadStats(); diff --git a/src/cpu/inorder/pipeline_stage.cc b/src/cpu/inorder/pipeline_stage.cc index e601edfcc..550952947 100644 --- a/src/cpu/inorder/pipeline_stage.cc +++ b/src/cpu/inorder/pipeline_stage.cc @@ -570,6 +570,9 @@ PipelineStage::activateThread(ThreadID tid) // Clear switchout buffer switchedOutBuffer[tid] = NULL; switchedOutValid[tid] = false; + + // Update any CPU stats based off context switches + cpu->updateContextSwitchStats(); } } diff --git a/src/cpu/inorder/resources/graduation_unit.hh b/src/cpu/inorder/resources/graduation_unit.hh index ad222b119..7f0db98d0 100644 --- a/src/cpu/inorder/resources/graduation_unit.hh +++ b/src/cpu/inorder/resources/graduation_unit.hh @@ -63,8 +63,6 @@ class GraduationUnit : public Resource { bool *nonSpecInstActive[ThePipeline::MaxThreads]; InstSeqNum *nonSpecSeqNum[ThePipeline::MaxThreads]; - - /** @todo: Add Resource Stats Here */ }; #endif //__CPU_INORDER_GRAD_UNIT_HH__ diff --git a/src/cpu/inorder/resources/use_def.cc b/src/cpu/inorder/resources/use_def.cc index 36392d054..a4f3a0d21 100644 --- a/src/cpu/inorder/resources/use_def.cc +++ b/src/cpu/inorder/resources/use_def.cc @@ -59,6 +59,17 @@ UseDefUnit::UseDefUnit(string res_name, int res_id, int res_width, } +void +UseDefUnit::regStats() +{ + uniqueRegsPerSwitch + .name(name() + ".uniqueRegsPerSwitch") + .desc("Number of Unique Registers Needed Per Context Switch") + .prereq(uniqueRegsPerSwitch); + + Resource::regStats(); +} + ResReqPtr UseDefUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx, int slot_num, unsigned cmd) @@ -75,7 +86,8 @@ UseDefUnit::findRequest(DynInstPtr inst) map::iterator map_end = reqMap.end(); while (map_it != map_end) { - UseDefRequest* ud_req = dynamic_cast((*map_it).second); + UseDefRequest* ud_req = + dynamic_cast((*map_it).second); assert(ud_req); if (ud_req && @@ -107,9 +119,9 @@ UseDefUnit::execute(int slot_idx) // in the pipeline then stall instructions here if (*nonSpecInstActive[tid] == true && seq_num > *nonSpecSeqNum[tid]) { - DPRINTF(InOrderUseDef, "[tid:%i]: [sn:%i] cannot execute because there is " - "non-speculative instruction [sn:%i] has not graduated.\n", - tid, seq_num, *nonSpecSeqNum[tid]); + DPRINTF(InOrderUseDef, "[tid:%i]: [sn:%i] cannot execute because" + "there is non-speculative instruction [sn:%i] has not " + "graduated.\n", tid, seq_num, *nonSpecSeqNum[tid]); return; } else if (inst->isNonSpeculative()) { *nonSpecInstActive[tid] = true; @@ -121,89 +133,129 @@ UseDefUnit::execute(int slot_idx) case ReadSrcReg: { int reg_idx = inst->_srcRegIdx[ud_idx]; - - DPRINTF(InOrderUseDef, "[tid:%i]: Attempting to read source register idx %i (reg #%i).\n", + + DPRINTF(InOrderUseDef, "[tid:%i]: Attempting to read source " + "register idx %i (reg #%i).\n", tid, ud_idx, reg_idx); - // Ask register dependency map if it is OK to read from Arch. Reg. File + // Ask register dependency map if it is OK to read from Arch. + // Reg. 
File if (regDepMap[tid]->canRead(reg_idx, inst)) { + + uniqueRegMap[reg_idx] = true; + if (inst->seqNum <= outReadSeqNum[tid]) { if (reg_idx < FP_Base_DepTag) { - DPRINTF(InOrderUseDef, "[tid:%i]: Reading Int Reg %i from Register File:%i.\n", - tid, reg_idx, cpu->readIntReg(reg_idx,inst->readTid())); + DPRINTF(InOrderUseDef, "[tid:%i]: Reading Int Reg %i" + "from Register File:%i.\n", + tid, + reg_idx, + cpu->readIntReg(reg_idx,inst->readTid())); inst->setIntSrc(ud_idx, - cpu->readIntReg(reg_idx,inst->readTid())); + cpu->readIntReg(reg_idx, + inst->readTid())); } else if (reg_idx < Ctrl_Base_DepTag) { reg_idx -= FP_Base_DepTag; - DPRINTF(InOrderUseDef, "[tid:%i]: Reading Float Reg %i from Register File:%x (%08f).\n", + DPRINTF(InOrderUseDef, "[tid:%i]: Reading Float Reg %i" + "from Register File:%x (%08f).\n", tid, reg_idx, - cpu->readFloatRegBits(reg_idx, inst->readTid()), - cpu->readFloatReg(reg_idx, inst->readTid())); + cpu->readFloatRegBits(reg_idx, + inst->readTid()), + cpu->readFloatReg(reg_idx, + inst->readTid())); inst->setFloatSrc(ud_idx, - cpu->readFloatReg(reg_idx, inst->readTid())); + cpu->readFloatReg(reg_idx, + inst->readTid())); } else { reg_idx -= Ctrl_Base_DepTag; - DPRINTF(InOrderUseDef, "[tid:%i]: Reading Misc Reg %i from Register File:%i.\n", - tid, reg_idx, cpu->readMiscReg(reg_idx, inst->readTid())); + DPRINTF(InOrderUseDef, "[tid:%i]: Reading Misc Reg %i " + "from Register File:%i.\n", + tid, + reg_idx, + cpu->readMiscReg(reg_idx, + inst->readTid())); inst->setIntSrc(ud_idx, - cpu->readMiscReg(reg_idx, inst->readTid())); + cpu->readMiscReg(reg_idx, + inst->readTid())); } outReadSeqNum[tid] = maxSeqNum; ud_req->done(); } else { - DPRINTF(InOrderUseDef, "[tid:%i]: Unable to read because of [sn:%i] hasnt read it's" - " registers yet.\n", tid, outReadSeqNum[tid]); - DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting for [sn:%i] to write\n", + DPRINTF(InOrderUseDef, "[tid:%i]: Unable to read because " + "of [sn:%i] hasnt read it's registers yet.\n", + tid, outReadSeqNum[tid]); + DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting for " + "[sn:%i] to write\n", tid, outReadSeqNum[tid]); } } else { // Look for forwarding opportunities - DynInstPtr forward_inst = regDepMap[tid]->canForward(reg_idx, ud_idx, inst); + DynInstPtr forward_inst = regDepMap[tid]->canForward(reg_idx, + ud_idx, + inst); if (forward_inst) { if (inst->seqNum <= outReadSeqNum[tid]) { - int dest_reg_idx = forward_inst->getDestIdxNum(reg_idx); + int dest_reg_idx = + forward_inst->getDestIdxNum(reg_idx); if (reg_idx < FP_Base_DepTag) { - DPRINTF(InOrderUseDef, "[tid:%i]: Forwarding dest. reg value 0x%x from " + DPRINTF(InOrderUseDef, "[tid:%i]: Forwarding dest." + " reg value 0x%x from " "[sn:%i] to [sn:%i] source #%i.\n", - tid, forward_inst->readIntResult(dest_reg_idx) , - forward_inst->seqNum, inst->seqNum, ud_idx); - inst->setIntSrc(ud_idx, forward_inst->readIntResult(dest_reg_idx)); + tid, + forward_inst->readIntResult(dest_reg_idx), + forward_inst->seqNum, + inst->seqNum, ud_idx); + inst->setIntSrc(ud_idx, + forward_inst-> + readIntResult(dest_reg_idx)); } else if (reg_idx < Ctrl_Base_DepTag) { - DPRINTF(InOrderUseDef, "[tid:%i]: Forwarding dest. reg value 0x%x from " + DPRINTF(InOrderUseDef, "[tid:%i]: Forwarding dest." 
+ " reg value 0x%x from " "[sn:%i] to [sn:%i] source #%i.\n", - tid, forward_inst->readFloatResult(dest_reg_idx) , + tid, + forward_inst->readFloatResult(dest_reg_idx), forward_inst->seqNum, inst->seqNum, ud_idx); inst->setFloatSrc(ud_idx, - forward_inst->readFloatResult(dest_reg_idx)); + forward_inst-> + readFloatResult(dest_reg_idx)); } else { - DPRINTF(InOrderUseDef, "[tid:%i]: Forwarding dest. reg value 0x%x from " + DPRINTF(InOrderUseDef, "[tid:%i]: Forwarding dest." + " reg value 0x%x from " "[sn:%i] to [sn:%i] source #%i.\n", - tid, forward_inst->readIntResult(dest_reg_idx) , - forward_inst->seqNum, inst->seqNum, ud_idx); - inst->setIntSrc(ud_idx, forward_inst->readIntResult(dest_reg_idx)); + tid, + forward_inst->readIntResult(dest_reg_idx), + forward_inst->seqNum, + inst->seqNum, ud_idx); + inst->setIntSrc(ud_idx, + forward_inst-> + readIntResult(dest_reg_idx)); } outReadSeqNum[tid] = maxSeqNum; ud_req->done(); } else { - DPRINTF(InOrderUseDef, "[tid:%i]: Unable to read because of [sn:%i] hasnt read it's" + DPRINTF(InOrderUseDef, "[tid:%i]: Unable to read " + "because of [sn:%i] hasnt read it's" " registers yet.\n", tid, outReadSeqNum[tid]); - DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting for [sn:%i] to forward\n", + DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting for " + "[sn:%i] to forward\n", tid, outReadSeqNum[tid]); } } else { - DPRINTF(InOrderUseDef, "[tid:%i]: Source register idx: %i is not ready to read.\n", + DPRINTF(InOrderUseDef, "[tid:%i]: Source register idx: %i" + "is not ready to read.\n", tid, reg_idx); - DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting to read register (idx=%i)\n", + DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting to read " + "register (idx=%i)\n", tid, reg_idx); outReadSeqNum[tid] = inst->seqNum; } @@ -216,12 +268,14 @@ UseDefUnit::execute(int slot_idx) int reg_idx = inst->_destRegIdx[ud_idx]; if (regDepMap[tid]->canWrite(reg_idx, inst)) { - DPRINTF(InOrderUseDef, "[tid:%i]: Flattening register idx %i & Attempting to write to Register File.\n", + DPRINTF(InOrderUseDef, "[tid:%i]: Flattening register idx %i &" + "Attempting to write to Register File.\n", tid, reg_idx); - + uniqueRegMap[reg_idx] = true; if (inst->seqNum <= outReadSeqNum[tid]) { if (reg_idx < FP_Base_DepTag) { - DPRINTF(InOrderUseDef, "[tid:%i]: Writing Int. Result 0x%x to register idx %i.\n", + DPRINTF(InOrderUseDef, "[tid:%i]: Writing Int. 
Result " + "0x%x to register idx %i.\n", tid, inst->readIntResult(ud_idx), reg_idx); // Remove Dependencies @@ -236,33 +290,54 @@ UseDefUnit::execute(int slot_idx) reg_idx -= FP_Base_DepTag; - if (inst->resultType(ud_idx) == InOrderDynInst::Integer) { - DPRINTF(InOrderUseDef, "[tid:%i]: Writing FP-Bits Result 0x%x (bits:0x%x) to register idx %i.\n", - tid, inst->readFloatResult(ud_idx), inst->readIntResult(ud_idx), reg_idx); - - cpu->setFloatRegBits(reg_idx, // Check for FloatRegBits Here + if (inst->resultType(ud_idx) == + InOrderDynInst::Integer) { + DPRINTF(InOrderUseDef, "[tid:%i]: Writing FP-Bits " + "Result 0x%x (bits:0x%x) to register " + "idx %i.\n", + tid, + inst->readFloatResult(ud_idx), + inst->readIntResult(ud_idx), + reg_idx); + + // Check for FloatRegBits Here + cpu->setFloatRegBits(reg_idx, inst->readIntResult(ud_idx), inst->readTid()); - } else if (inst->resultType(ud_idx) == InOrderDynInst::Float) { - DPRINTF(InOrderUseDef, "[tid:%i]: Writing Float Result 0x%x (bits:0x%x) to register idx %i.\n", - tid, inst->readFloatResult(ud_idx), inst->readIntResult(ud_idx), reg_idx); + } else if (inst->resultType(ud_idx) == + InOrderDynInst::Float) { + DPRINTF(InOrderUseDef, "[tid:%i]: Writing Float " + "Result 0x%x (bits:0x%x) to register " + "idx %i.\n", + tid, inst->readFloatResult(ud_idx), + inst->readIntResult(ud_idx), + reg_idx); cpu->setFloatReg(reg_idx, inst->readFloatResult(ud_idx), inst->readTid()); - } else if (inst->resultType(ud_idx) == InOrderDynInst::Double) { - DPRINTF(InOrderUseDef, "[tid:%i]: Writing Double Result 0x%x (bits:0x%x) to register idx %i.\n", - tid, inst->readFloatResult(ud_idx), inst->readIntResult(ud_idx), reg_idx); - - cpu->setFloatReg(reg_idx, // Check for FloatRegBits Here + } else if (inst->resultType(ud_idx) == + InOrderDynInst::Double) { + DPRINTF(InOrderUseDef, "[tid:%i]: Writing Double " + "Result 0x%x (bits:0x%x) to register " + "idx %i.\n", + tid, + inst->readFloatResult(ud_idx), + inst->readIntResult(ud_idx), + reg_idx); + + // Check for FloatRegBits Here + cpu->setFloatReg(reg_idx, inst->readFloatResult(ud_idx), inst->readTid()); } else { - panic("Result Type Not Set For [sn:%i] %s.\n", inst->seqNum, inst->instName()); + panic("Result Type Not Set For [sn:%i] %s.\n", + inst->seqNum, inst->instName()); } } else { - DPRINTF(InOrderUseDef, "[tid:%i]: Writing Misc. 0x%x to register idx %i.\n", + DPRINTF(InOrderUseDef, "[tid:%i]: Writing Misc. 0x%x " + "to register idx %i.\n", tid, inst->readIntResult(ud_idx), reg_idx); // Remove Dependencies @@ -279,15 +354,19 @@ UseDefUnit::execute(int slot_idx) ud_req->done(); } else { - DPRINTF(InOrderUseDef, "[tid:%i]: Unable to write because of [sn:%i] hasnt read it's" + DPRINTF(InOrderUseDef, "[tid:%i]: Unable to write because " + "of [sn:%i] hasnt read it's" " registers yet.\n", tid, outReadSeqNum); - DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting for [sn:%i] to read\n", + DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting for " + "[sn:%i] to read\n", tid, outReadSeqNum); } } else { - DPRINTF(InOrderUseDef, "[tid:%i]: Dest. register idx: %i is not ready to write.\n", + DPRINTF(InOrderUseDef, "[tid:%i]: Dest. 
register idx: %i is " + "not ready to write.\n", tid, reg_idx); - DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting to write register (idx=%i)\n", + DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting to write " + "register (idx=%i)\n", tid, reg_idx); outWriteSeqNum[tid] = inst->seqNum; } @@ -343,18 +422,29 @@ UseDefUnit::squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, } if (outReadSeqNum[tid] >= squash_seq_num) { - DPRINTF(InOrderUseDef, "[tid:%i]: Outstanding Read Seq Num Reset.\n", tid); + DPRINTF(InOrderUseDef, "[tid:%i]: Outstanding Read Seq Num Reset.\n", + tid); outReadSeqNum[tid] = maxSeqNum; } else if (outReadSeqNum[tid] != maxSeqNum) { - DPRINTF(InOrderUseDef, "[tid:%i]: No need to reset Outstanding Read Seq Num %i\n", + DPRINTF(InOrderUseDef, "[tid:%i]: No need to reset Outstanding Read " + "Seq Num %i\n", tid, outReadSeqNum[tid]); } if (outWriteSeqNum[tid] >= squash_seq_num) { - DPRINTF(InOrderUseDef, "[tid:%i]: Outstanding Write Seq Num Reset.\n", tid); + DPRINTF(InOrderUseDef, "[tid:%i]: Outstanding Write Seq Num Reset.\n", + tid); outWriteSeqNum[tid] = maxSeqNum; } else if (outWriteSeqNum[tid] != maxSeqNum) { - DPRINTF(InOrderUseDef, "[tid:%i]: No need to reset Outstanding Write Seq Num %i\n", + DPRINTF(InOrderUseDef, "[tid:%i]: No need to reset Outstanding Write " + "Seq Num %i\n", tid, outWriteSeqNum[tid]); } } + +void +UseDefUnit::updateAfterContextSwitch(DynInstPtr inst, ThreadID tid) +{ + uniqueRegsPerSwitch = uniqueRegMap.size(); + uniqueRegMap.clear(); +} diff --git a/src/cpu/inorder/resources/use_def.hh b/src/cpu/inorder/resources/use_def.hh index 6c76d8ab5..41d758dd7 100644 --- a/src/cpu/inorder/resources/use_def.hh +++ b/src/cpu/inorder/resources/use_def.hh @@ -68,8 +68,12 @@ class UseDefUnit : public Resource { virtual void squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, ThreadID tid); + void updateAfterContextSwitch(DynInstPtr inst, ThreadID tid); + const InstSeqNum maxSeqNum; + void regStats(); + protected: RegDepMap *regDepMap[ThePipeline::MaxThreads]; @@ -84,14 +88,18 @@ class UseDefUnit : public Resource { InstSeqNum floatRegSize[ThePipeline::MaxThreads]; + Stats::Average uniqueRegsPerSwitch; + std::map uniqueRegMap; + public: class UseDefRequest : public ResourceRequest { public: typedef ThePipeline::DynInstPtr DynInstPtr; public: - UseDefRequest(UseDefUnit *res, DynInstPtr inst, int stage_num, int res_idx, - int slot_num, unsigned cmd, int use_def_idx) + UseDefRequest(UseDefUnit *res, DynInstPtr inst, int stage_num, + int res_idx, int slot_num, unsigned cmd, + int use_def_idx) : ResourceRequest(res, inst, stage_num, res_idx, slot_num, cmd), useDefIdx(use_def_idx) { } -- cgit v1.2.3 From 349d86c0e4afb02962c9899bd5a3887ff2c55626 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:29:06 -0500 Subject: inorder-stats: add prereq to basic stat only show requests processed when the resource is actually in use --- src/cpu/inorder/resource.cc | 3 ++- src/cpu/inorder/resources/inst_buffer.cc | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/cpu/inorder/resource.cc b/src/cpu/inorder/resource.cc index 47a9a4b9a..1fd28c939 100644 --- a/src/cpu/inorder/resource.cc +++ b/src/cpu/inorder/resource.cc @@ -81,7 +81,8 @@ Resource::regStats() instReqsProcessed .name(name() + ".instReqsProcessed") .desc("Number of Instructions Requests that completed in " - "this resource."); + "this resource.") + .prereq(instReqsProcessed); } int diff --git a/src/cpu/inorder/resources/inst_buffer.cc 
b/src/cpu/inorder/resources/inst_buffer.cc index bb308b0ea..17b308db0 100644 --- a/src/cpu/inorder/resources/inst_buffer.cc +++ b/src/cpu/inorder/resources/inst_buffer.cc @@ -52,7 +52,8 @@ InstBuffer::regStats() { instsBypassed .name(name() + ".instsBypassed") - .desc("Number of Instructions Bypassed."); + .desc("Number of Instructions Bypassed.") + .prereq(instsBypassed); Resource::regStats(); } -- cgit v1.2.3 From 82c5a754e684af6522f339ab30d2c661ee9c220c Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:29:18 -0500 Subject: inorder: recvRetry bug fix - on certain retry requests you can get an assertion failure - fix by allowing the request to literally "Retry" itself if it wasnt successful before, and then block any requests through cache port while waiting for the cache to be made available for access --- src/cpu/inorder/resources/cache_unit.cc | 53 ++++++++++----------------------- src/cpu/inorder/resources/cache_unit.hh | 16 +--------- 2 files changed, 16 insertions(+), 53 deletions(-) (limited to 'src') diff --git a/src/cpu/inorder/resources/cache_unit.cc b/src/cpu/inorder/resources/cache_unit.cc index 4f9ed3eca..65782cb73 100644 --- a/src/cpu/inorder/resources/cache_unit.cc +++ b/src/cpu/inorder/resources/cache_unit.cc @@ -84,8 +84,7 @@ CacheUnit::CachePort::recvRetry() CacheUnit::CacheUnit(string res_name, int res_id, int res_width, int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params) : Resource(res_name, res_id, res_width, res_latency, _cpu), - retryPkt(NULL), retrySlot(-1), cacheBlocked(false), - predecoder(NULL) + cachePortBlocked(false), predecoder(NULL) { cachePort = new CachePort(this); @@ -351,8 +350,8 @@ CacheUnit::write(DynInstPtr inst, T data, Addr addr, unsigned flags, void CacheUnit::execute(int slot_num) { - if (cacheBlocked) { - DPRINTF(InOrderCachePort, "Cache Blocked. Cannot Access\n"); + if (cachePortBlocked) { + DPRINTF(InOrderCachePort, "Cache Port Blocked. Cannot Access\n"); return; } @@ -470,8 +469,7 @@ CacheUnit::prefetch(DynInstPtr inst) // Clean-Up cache resource request so // other memory insts. can use them cache_req->setCompleted(); - cacheStatus = cacheAccessComplete; - cacheBlocked = false; + cachePortBlocked = false; cache_req->setMemAccPending(false); cache_req->setMemAccCompleted(); inst->unsetMemAddr(); @@ -490,8 +488,7 @@ CacheUnit::writeHint(DynInstPtr inst) // Clean-Up cache resource request so // other memory insts. can use them cache_req->setCompleted(); - cacheStatus = cacheAccessComplete; - cacheBlocked = false; + cachePortBlocked = false; cache_req->setMemAccPending(false); cache_req->setMemAccCompleted(); inst->unsetMemAddr(); @@ -555,28 +552,18 @@ CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res) if (do_access) { if (!cachePort->sendTiming(cache_req->dataPkt)) { DPRINTF(InOrderCachePort, - "[tid:%i] [sn:%i] is waiting to retry request\n", - tid, inst->seqNum); - - retrySlot = cache_req->getSlot(); - retryReq = cache_req; - retryPkt = cache_req->dataPkt; - - cacheStatus = cacheWaitRetry; - - //cacheBlocked = true; - - DPRINTF(InOrderStall, "STALL: \n"); - + "[tid:%i] [sn:%i] cannot access cache, because port " + "is blocked. 
now waiting to retry request\n", tid, + inst->seqNum); cache_req->setCompleted(false); + cachePortBlocked = true; } else { DPRINTF(InOrderCachePort, "[tid:%i] [sn:%i] is now waiting for cache response\n", tid, inst->seqNum); cache_req->setCompleted(); cache_req->setMemAccPending(); - cacheStatus = cacheWaitResponse; - cacheBlocked = false; + cachePortBlocked = false; } } else if (!do_access && memReq->isLLSC()){ // Store-Conditional instructions complete even if they "failed" @@ -737,22 +724,12 @@ CacheUnit::processCacheCompletion(PacketPtr pkt) void CacheUnit::recvRetry() { - DPRINTF(InOrderCachePort, "Retrying Request for [tid:%i] [sn:%i]\n", - retryReq->inst->readTid(), retryReq->inst->seqNum); - - assert(retryPkt != NULL); - assert(cacheBlocked); - assert(cacheStatus == cacheWaitRetry); + DPRINTF(InOrderCachePort, "Unblocking Cache Port. \n"); + + assert(cachePortBlocked); - if (cachePort->sendTiming(retryPkt)) { - cacheStatus = cacheWaitResponse; - retryPkt = NULL; - cacheBlocked = false; - } else { - DPRINTF(InOrderCachePort, - "Retry Request for [tid:%i] [sn:%i] failed\n", - retryReq->inst->readTid(), retryReq->inst->seqNum); - } + // Clear the cache port for use again + cachePortBlocked = false; } CacheUnitEvent::CacheUnitEvent() diff --git a/src/cpu/inorder/resources/cache_unit.hh b/src/cpu/inorder/resources/cache_unit.hh index 26f6859ed..4162102c7 100644 --- a/src/cpu/inorder/resources/cache_unit.hh +++ b/src/cpu/inorder/resources/cache_unit.hh @@ -119,12 +119,6 @@ class CacheUnit : public Resource virtual void recvRetry(); }; - enum CachePortStatus { - cacheWaitResponse, - cacheWaitRetry, - cacheAccessComplete - }; - void init(); virtual ResourceRequest* getRequest(DynInstPtr _inst, int stage_num, @@ -188,15 +182,7 @@ class CacheUnit : public Resource /** Cache interface. 
*/ CachePort *cachePort; - CachePortStatus cacheStatus; - - CacheReqPtr retryReq; - - PacketPtr retryPkt; - - int retrySlot; - - bool cacheBlocked; + bool cachePortBlocked; std::vector addrList[ThePipeline::MaxThreads]; -- cgit v1.2.3 From 002f1b8b7e1d5292828e5157ff971965265140bc Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:29:49 -0500 Subject: inorder: add execution unit stats --- src/cpu/inorder/resources/execution_unit.cc | 17 +++++++++++++++++ src/cpu/inorder/resources/execution_unit.hh | 5 +++++ 2 files changed, 22 insertions(+) (limited to 'src') diff --git a/src/cpu/inorder/resources/execution_unit.cc b/src/cpu/inorder/resources/execution_unit.cc index 6c44e2456..429291231 100644 --- a/src/cpu/inorder/resources/execution_unit.cc +++ b/src/cpu/inorder/resources/execution_unit.cc @@ -54,6 +54,17 @@ ExecutionUnit::regStats() .name(name() + ".predictedNotTakenIncorrect") .desc("Number of Branches Incorrectly Predicted As Not Taken)."); + lastExecuteCycle = curTick; + + cyclesExecuted + .name(name() + ".cyclesExecuted") + .desc("Number of Cycles Execution Unit was used."); + + utilization + .name(name() + ".utilization") + .desc("Utilization of Execution Unit (cycles / totalCycles)."); + utilization = cyclesExecuted / cpu->numCycles; + Resource::regStats(); } @@ -75,6 +86,12 @@ ExecutionUnit::execute(int slot_num) { case ExecuteInst: { + if (curTick != lastExecuteCycle) { + lastExecuteCycle = curTick; + cyclesExecuted++; + } + + if (inst->isMemRef()) { panic("%s not configured to handle memory ops.\n", resName); } else if (inst->isControl()) { diff --git a/src/cpu/inorder/resources/execution_unit.hh b/src/cpu/inorder/resources/execution_unit.hh index 46691bbf2..37651e873 100644 --- a/src/cpu/inorder/resources/execution_unit.hh +++ b/src/cpu/inorder/resources/execution_unit.hh @@ -71,6 +71,11 @@ class ExecutionUnit : public Resource { ///////////////////////////////////////////////////////////////// Stats::Scalar predictedTakenIncorrect; Stats::Scalar predictedNotTakenIncorrect; + + Stats::Scalar cyclesExecuted; + Tick lastExecuteCycle; + + Stats::Formula utilization; }; -- cgit v1.2.3 From 1a89e8f4cbab3b3a6fd144d3d08dfeaac203f945 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:29:59 -0500 Subject: inorder: user per-thread dummy insts/reqs --- src/cpu/inorder/cpu.cc | 35 +++++++++++++++++++++++++---------- src/cpu/inorder/cpu.hh | 7 +++++-- src/cpu/inorder/resource_pool.cc | 5 ----- src/cpu/inorder/resource_pool.hh | 3 --- 4 files changed, 30 insertions(+), 20 deletions(-) (limited to 'src') diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index b69fe2e3b..472317362 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -211,6 +211,7 @@ InOrderCPU::InOrderCPU(Params *params) "edit your workload size."); } + if (active_threads > 1) { threadModel = (InOrderCPU::ThreadModel) params->threadModel; @@ -257,6 +258,9 @@ InOrderCPU::InOrderCPU(Params *params) Process* dummy_proc = params->workload[0]; thread[tid] = new Thread(this, tid, dummy_proc); } + + // Eventually set this with parameters... + asid[tid] = tid; #endif // Setup the TC that will serve as the interface to the threads/CPU. @@ -313,14 +317,24 @@ InOrderCPU::InOrderCPU(Params *params) isa[tid].clear(); isa[tid].expandForMultithreading(numThreads, 1/*numVirtProcs*/); + + // Define dummy instructions and resource requests to be used. 
+ dummyInst[tid] = new InOrderDynInst(this, + thread[tid], + 0, + tid, + asid[tid]); + + dummyReq[tid] = new ResourceRequest(resPool->getResource(0), + dummyInst[tid], + 0, + 0, + 0, + 0); } lastRunningCycle = curTick; - // Define dummy instructions and resource requests to be used. - dummyInst = new InOrderDynInst(this, NULL, 0, 0); - dummyReq = new ResourceRequest(resPool->getResource(0), NULL, 0, 0, 0, 0); - // Reset CPU to reset state. #if FULL_SYSTEM Fault resetFault = new ResetFault(); @@ -585,7 +599,7 @@ void InOrderCPU::trap(Fault fault, ThreadID tid, int delay) { //@ Squash Pipeline during TRAP - scheduleCpuEvent(Trap, fault, tid, dummyInst, delay); + scheduleCpuEvent(Trap, fault, tid, dummyInst[tid], delay); } void @@ -747,7 +761,7 @@ InOrderCPU::deactivateContext(ThreadID tid, int delay) { DPRINTF(InOrderCPU,"[tid:%i]: Deactivating ...\n", tid); - scheduleCpuEvent(DeactivateThread, NoFault, tid, dummyInst, delay); + scheduleCpuEvent(DeactivateThread, NoFault, tid, dummyInst[tid], delay); // Be sure to signal that there's some activity so the CPU doesn't // deschedule itself. @@ -830,7 +844,8 @@ InOrderCPU::activateContext(ThreadID tid, int delay) { DPRINTF(InOrderCPU,"[tid:%i]: Activating ...\n", tid); - scheduleCpuEvent(ActivateThread, NoFault, tid, dummyInst, delay); + + scheduleCpuEvent(ActivateThread, NoFault, tid, dummyInst[tid], delay); // Be sure to signal that there's some activity so the CPU doesn't // deschedule itself. @@ -847,7 +862,7 @@ InOrderCPU::activateNextReadyContext(int delay) // NOTE: Add 5 to the event priority so that we always activate // threads after we've finished deactivating, squashing,etc. // other threads - scheduleCpuEvent(ActivateNextReadyThread, NoFault, 0/*tid*/, dummyInst, + scheduleCpuEvent(ActivateNextReadyThread, NoFault, 0/*tid*/, dummyInst[0], delay, 5); // Be sure to signal that there's some activity so the CPU doesn't @@ -862,7 +877,7 @@ InOrderCPU::haltContext(ThreadID tid, int delay) { DPRINTF(InOrderCPU, "[tid:%i]: Calling Halt Context...\n", tid); - scheduleCpuEvent(HaltThread, NoFault, tid, dummyInst, delay); + scheduleCpuEvent(HaltThread, NoFault, tid, dummyInst[tid], delay); activityRec.activity(); } @@ -885,7 +900,7 @@ InOrderCPU::haltThread(ThreadID tid) void InOrderCPU::suspendContext(ThreadID tid, int delay) { - scheduleCpuEvent(SuspendThread, NoFault, tid, dummyInst, delay); + scheduleCpuEvent(SuspendThread, NoFault, tid, dummyInst[tid], delay); } void diff --git a/src/cpu/inorder/cpu.hh b/src/cpu/inorder/cpu.hh index 6f1f3ee3f..dc0164d8f 100644 --- a/src/cpu/inorder/cpu.hh +++ b/src/cpu/inorder/cpu.hh @@ -97,6 +97,9 @@ class InOrderCPU : public BaseCPU /** CPU ID */ int cpu_id; + // SE Mode ASIDs + ThreadID asid[ThePipeline::MaxThreads]; + /** Type of core that this is */ std::string coreType; @@ -241,10 +244,10 @@ class InOrderCPU : public BaseCPU /** Instruction used to signify that there is no *real* instruction in buffer slot */ - DynInstPtr dummyInst; + DynInstPtr dummyInst[ThePipeline::MaxThreads]; /** Used by resources to signify a denied access to a resource. */ - ResourceRequest *dummyReq; + ResourceRequest *dummyReq[ThePipeline::MaxThreads]; /** Identifies the resource id that identifies a fetch * access unit. 
diff --git a/src/cpu/inorder/resource_pool.cc b/src/cpu/inorder/resource_pool.cc index 3750d18d6..dd51242a3 100644 --- a/src/cpu/inorder/resource_pool.cc +++ b/src/cpu/inorder/resource_pool.cc @@ -448,11 +448,6 @@ ResourcePool::updateAfterContextSwitch(DynInstPtr inst, ThreadID tid) } } -ResourcePool::ResPoolEvent::ResPoolEvent(ResourcePool *_resPool) - : Event((Event::Priority)((unsigned)CPU_Tick_Pri+5)), resPool(_resPool), - eventType((InOrderCPU::CPUEventType) Default) -{ } - ResourcePool::ResPoolEvent::ResPoolEvent(ResourcePool *_resPool, InOrderCPU::CPUEventType e_type, DynInstPtr _inst, diff --git a/src/cpu/inorder/resource_pool.hh b/src/cpu/inorder/resource_pool.hh index 3f62d2caa..f61fae4c8 100644 --- a/src/cpu/inorder/resource_pool.hh +++ b/src/cpu/inorder/resource_pool.hh @@ -85,9 +85,6 @@ class ResourcePool { ThreadID tid; public: - /** Constructs a resource event. */ - ResPoolEvent(ResourcePool *_resPool); - /** Constructs a resource event. */ ResPoolEvent(ResourcePool *_resPool, InOrderCPU::CPUEventType e_type, -- cgit v1.2.3 From f3bc2df663cccd7db7a4ba87acfc2d0137a5ca02 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:30:08 -0500 Subject: inorder: object cleanup in destructors --- src/cpu/inorder/cpu.cc | 5 +++++ src/cpu/inorder/cpu.hh | 4 +++- src/cpu/inorder/resource.cc | 1 + src/cpu/inorder/resource_pool.cc | 12 ++++++++++++ src/cpu/inorder/resource_pool.hh | 2 +- src/cpu/inorder/resources/cache_unit.hh | 1 - src/cpu/inorder/resources/execution_unit.hh | 1 - src/cpu/inorder/resources/fetch_seq_unit.cc | 5 +++++ src/cpu/inorder/resources/fetch_seq_unit.hh | 4 ++-- src/cpu/inorder/resources/mult_div_unit.hh | 1 - 10 files changed, 29 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index 472317362..a3b203559 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -347,6 +347,11 @@ InOrderCPU::InOrderCPU(Params *params) scheduleTickEvent(0); } +InOrderCPU::~InOrderCPU() +{ + delete resPool; +} + void InOrderCPU::regStats() diff --git a/src/cpu/inorder/cpu.hh b/src/cpu/inorder/cpu.hh index dc0164d8f..d8424397b 100644 --- a/src/cpu/inorder/cpu.hh +++ b/src/cpu/inorder/cpu.hh @@ -93,7 +93,9 @@ class InOrderCPU : public BaseCPU public: /** Constructs a CPU with the given parameters. */ InOrderCPU(Params *params); - + /* Destructor */ + ~InOrderCPU(); + /** CPU ID */ int cpu_id; diff --git a/src/cpu/inorder/resource.cc b/src/cpu/inorder/resource.cc index 1fd28c939..e5fd4f70e 100644 --- a/src/cpu/inorder/resource.cc +++ b/src/cpu/inorder/resource.cc @@ -47,6 +47,7 @@ Resource::Resource(string res_name, int res_id, int res_width, Resource::~Resource() { delete [] resourceEvent; + delete deniedReq; } diff --git a/src/cpu/inorder/resource_pool.cc b/src/cpu/inorder/resource_pool.cc index dd51242a3..1f15a2c96 100644 --- a/src/cpu/inorder/resource_pool.cc +++ b/src/cpu/inorder/resource_pool.cc @@ -91,6 +91,18 @@ ResourcePool::ResourcePool(InOrderCPU *_cpu, ThePipeline::Params *params) 0, _cpu, params)); } +ResourcePool::~ResourcePool() +{ + cout << "Deleting resources ..." 
<< endl; + + for (int i=0; i < resources.size(); i++) { + DPRINTF(Resource, "Deleting resource: %s.\n", resources[i]->name()); + + delete resources[i]; + } +} + + void ResourcePool::init() { diff --git a/src/cpu/inorder/resource_pool.hh b/src/cpu/inorder/resource_pool.hh index f61fae4c8..ce7167b87 100644 --- a/src/cpu/inorder/resource_pool.hh +++ b/src/cpu/inorder/resource_pool.hh @@ -122,7 +122,7 @@ class ResourcePool { public: ResourcePool(InOrderCPU *_cpu, ThePipeline::Params *params); - virtual ~ResourcePool() {} + ~ResourcePool(); std::string name(); diff --git a/src/cpu/inorder/resources/cache_unit.hh b/src/cpu/inorder/resources/cache_unit.hh index 4162102c7..50cb47519 100644 --- a/src/cpu/inorder/resources/cache_unit.hh +++ b/src/cpu/inorder/resources/cache_unit.hh @@ -62,7 +62,6 @@ class CacheUnit : public Resource public: CacheUnit(std::string res_name, int res_id, int res_width, int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params); - virtual ~CacheUnit() {} enum Command { InitiateFetch, diff --git a/src/cpu/inorder/resources/execution_unit.hh b/src/cpu/inorder/resources/execution_unit.hh index 37651e873..b9cf1d428 100644 --- a/src/cpu/inorder/resources/execution_unit.hh +++ b/src/cpu/inorder/resources/execution_unit.hh @@ -52,7 +52,6 @@ class ExecutionUnit : public Resource { public: ExecutionUnit(std::string res_name, int res_id, int res_width, int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params); - virtual ~ExecutionUnit() {} public: virtual void regStats(); diff --git a/src/cpu/inorder/resources/fetch_seq_unit.cc b/src/cpu/inorder/resources/fetch_seq_unit.cc index ba86a91f0..03663881c 100644 --- a/src/cpu/inorder/resources/fetch_seq_unit.cc +++ b/src/cpu/inorder/resources/fetch_seq_unit.cc @@ -54,6 +54,11 @@ FetchSeqUnit::FetchSeqUnit(std::string res_name, int res_id, int res_width, } } +FetchSeqUnit::~FetchSeqUnit() +{ + delete [] resourceEvent; +} + void FetchSeqUnit::init() { diff --git a/src/cpu/inorder/resources/fetch_seq_unit.hh b/src/cpu/inorder/resources/fetch_seq_unit.hh index 3283e0330..289e150aa 100644 --- a/src/cpu/inorder/resources/fetch_seq_unit.hh +++ b/src/cpu/inorder/resources/fetch_seq_unit.hh @@ -54,8 +54,8 @@ class FetchSeqUnit : public Resource { public: FetchSeqUnit(std::string res_name, int res_id, int res_width, int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params); - virtual ~FetchSeqUnit() {} - + virtual ~FetchSeqUnit(); + virtual void init(); virtual void activateThread(ThreadID tid); virtual void deactivateThread(ThreadID tid); diff --git a/src/cpu/inorder/resources/mult_div_unit.hh b/src/cpu/inorder/resources/mult_div_unit.hh index d3dd0260d..19688b09f 100644 --- a/src/cpu/inorder/resources/mult_div_unit.hh +++ b/src/cpu/inorder/resources/mult_div_unit.hh @@ -57,7 +57,6 @@ class MultDivUnit : public Resource { public: MultDivUnit(std::string res_name, int res_id, int res_width, int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params); - virtual ~MultDivUnit() {} public: /** Override default Resource getSlot(). 
Will only getSlot if -- cgit v1.2.3 From ea8909925fd0e7a33feabc9e17f83b85cd7c6039 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:30:24 -0500 Subject: inorder: add activity stats --- src/cpu/inorder/cpu.cc | 25 +++++++++++++++++++++---- src/cpu/inorder/cpu.hh | 8 +++++++- src/cpu/inorder/first_stage.cc | 4 +++- src/cpu/inorder/pipeline_stage.cc | 6 +++++- src/cpu/inorder/pipeline_stage.hh | 2 ++ src/cpu/inorder/resources/cache_unit.cc | 16 +++++++++++----- 6 files changed, 49 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index a3b203559..e864c8c86 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -389,9 +389,17 @@ InOrderCPU::regStats() idleCycles .name(name() + ".idleCycles") - .desc("Total number of cycles that the CPU has spent unscheduled due " - "to idling") - .prereq(idleCycles); + .desc("Number of cycles cpu's stages were not processed"); + + runCycles + .name(name() + ".runCycles") + .desc("Number of cycles cpu stages are processed."); + + activity + .name(name() + ".activity") + .desc("Percentage of cycles cpu is active") + .precision(6); + activity = (runCycles / numCycles) * 100; threadCycles .init(numThreads) @@ -463,18 +471,27 @@ InOrderCPU::tick() ++numCycles; + bool pipes_idle = true; + //Tick each of the stages for (int stNum=NumStages - 1; stNum >= 0 ; stNum--) { pipelineStage[stNum]->tick(); + + pipes_idle = pipes_idle && pipelineStage[stNum]->idle; } + if (pipes_idle) + idleCycles++; + else + runCycles++; + // Now advance the time buffers one tick timeBuffer.advance(); for (int sqNum=0; sqNum < NumStages - 1; sqNum++) { stageQueue[sqNum]->advance(); } activityRec.advance(); - + // Any squashed requests, events, or insts then remove them now cleanUpRemovedReqs(); cleanUpRemovedEvents(); diff --git a/src/cpu/inorder/cpu.hh b/src/cpu/inorder/cpu.hh index d8424397b..253b5b18f 100644 --- a/src/cpu/inorder/cpu.hh +++ b/src/cpu/inorder/cpu.hh @@ -729,9 +729,15 @@ class InOrderCPU : public BaseCPU /** Stat for total number of times the CPU is descheduled. */ Stats::Scalar timesIdled; - /** Stat for total number of cycles the CPU spends descheduled. */ + /** Stat for total number of cycles the CPU spends descheduled or no stages active. */ Stats::Scalar idleCycles; + /** Stat for total number of cycles the CPU is active. */ + Stats::Scalar runCycles; + + /** Percentage of cycles a stage was active */ + Stats::Formula activity; + /** Stat for the number of committed instructions per thread. 
*/ Stats::Vector committedInsts; diff --git a/src/cpu/inorder/first_stage.cc b/src/cpu/inorder/first_stage.cc index c653d152b..658ce37d3 100644 --- a/src/cpu/inorder/first_stage.cc +++ b/src/cpu/inorder/first_stage.cc @@ -133,8 +133,10 @@ FirstStage::processStage(bool &status_change) if (instsProcessed > 0) { ++runCycles; + idle = false; } else { - ++idleCycles; + ++idleCycles; + idle = true; } } diff --git a/src/cpu/inorder/pipeline_stage.cc b/src/cpu/inorder/pipeline_stage.cc index 550952947..c991fe1bd 100644 --- a/src/cpu/inorder/pipeline_stage.cc +++ b/src/cpu/inorder/pipeline_stage.cc @@ -42,7 +42,7 @@ PipelineStage::PipelineStage(Params *params, unsigned stage_num) : stageNum(stage_num), stageWidth(ThePipeline::StageWidth), numThreads(ThePipeline::MaxThreads), _status(Inactive), stageBufferMax(ThePipeline::interStageBuffSize[stage_num]), - prevStageValid(false), nextStageValid(false) + prevStageValid(false), nextStageValid(false), idle(false) { switchedOutBuffer.resize(ThePipeline::MaxThreads); switchedOutValid.resize(ThePipeline::MaxThreads); @@ -707,6 +707,8 @@ PipelineStage::checkSignalsAndUpdate(ThreadID tid) void PipelineStage::tick() { + idle = false; + wroteToTimeBuffer = false; bool status_change = false; @@ -794,8 +796,10 @@ PipelineStage::processStage(bool &status_change) if (instsProcessed > 0) { ++runCycles; + idle = false; } else { ++idleCycles; + idle = true; } DPRINTF(InOrderStage, "%i left in stage %i incoming buffer.\n", skidSize(), diff --git a/src/cpu/inorder/pipeline_stage.hh b/src/cpu/inorder/pipeline_stage.hh index be3a1093c..6c9cf0d99 100644 --- a/src/cpu/inorder/pipeline_stage.hh +++ b/src/cpu/inorder/pipeline_stage.hh @@ -347,6 +347,8 @@ class PipelineStage /** Is Next Stage Valid? */ bool nextStageValid; + bool idle; + /** Source of possible stalls. 
*/ struct Stalls { bool stage[ThePipeline::NumStages]; diff --git a/src/cpu/inorder/resources/cache_unit.cc b/src/cpu/inorder/resources/cache_unit.cc index 65782cb73..275d9a7e8 100644 --- a/src/cpu/inorder/resources/cache_unit.cc +++ b/src/cpu/inorder/resources/cache_unit.cc @@ -143,7 +143,8 @@ CacheUnit::getSlot(DynInstPtr inst) Addr req_addr = inst->getMemAddr(); if (resName == "icache_port" || - find(addrList[tid].begin(), addrList[tid].end(), req_addr) == addrList[tid].end()) { + find(addrList[tid].begin(), addrList[tid].end(), req_addr) == + addrList[tid].end()) { int new_slot = Resource::getSlot(inst); @@ -171,8 +172,9 @@ CacheUnit::freeSlot(int slot_num) { ThreadID tid = reqMap[slot_num]->inst->readTid(); - vector::iterator vect_it = find(addrList[tid].begin(), addrList[tid].end(), - reqMap[slot_num]->inst->getMemAddr()); + vector::iterator vect_it = + find(addrList[tid].begin(), addrList[tid].end(), + reqMap[slot_num]->inst->getMemAddr()); assert(vect_it != addrList[tid].end()); DPRINTF(InOrderCachePort, @@ -533,8 +535,6 @@ CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res) } } - cache_req->dataPkt->time = curTick; - bool do_access = true; // flag to suppress cache access Request *memReq = cache_req->dataPkt->req; @@ -590,6 +590,7 @@ CacheUnit::processCacheCompletion(PacketPtr pkt) { // Cast to correct packet type CacheReqPacket* cache_pkt = dynamic_cast(pkt); + assert(cache_pkt); if (cache_pkt->cacheReq->isSquashed()) { @@ -600,6 +601,9 @@ CacheUnit::processCacheCompletion(PacketPtr pkt) cache_pkt->cacheReq->done(); delete cache_pkt; + + cpu->wakeCPU(); + return; } @@ -730,6 +734,8 @@ CacheUnit::recvRetry() // Clear the cache port for use again cachePortBlocked = false; + + cpu->wakeCPU(); } CacheUnitEvent::CacheUnitEvent() -- cgit v1.2.3 From 6939482c49b489ad8811364ec52ad10ae421fb44 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:30:35 -0500 Subject: inorder: implement split loads --- src/cpu/inorder/inorder_dyn_inst.cc | 10 +- src/cpu/inorder/inorder_dyn_inst.hh | 18 +- src/cpu/inorder/pipeline_traits.hh | 6 +- src/cpu/inorder/resource.cc | 13 +- src/cpu/inorder/resource_pool.cc | 19 ++ src/cpu/inorder/resource_pool.hh | 1 + src/cpu/inorder/resources/cache_unit.cc | 312 +++++++++++++++++++++++++++++--- src/cpu/inorder/resources/cache_unit.hh | 36 +++- 8 files changed, 374 insertions(+), 41 deletions(-) (limited to 'src') diff --git a/src/cpu/inorder/inorder_dyn_inst.cc b/src/cpu/inorder/inorder_dyn_inst.cc index 79f8de05d..9c0313721 100644 --- a/src/cpu/inorder/inorder_dyn_inst.cc +++ b/src/cpu/inorder/inorder_dyn_inst.cc @@ -111,7 +111,11 @@ InOrderDynInst::initVars() { fetchMemReq = NULL; dataMemReq = NULL; - + splitMemData = NULL; + split2ndAccess = false; + splitInst = false; + splitFinishCnt = 0; + effAddr = 0; physEffAddr = 0; @@ -187,6 +191,10 @@ InOrderDynInst::~InOrderDynInst() delete traceData; } + if (splitMemData) { + delete splitMemData; + } + fault = NoFault; --instcount; diff --git a/src/cpu/inorder/inorder_dyn_inst.hh b/src/cpu/inorder/inorder_dyn_inst.hh index b573c1029..6f5b7c0e9 100644 --- a/src/cpu/inorder/inorder_dyn_inst.hh +++ b/src/cpu/inorder/inorder_dyn_inst.hh @@ -330,6 +330,19 @@ class InOrderDynInst : public FastAlloc, public RefCounted public: Tick memTime; + PacketDataPtr splitMemData; + RequestPtr splitMemReq; + int splitTotalSize; + int split2ndSize; + Addr split2ndAddr; + bool split2ndAccess; + uint8_t split2ndData; + PacketDataPtr split2ndDataPtr; + unsigned split2ndFlags; + bool splitInst; + int 
splitFinishCnt; + + //////////////////////////////////////////////////////////// // // BASE INSTRUCTION INFORMATION. @@ -468,7 +481,10 @@ class InOrderDynInst : public FastAlloc, public RefCounted if (!resSched.empty()) { ThePipeline::ScheduleEntry* sked = resSched.top(); resSched.pop(); - delete sked; + if (sked != 0) { + delete sked; + + } } } diff --git a/src/cpu/inorder/pipeline_traits.hh b/src/cpu/inorder/pipeline_traits.hh index ddc8a3ad7..f039b9e5d 100644 --- a/src/cpu/inorder/pipeline_traits.hh +++ b/src/cpu/inorder/pipeline_traits.hh @@ -53,8 +53,8 @@ namespace ThePipeline { const unsigned StageWidth = 1; const unsigned BackEndStartStage = 2; - // Enumerated List of Resources The Pipeline Uses - enum ResourceList { + // List of Resources The Pipeline Uses + enum ResourceId { FetchSeq = 0, ICache, Decode, @@ -94,6 +94,7 @@ namespace ThePipeline { stageNum(stage_num), resNum(res_num), cmd(_cmd), idx(_idx), priority(_priority) { } + virtual ~ScheduleEntry(){} // Stage number to perform this service. @@ -159,7 +160,6 @@ namespace ThePipeline { stageNum, nextTaskPriority++, unit, request, param )); } - }; }; diff --git a/src/cpu/inorder/resource.cc b/src/cpu/inorder/resource.cc index e5fd4f70e..dcf5f3117 100644 --- a/src/cpu/inorder/resource.cc +++ b/src/cpu/inorder/resource.cc @@ -262,15 +262,22 @@ Resource::findRequest(DynInstPtr inst) map::iterator map_it = reqMap.begin(); map::iterator map_end = reqMap.end(); + bool found = false; + ResReqPtr req = NULL; + while (map_it != map_end) { if ((*map_it).second && - (*map_it).second->getInst() == inst) { - return (*map_it).second; + (*map_it).second->getInst() == inst) { + req = (*map_it).second; + //return (*map_it).second; + assert(found == false); + found = true; } map_it++; } - return NULL; + return req; + //return NULL; } void diff --git a/src/cpu/inorder/resource_pool.cc b/src/cpu/inorder/resource_pool.cc index 1f15a2c96..74bf4f03b 100644 --- a/src/cpu/inorder/resource_pool.cc +++ b/src/cpu/inorder/resource_pool.cc @@ -181,6 +181,25 @@ ResourcePool::getResIdx(const std::string &res_name) return idx; } + panic("Can't find resource idx for: %s\n", res_name); + return 0; +} + +unsigned +ResourcePool::getResIdx(const ThePipeline::ResourceId &res_id) +{ + int num_resources = resources.size(); + + for (int idx = 0; idx < num_resources; idx++) { + if (resources[idx]->getId() == res_id) + return idx; + } + + // todo: change return value to int and return a -1 here + // maybe even have enumerated type + // panic for now... + panic("Can't find resource idx for: %i\n", res_id); + return 0; } diff --git a/src/cpu/inorder/resource_pool.hh b/src/cpu/inorder/resource_pool.hh index ce7167b87..60d35ab61 100644 --- a/src/cpu/inorder/resource_pool.hh +++ b/src/cpu/inorder/resource_pool.hh @@ -141,6 +141,7 @@ class ResourcePool { /** Returns a specific resource. 
*/ unsigned getResIdx(const std::string &res_name); + unsigned getResIdx(const ThePipeline::ResourceId &res_id); /** Returns a pointer to a resource */ Resource* getResource(int res_idx) { return resources[res_idx]; } diff --git a/src/cpu/inorder/resources/cache_unit.cc b/src/cpu/inorder/resources/cache_unit.cc index 275d9a7e8..85ef18a55 100644 --- a/src/cpu/inorder/resources/cache_unit.cc +++ b/src/cpu/inorder/resources/cache_unit.cc @@ -40,6 +40,7 @@ #include "cpu/inorder/resources/cache_unit.hh" #include "cpu/inorder/pipeline_traits.hh" #include "cpu/inorder/cpu.hh" +#include "cpu/inorder/resource_pool.hh" #include "mem/request.hh" using namespace std; @@ -136,7 +137,9 @@ CacheUnit::getSlot(DynInstPtr inst) return -1; } - if (!inst->validMemAddr()) { + // For a Split-Load, the instruction would have processed once already + // causing the address to be unset. + if (!inst->validMemAddr() && !inst->splitInst) { panic("Mem. Addr. must be set before requesting cache access\n"); } @@ -159,12 +162,24 @@ CacheUnit::getSlot(DynInstPtr inst) inst->readTid(), inst->seqNum, req_addr); return new_slot; } else { - DPRINTF(InOrderCachePort, + // Allow same instruction multiple accesses to same address + if (addrMap[tid][req_addr] == inst->seqNum) { + int new_slot = Resource::getSlot(inst); + + if (new_slot == -1) + return -1; + + return new_slot; + } else { + DPRINTF(InOrderCachePort, "[tid:%i] Denying request because there is an outstanding" " request to/for addr. %08p. by [sn:%i] @ tick %i\n", inst->readTid(), req_addr, addrMap[tid][req_addr], inst->memTime); - return -1; + return -1; + } } + + return -1; } void @@ -175,17 +190,69 @@ CacheUnit::freeSlot(int slot_num) vector::iterator vect_it = find(addrList[tid].begin(), addrList[tid].end(), reqMap[slot_num]->inst->getMemAddr()); - assert(vect_it != addrList[tid].end()); + + assert(vect_it != addrList[tid].end() || + reqMap[slot_num]->inst->splitInst); DPRINTF(InOrderCachePort, "[tid:%i]: Address %08p removed from dependency list\n", reqMap[slot_num]->inst->readTid(), (*vect_it)); - addrList[tid].erase(vect_it); + if (vect_it != addrList[tid].end()) { + + DPRINTF(InOrderCachePort, + "[tid:%i]: Address %08p removed from dependency list\n", + reqMap[slot_num]->inst->readTid(), (*vect_it)); + + addrList[tid].erase(vect_it); + } Resource::freeSlot(slot_num); } +ResReqPtr +CacheUnit::findRequest(DynInstPtr inst) +{ + map::iterator map_it = reqMap.begin(); + map::iterator map_end = reqMap.end(); + + while (map_it != map_end) { + CacheRequest* cache_req = dynamic_cast((*map_it).second); + assert(cache_req); + + if (cache_req && + cache_req->getInst() == inst && + cache_req->instIdx == inst->resSched.top()->idx) { + return cache_req; + } + map_it++; + } + + return NULL; +} + +ResReqPtr +CacheUnit::findSplitRequest(DynInstPtr inst, int idx) +{ + map::iterator map_it = reqMap.begin(); + map::iterator map_end = reqMap.end(); + + while (map_it != map_end) { + CacheRequest* cache_req = dynamic_cast((*map_it).second); + assert(cache_req); + + if (cache_req && + cache_req->getInst() == inst && + cache_req->instIdx == idx) { + return cache_req; + } + map_it++; + } + + return NULL; +} + + ResReqPtr CacheUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx, int slot_num, unsigned cmd) @@ -200,6 +267,14 @@ CacheUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx, switch (sched_entry->cmd) { + case InitSecondSplitRead: + pkt_cmd = MemCmd::ReadReq; + + DPRINTF(InOrderCachePort, + "[tid:%i]: Read request from [sn:%i] for addr %08p\n", + inst->readTid(), 
inst->seqNum, inst->split2ndAddr); + break; + case InitiateReadData: pkt_cmd = MemCmd::ReadReq; @@ -231,7 +306,8 @@ CacheUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx, return new CacheRequest(this, inst, stage_num, id, slot_num, sched_entry->cmd, 0, pkt_cmd, - 0/*flags*/, this->cpu->readCpuId()); + 0/*flags*/, this->cpu->readCpuId(), + inst->resSched.top()->idx); } void @@ -242,7 +318,8 @@ CacheUnit::requestAgain(DynInstPtr inst, bool &service_request) // Check to see if this instruction is requesting the same command // or a different one - if (cache_req->cmd != inst->resSched.top()->cmd) { + if (cache_req->cmd != inst->resSched.top()->cmd && + cache_req->instIdx == inst->resSched.top()->idx) { // If different, then update command in the request cache_req->cmd = inst->resSched.top()->cmd; DPRINTF(InOrderCachePort, @@ -250,7 +327,7 @@ CacheUnit::requestAgain(DynInstPtr inst, bool &service_request) "instruction\n ", inst->readTid(), inst->seqNum); service_request = true; - } else { + } else if (inst->resSched.top()->idx != CacheUnit::InitSecondSplitRead) { // If same command, just check to see if memory access was completed // but dont try to re-execute DPRINTF(InOrderCachePort, @@ -276,12 +353,25 @@ CacheUnit::doTLBAccess(DynInstPtr inst, CacheReqPtr cache_req, int acc_size, cpu->readCpuId(), inst->readTid()); cache_req->memReq = inst->fetchMemReq; } else { - inst->dataMemReq = new Request(inst->readTid(), aligned_addr, + if (!cache_req->is2ndSplit()) { + inst->dataMemReq = new Request(cpu->asid[tid], aligned_addr, acc_size, flags, inst->readPC(), cpu->readCpuId(), inst->readTid()); cache_req->memReq = inst->dataMemReq; + } else { + assert(inst->splitInst); + + inst->splitMemReq = new Request(cpu->asid[tid], + inst->split2ndAddr, + acc_size, + flags, + inst->readPC(), + cpu->readCpuId(), + tid); + cache_req->memReq = inst->splitMemReq; + } } - + cache_req->fault = _tlb->translateAtomic(cache_req->memReq, @@ -318,14 +408,94 @@ CacheUnit::read(DynInstPtr inst, Addr addr, T &data, unsigned flags) CacheReqPtr cache_req = dynamic_cast(findRequest(inst)); assert(cache_req); - int acc_size = sizeof(T); - doTLBAccess(inst, cache_req, acc_size, flags, TheISA::TLB::Read); + // The block size of our peer + unsigned blockSize = this->cachePort->peerBlockSize(); + + //The size of the data we're trying to read. + int dataSize = sizeof(T); + + if (inst->split2ndAccess) { + dataSize = inst->split2ndSize; + cache_req->splitAccess = true; + cache_req->split2ndAccess = true; + + DPRINTF(InOrderCachePort, "%i: sn[%i] Split Read Access (2 of 2) for (%#x, %#x).\n", curTick, inst->seqNum, + inst->getMemAddr(), inst->split2ndAddr); + } + + + //The address of the second part of this access if it needs to be split + //across a cache line boundary. 
+ Addr secondAddr = roundDown(addr + dataSize - 1, blockSize); + + + if (secondAddr > addr && !inst->split2ndAccess) { + DPRINTF(InOrderCachePort, "%i: sn[%i] Split Read Access (1 of 2) for (%#x, %#x).\n", curTick, inst->seqNum, + addr, secondAddr); + + // Save All "Total" Split Information + // ============================== + inst->splitInst = true; + inst->splitMemData = new uint8_t[dataSize]; + inst->splitTotalSize = dataSize; + + + // Schedule Split Read/Complete for Instruction + // ============================== + int stage_num = cache_req->getStageNum(); + + int stage_pri = ThePipeline::getNextPriority(inst, stage_num); + + inst->resSched.push(new ScheduleEntry(stage_num, + stage_pri, + cpu->resPool->getResIdx(DCache), + CacheUnit::InitSecondSplitRead, + 1) + ); + + inst->resSched.push(new ScheduleEntry(stage_num + 1, + 1/*stage_pri*/, + cpu->resPool->getResIdx(DCache), + CacheUnit::CompleteSecondSplitRead, 1) + ); + + + // Split Information for First Access + // ============================== + dataSize = secondAddr - addr; + cache_req->splitAccess = true; + + // Split Information for Second Access + // ============================== + inst->split2ndSize = addr + sizeof(T) - secondAddr; + inst->split2ndAddr = secondAddr; + inst->split2ndDataPtr = inst->splitMemData + dataSize; + inst->split2ndFlags = flags; + } + + //cout << "h1" << endl; + + doTLBAccess(inst, cache_req, dataSize, flags, TheISA::TLB::Read); + + //cout << "h2" << endl; if (cache_req->fault == NoFault) { - cache_req->reqData = new uint8_t[acc_size]; - doCacheAccess(inst, NULL); + if (!cache_req->splitAccess) { + cache_req->reqData = new uint8_t[dataSize]; + doCacheAccess(inst, NULL); + } else { + if (!inst->split2ndAccess) { + cache_req->reqData = inst->splitMemData; + } else { + cache_req->reqData = inst->split2ndDataPtr; + } + + doCacheAccess(inst, NULL, cache_req); + } } + //cout << "h3" << endl; + return cache_req->fault; } @@ -337,6 +507,20 @@ CacheUnit::write(DynInstPtr inst, T data, Addr addr, unsigned flags, CacheReqPtr cache_req = dynamic_cast(findRequest(inst)); assert(cache_req); + // The block size of our peer + unsigned blockSize = this->cachePort->peerBlockSize(); + + //The size of the data we're trying to read. + int dataSize = sizeof(T); + + //The address of the second part of this access if it needs to be split + //across a cache line boundary. + Addr secondAddr = roundDown(addr + dataSize - 1, blockSize); + + if (secondAddr > addr) { + assert(0 && "Need Split Write Code!"); + } + int acc_size = sizeof(T); doTLBAccess(inst, cache_req, acc_size, flags, TheISA::TLB::Write); @@ -364,6 +548,8 @@ CacheUnit::execute(int slot_num) #if TRACING_ON ThreadID tid = inst->readTid(); int seq_num = inst->seqNum; + std::string acc_type = "write"; + #endif cache_req->fault = NoFault; @@ -395,10 +581,14 @@ CacheUnit::execute(int slot_num) } case InitiateReadData: +#if TRACING_ON + acc_type = "read"; +#endif case InitiateWriteData: + DPRINTF(InOrderCachePort, - "[tid:%u]: Initiating data access to %s for addr. %08p\n", - tid, name(), cache_req->inst->getMemAddr()); + "[tid:%u]: [sn:%i] Initiating data %s access to %s for addr. 
%08p\n", + tid, inst->seqNum, acc_type, name(), cache_req->inst->getMemAddr()); inst->setCurResSlot(slot_num); @@ -406,10 +596,31 @@ CacheUnit::execute(int slot_num) inst->execute(); } else { inst->initiateAcc(); + //if (inst->splitAccess) { + // assert(0 && " Marked as spill inst"); + //} } + + break; + case InitSecondSplitRead: + DPRINTF(InOrderCachePort, + "[tid:%u]: [sn:%i] Initiating split data read access to %s for addr. %08p\n", + tid, inst->seqNum, name(), cache_req->inst->split2ndAddr); + inst->split2ndAccess = true; + read(inst, inst->split2ndAddr, inst->split2ndData, inst->split2ndFlags); break; + case InitSecondSplitWrite: + DPRINTF(InOrderCachePort, + "[tid:%u]: [sn:%i] Initiating split data write access to %s for addr. %08p\n", + tid, inst->seqNum, name(), cache_req->inst->getMemAddr()); + assert(0); + inst->split2ndAccess = true; + //write(inst, inst->split2ndAddr, inst->split2ndData, inst->split2ndFlags); + break; + + case CompleteFetch: if (cache_req->isMemAccComplete()) { DPRINTF(InOrderCachePort, @@ -425,7 +636,7 @@ CacheUnit::execute(int slot_num) cache_req->done(); } else { DPRINTF(InOrderCachePort, - "[tid:%i]: [sn:%i]: Unable to Complete Fetch Access\n", + "[tid:%i]: [sn:%i]: Unable to Complete Fetch Access\n", tid, inst->seqNum); DPRINTF(InOrderStall, "STALL: [tid:%i]: Fetch miss from %08p\n", @@ -454,6 +665,24 @@ CacheUnit::execute(int slot_num) } break; + case CompleteSecondSplitRead: + DPRINTF(InOrderCachePort, + "[tid:%i]: [sn:%i]: Trying to Complete Split Data Read Access\n", + tid, inst->seqNum); + + if (cache_req->isMemAccComplete() || + inst->isDataPrefetch() || + inst->isInstPrefetch()) { + cache_req->setMemStall(false); + cache_req->done(); + } else { + DPRINTF(InOrderStall, "STALL: [tid:%i]: Data miss from %08p\n", + tid, cache_req->inst->split2ndAddr); + cache_req->setCompleted(false); + cache_req->setMemStall(true); + } + break; + default: fatal("Unrecognized command to %s", resName); } @@ -498,15 +727,21 @@ CacheUnit::writeHint(DynInstPtr inst) // @TODO: Split into doCacheRead() and doCacheWrite() Fault -CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res) +CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res, CacheReqPtr split_req) { Fault fault = NoFault; #if TRACING_ON ThreadID tid = inst->readTid(); #endif - CacheReqPtr cache_req - = dynamic_cast(reqMap[inst->getCurResSlot()]); + CacheReqPtr cache_req; + + if (split_req == NULL) { + cache_req = dynamic_cast(reqMap[inst->getCurResSlot()]); + } else{ + cache_req = split_req; + } + assert(cache_req); // Check for LL/SC and if so change command @@ -522,7 +757,7 @@ CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res) } cache_req->dataPkt = new CacheReqPacket(cache_req, cache_req->pktCmd, - Packet::Broadcast); + Packet::Broadcast, cache_req->instIdx); if (cache_req->dataPkt->isRead()) { cache_req->dataPkt->dataStatic(cache_req->reqData); @@ -615,7 +850,16 @@ CacheUnit::processCacheCompletion(PacketPtr pkt) // Cast to correct request type CacheRequest *cache_req = dynamic_cast( - findRequest(cache_pkt->cacheReq->getInst())); + findSplitRequest(cache_pkt->cacheReq->getInst(), cache_pkt->instIdx)); + + if (!cache_req) { + warn( + "[tid:%u]: [sn:%i]: Can't find slot for cache access to addr. 
%08p\n", + cache_pkt->cacheReq->getInst()->readTid(), + cache_pkt->cacheReq->getInst()->seqNum, + cache_pkt->cacheReq->getInst()->getMemAddr()); + } + assert(cache_req); @@ -661,9 +905,27 @@ CacheUnit::processCacheCompletion(PacketPtr pkt) DPRINTF(InOrderCachePort, "[tid:%u]: [sn:%i]: Processing cache access\n", tid, inst->seqNum); - - inst->completeAcc(pkt); - + + if (inst->splitInst) { + inst->splitFinishCnt++; + + if (inst->splitFinishCnt == 2) { + + cache_req->memReq->setVirt(0/*inst->tid*/, + inst->getMemAddr(), + inst->splitTotalSize, + 0, + 0); + + Packet split_pkt(cache_req->memReq, cache_req->pktCmd, + Packet::Broadcast); + split_pkt.dataStatic(inst->splitMemData); + inst->completeAcc(&split_pkt); + } + } else { + inst->completeAcc(pkt); + } + if (inst->isLoad()) { assert(cache_pkt->isRead()); diff --git a/src/cpu/inorder/resources/cache_unit.hh b/src/cpu/inorder/resources/cache_unit.hh index 50cb47519..715ebd878 100644 --- a/src/cpu/inorder/resources/cache_unit.hh +++ b/src/cpu/inorder/resources/cache_unit.hh @@ -72,7 +72,10 @@ class CacheUnit : public Resource CompleteWriteData, Fetch, ReadData, - WriteData + WriteData, + InitSecondSplitRead, + InitSecondSplitWrite, + CompleteSecondSplitRead }; public: @@ -124,6 +127,9 @@ class CacheUnit : public Resource int res_idx, int slot_num, unsigned cmd); + ResReqPtr findRequest(DynInstPtr inst); + ResReqPtr findSplitRequest(DynInstPtr inst, int idx); + void requestAgain(DynInstPtr inst, bool &try_request); int getSlot(DynInstPtr inst); @@ -155,7 +161,7 @@ class CacheUnit : public Resource /** Returns a specific port. */ Port *getPort(const std::string &if_name, int idx); - + template Fault read(DynInstPtr inst, Addr addr, T &data, unsigned flags); @@ -169,7 +175,7 @@ class CacheUnit : public Resource /** Read/Write on behalf of an instruction. * curResSlot needs to be a valid value in instruction. 
*/ - Fault doCacheAccess(DynInstPtr inst, uint64_t *write_result=NULL); + Fault doCacheAccess(DynInstPtr inst, uint64_t *write_result=NULL, CacheReqPtr split_req=NULL); void prefetch(DynInstPtr inst); @@ -237,17 +243,18 @@ class CacheRequest : public ResourceRequest public: CacheRequest(CacheUnit *cres, DynInstPtr inst, int stage_num, int res_idx, int slot_num, unsigned cmd, int req_size, - MemCmd::Command pkt_cmd, unsigned flags, int cpu_id) + MemCmd::Command pkt_cmd, unsigned flags, int cpu_id, int idx) : ResourceRequest(cres, inst, stage_num, res_idx, slot_num, cmd), pktCmd(pkt_cmd), memReq(NULL), reqData(NULL), dataPkt(NULL), retryPkt(NULL), memAccComplete(false), memAccPending(false), - tlbStall(false) + tlbStall(false), splitAccess(false), splitAccessNum(-1), + split2ndAccess(false), instIdx(idx) { } virtual ~CacheRequest() { - if (reqData) { + if (reqData && !splitAccess) { delete [] reqData; } } @@ -261,6 +268,11 @@ class CacheRequest : public ResourceRequest memAccComplete = completed; } + bool is2ndSplit() + { + return split2ndAccess; + } + bool isMemAccComplete() { return memAccComplete; } void setMemAccPending(bool pending = true) { memAccPending = pending; } @@ -276,19 +288,27 @@ class CacheRequest : public ResourceRequest bool memAccComplete; bool memAccPending; bool tlbStall; + + bool splitAccess; + int splitAccessNum; + bool split2ndAccess; + int instIdx; + }; class CacheReqPacket : public Packet { public: CacheReqPacket(CacheRequest *_req, - Command _cmd, short _dest) - : Packet(_req->memReq, _cmd, _dest), cacheReq(_req) + Command _cmd, short _dest, int _idx = 0) + : Packet(_req->memReq, _cmd, _dest), cacheReq(_req), instIdx(_idx) { } CacheRequest *cacheReq; + int instIdx; + }; #endif //__CPU_CACHE_UNIT_HH__ -- cgit v1.2.3 From be6724f7e7a1c1d2f305c814cf3aa23d54a676e2 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:30:43 -0500 Subject: inorder: implement split stores --- src/cpu/inorder/inorder_dyn_inst.cc | 3 +- src/cpu/inorder/inorder_dyn_inst.hh | 2 +- src/cpu/inorder/resources/cache_unit.cc | 140 +++++++++++++++++++++++++------- src/cpu/inorder/resources/cache_unit.hh | 3 +- 4 files changed, 117 insertions(+), 31 deletions(-) (limited to 'src') diff --git a/src/cpu/inorder/inorder_dyn_inst.cc b/src/cpu/inorder/inorder_dyn_inst.cc index 9c0313721..c0e5aa69b 100644 --- a/src/cpu/inorder/inorder_dyn_inst.cc +++ b/src/cpu/inorder/inorder_dyn_inst.cc @@ -112,6 +112,7 @@ InOrderDynInst::initVars() fetchMemReq = NULL; dataMemReq = NULL; splitMemData = NULL; + split2ndAddr = 0; split2ndAccess = false; splitInst = false; splitFinishCnt = 0; @@ -192,7 +193,7 @@ InOrderDynInst::~InOrderDynInst() } if (splitMemData) { - delete splitMemData; + delete [] splitMemData; } fault = NoFault; diff --git a/src/cpu/inorder/inorder_dyn_inst.hh b/src/cpu/inorder/inorder_dyn_inst.hh index 6f5b7c0e9..ad4da9aab 100644 --- a/src/cpu/inorder/inorder_dyn_inst.hh +++ b/src/cpu/inorder/inorder_dyn_inst.hh @@ -341,7 +341,7 @@ class InOrderDynInst : public FastAlloc, public RefCounted unsigned split2ndFlags; bool splitInst; int splitFinishCnt; - + uint64_t *split2ndStoreDataPtr; //////////////////////////////////////////////////////////// // diff --git a/src/cpu/inorder/resources/cache_unit.cc b/src/cpu/inorder/resources/cache_unit.cc index 85ef18a55..3fa1ed180 100644 --- a/src/cpu/inorder/resources/cache_unit.cc +++ b/src/cpu/inorder/resources/cache_unit.cc @@ -283,6 +283,14 @@ CacheUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx, inst->readTid(), inst->seqNum, 
inst->getMemAddr()); break; + case InitSecondSplitWrite: + pkt_cmd = MemCmd::WriteReq; + + DPRINTF(InOrderCachePort, + "[tid:%i]: Write request from [sn:%i] for addr %08p\n", + inst->readTid(), inst->seqNum, inst->split2ndAddr); + break; + case InitiateWriteData: pkt_cmd = MemCmd::WriteReq; @@ -327,7 +335,8 @@ CacheUnit::requestAgain(DynInstPtr inst, bool &service_request) "instruction\n ", inst->readTid(), inst->seqNum); service_request = true; - } else if (inst->resSched.top()->idx != CacheUnit::InitSecondSplitRead) { + } else if (inst->resSched.top()->idx != CacheUnit::InitSecondSplitRead && + inst->resSched.top()->idx != CacheUnit::InitSecondSplitWrite) { // If same command, just check to see if memory access was completed // but dont try to re-execute DPRINTF(InOrderCachePort, @@ -406,7 +415,7 @@ Fault CacheUnit::read(DynInstPtr inst, Addr addr, T &data, unsigned flags) { CacheReqPtr cache_req = dynamic_cast(findRequest(inst)); - assert(cache_req); + assert(cache_req && "Can't Find Instruction for Read!"); // The block size of our peer unsigned blockSize = this->cachePort->peerBlockSize(); @@ -456,7 +465,8 @@ CacheUnit::read(DynInstPtr inst, Addr addr, T &data, unsigned flags) inst->resSched.push(new ScheduleEntry(stage_num + 1, 1/*stage_pri*/, cpu->resPool->getResIdx(DCache), - CacheUnit::CompleteSecondSplitRead, 1) + CacheUnit::CompleteSecondSplitRead, + 1) ); @@ -473,12 +483,8 @@ CacheUnit::read(DynInstPtr inst, Addr addr, T &data, unsigned flags) inst->split2ndFlags = flags; } - //cout << "h1" << endl; - doTLBAccess(inst, cache_req, dataSize, flags, TheISA::TLB::Read); - //cout << "h2" << endl; - if (cache_req->fault == NoFault) { if (!cache_req->splitAccess) { cache_req->reqData = new uint8_t[dataSize]; @@ -494,8 +500,6 @@ CacheUnit::read(DynInstPtr inst, Addr addr, T &data, unsigned flags) } } - //cout << "h3" << endl; - return cache_req->fault; } @@ -505,7 +509,7 @@ CacheUnit::write(DynInstPtr inst, T data, Addr addr, unsigned flags, uint64_t *write_res) { CacheReqPtr cache_req = dynamic_cast(findRequest(inst)); - assert(cache_req); + assert(cache_req && "Can't Find Instruction for Write!"); // The block size of our peer unsigned blockSize = this->cachePort->peerBlockSize(); @@ -513,22 +517,75 @@ CacheUnit::write(DynInstPtr inst, T data, Addr addr, unsigned flags, //The size of the data we're trying to read. int dataSize = sizeof(T); + if (inst->split2ndAccess) { + dataSize = inst->split2ndSize; + cache_req->splitAccess = true; + cache_req->split2ndAccess = true; + + DPRINTF(InOrderCachePort, "%i: sn[%i] Split Write Access (2 of 2) for (%#x, %#x).\n", curTick, inst->seqNum, + inst->getMemAddr(), inst->split2ndAddr); + } + //The address of the second part of this access if it needs to be split //across a cache line boundary. 
Addr secondAddr = roundDown(addr + dataSize - 1, blockSize); - if (secondAddr > addr) { - assert(0 && "Need Split Write Code!"); - } + if (secondAddr > addr && !inst->split2ndAccess) { + DPRINTF(InOrderCachePort, "%i: sn[%i] Split Write Access (1 of 2) for (%#x, %#x).\n", curTick, inst->seqNum, + addr, secondAddr); - int acc_size = sizeof(T); - doTLBAccess(inst, cache_req, acc_size, flags, TheISA::TLB::Write); + // Save All "Total" Split Information + // ============================== + inst->splitInst = true; + inst->splitTotalSize = dataSize; + + // Schedule Split Read/Complete for Instruction + // ============================== + int stage_num = cache_req->getStageNum(); + + int stage_pri = ThePipeline::getNextPriority(inst, stage_num); + + inst->resSched.push(new ScheduleEntry(stage_num, + stage_pri, + cpu->resPool->getResIdx(DCache), + CacheUnit::InitSecondSplitWrite, + 1) + ); + + inst->resSched.push(new ScheduleEntry(stage_num + 1, + 1/*stage_pri*/, + cpu->resPool->getResIdx(DCache), + CacheUnit::CompleteSecondSplitWrite, + 1) + ); + + // Split Information for First Access + // ============================== + dataSize = secondAddr - addr; + cache_req->splitAccess = true; + + // Split Information for Second Access + // ============================== + inst->split2ndSize = addr + sizeof(T) - secondAddr; + inst->split2ndAddr = secondAddr; + inst->split2ndStoreDataPtr = &cache_req->inst->storeData; + inst->split2ndStoreDataPtr += dataSize; + inst->split2ndFlags = flags; + } + + doTLBAccess(inst, cache_req, dataSize, flags, TheISA::TLB::Write); if (cache_req->fault == NoFault) { - cache_req->reqData = new uint8_t[acc_size]; - doCacheAccess(inst, write_res); + if (!cache_req->splitAccess) { + // Remove this line since storeData is saved in INST? + cache_req->reqData = new uint8_t[dataSize]; + doCacheAccess(inst, write_res); + } else { + doCacheAccess(inst, write_res, cache_req); + } + } - + return cache_req->fault; } @@ -596,9 +653,6 @@ CacheUnit::execute(int slot_num) inst->execute(); } else { inst->initiateAcc(); - //if (inst->splitAccess) { - // assert(0 && " Marked as spill inst"); - //} } break; @@ -608,6 +662,7 @@ CacheUnit::execute(int slot_num) "[tid:%u]: [sn:%i] Initiating split data read access to %s for addr. %08p\n", tid, inst->seqNum, name(), cache_req->inst->split2ndAddr); inst->split2ndAccess = true; + assert(inst->split2ndAddr != 0); read(inst, inst->split2ndAddr, inst->split2ndData, inst->split2ndFlags); break; @@ -615,9 +670,10 @@ CacheUnit::execute(int slot_num) DPRINTF(InOrderCachePort, "[tid:%u]: [sn:%i] Initiating split data write access to %s for addr. 
%08p\n", tid, inst->seqNum, name(), cache_req->inst->getMemAddr()); - assert(0); + inst->split2ndAccess = true; - //write(inst, inst->split2ndAddr, inst->split2ndData, inst->split2ndFlags); + assert(inst->split2ndAddr != 0); + write(inst, inst->split2ndAddr, inst->split2ndData, inst->split2ndFlags, NULL); break; @@ -682,6 +738,24 @@ CacheUnit::execute(int slot_num) cache_req->setMemStall(true); } break; + + case CompleteSecondSplitWrite: + DPRINTF(InOrderCachePort, + "[tid:%i]: [sn:%i]: Trying to Complete Split Data Write Access\n", + tid, inst->seqNum); + + if (cache_req->isMemAccComplete() || + inst->isDataPrefetch() || + inst->isInstPrefetch()) { + cache_req->setMemStall(false); + cache_req->done(); + } else { + DPRINTF(InOrderStall, "STALL: [tid:%i]: Data miss from %08p\n", + tid, cache_req->inst->split2ndAddr); + cache_req->setCompleted(false); + cache_req->setMemStall(true); + } + break; default: fatal("Unrecognized command to %s", resName); @@ -761,9 +835,13 @@ CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res, CacheReqPtr split if (cache_req->dataPkt->isRead()) { cache_req->dataPkt->dataStatic(cache_req->reqData); - } else if (cache_req->dataPkt->isWrite()) { - cache_req->dataPkt->dataStatic(&cache_req->inst->storeData); - + } else if (cache_req->dataPkt->isWrite()) { + if (inst->split2ndAccess) { + cache_req->dataPkt->dataStatic(inst->split2ndStoreDataPtr); + } else { + cache_req->dataPkt->dataStatic(&cache_req->inst->storeData); + } + if (cache_req->memReq->isCondSwap()) { assert(write_res); cache_req->memReq->setExtraData(*write_res); @@ -910,7 +988,6 @@ CacheUnit::processCacheCompletion(PacketPtr pkt) inst->splitFinishCnt++; if (inst->splitFinishCnt == 2) { - cache_req->memReq->setVirt(0/*inst->tid*/, inst->getMemAddr(), inst->splitTotalSize, @@ -919,7 +996,14 @@ CacheUnit::processCacheCompletion(PacketPtr pkt) Packet split_pkt(cache_req->memReq, cache_req->pktCmd, Packet::Broadcast); - split_pkt.dataStatic(inst->splitMemData); + + + if (inst->isLoad()) { + split_pkt.dataStatic(inst->splitMemData); + } else { + split_pkt.dataStatic(&inst->storeData); + } + inst->completeAcc(&split_pkt); } } else { diff --git a/src/cpu/inorder/resources/cache_unit.hh b/src/cpu/inorder/resources/cache_unit.hh index 715ebd878..8200ace87 100644 --- a/src/cpu/inorder/resources/cache_unit.hh +++ b/src/cpu/inorder/resources/cache_unit.hh @@ -75,7 +75,8 @@ class CacheUnit : public Resource WriteData, InitSecondSplitRead, InitSecondSplitWrite, - CompleteSecondSplitRead + CompleteSecondSplitRead, + CompleteSecondSplitWrite }; public: -- cgit v1.2.3 From 9357e353fc976a409fb0cb3a875b402f452577f7 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:30:48 -0500 Subject: inorder: inst count mgmt --- src/cpu/inorder/SConscript | 2 + src/cpu/inorder/cpu.cc | 38 ++++++---- src/cpu/inorder/cpu.hh | 2 + src/cpu/inorder/inorder_dyn_inst.cc | 8 ++- src/cpu/inorder/inorder_dyn_inst.hh | 5 +- src/cpu/inorder/pipeline_stage.cc | 26 +++++-- src/cpu/inorder/reg_dep_map.cc | 24 +++++++ src/cpu/inorder/reg_dep_map.hh | 2 + src/cpu/inorder/resource.cc | 34 +++++---- src/cpu/inorder/resource.hh | 11 ++- src/cpu/inorder/resources/cache_unit.cc | 103 ++++++++++++++++++++------- src/cpu/inorder/resources/cache_unit.hh | 5 +- src/cpu/inorder/resources/graduation_unit.cc | 2 - src/cpu/inorder/resources/use_def.cc | 15 +++- 14 files changed, 210 insertions(+), 67 deletions(-) (limited to 'src') diff --git a/src/cpu/inorder/SConscript b/src/cpu/inorder/SConscript index afc6a29e4..f222350af 100644 --- 
a/src/cpu/inorder/SConscript +++ b/src/cpu/inorder/SConscript @@ -54,6 +54,8 @@ if 'InOrderCPU' in env['CPU_MODELS']: TraceFlag('InOrderGraduation') TraceFlag('ThreadModel') TraceFlag('RefCount') + TraceFlag('AddrDep') + CompoundFlag('InOrderCPUAll', [ 'InOrderStage', 'InOrderStall', 'InOrderCPU', 'InOrderMDU', 'InOrderAGEN', 'InOrderFetchSeq', 'InOrderTLB', 'InOrderBPred', diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index e864c8c86..e28af9e7a 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -333,6 +333,12 @@ InOrderCPU::InOrderCPU(Params *params) 0); } + dummyReqInst = new InOrderDynInst(this, NULL, 0, 0, 0); + dummyReqInst->setSquashed(); + + dummyBufferInst = new InOrderDynInst(this, NULL, 0, 0, 0); + dummyBufferInst->setSquashed(); + lastRunningCycle = curTick; // Reset CPU to reset state. @@ -343,6 +349,8 @@ InOrderCPU::InOrderCPU(Params *params) reset(); #endif + dummyBufferInst->resetInstCount(); + // Schedule First Tick Event, CPU will reschedule itself from here on out. scheduleTickEvent(0); } @@ -1176,6 +1184,8 @@ InOrderCPU::instDone(DynInstPtr inst, ThreadID tid) removeInst(inst); } +// currently unused function, but substitute repetitive code w/this function +// call void InOrderCPU::addToRemoveList(DynInstPtr &inst) { @@ -1194,6 +1204,10 @@ InOrderCPU::removeInst(DynInstPtr &inst) removeInstsThisCycle = true; // Remove the instruction. + + DPRINTF(RefCount, "Pushing instruction [tid:%i] PC %#x " + "[sn:%lli] to remove list\n", + inst->threadNumber, inst->readPC(), inst->seqNum); removeList.push(inst->getInstListIt()); } @@ -1208,7 +1222,7 @@ InOrderCPU::removeInstsUntil(const InstSeqNum &seq_num, ThreadID tid) inst_iter--; - DPRINTF(InOrderCPU, "Deleting instructions from CPU instruction " + DPRINTF(InOrderCPU, "Squashing instructions from CPU instruction " "list that are from [tid:%i] and above [sn:%lli] (end=%lli).\n", tid, seq_num, (*inst_iter)->seqNum); @@ -1238,6 +1252,9 @@ InOrderCPU::squashInstIt(const ListIt &instIt, ThreadID tid) (*instIt)->setSquashed(); + DPRINTF(RefCount, "Pushing instruction [tid:%i] PC %#x " + "[sn:%lli] to remove list\n", + (*instIt)->threadNumber, (*instIt)->readPC(), (*instIt)->seqNum); removeList.push(instIt); } } @@ -1251,7 +1268,7 @@ InOrderCPU::cleanUpRemovedInsts() "[tid:%i] [sn:%lli] PC %#x\n", (*removeList.front())->threadNumber, (*removeList.front())->seqNum, - (*removeList.front())->readPC()); + (*removeList.front())->readPC()); DynInstPtr inst = *removeList.front(); ThreadID tid = inst->threadNumber; @@ -1279,11 +1296,6 @@ InOrderCPU::cleanUpRemovedInsts() instList[tid].erase(removeList.front()); removeList.pop(); - - DPRINTF(RefCount, "pop from remove list: [sn:%i]: Refcount = %i.\n", - inst->seqNum, - 0/*inst->curCount()*/); - } removeInstsThisCycle = false; @@ -1295,22 +1307,18 @@ InOrderCPU::cleanUpRemovedReqs() while (!reqRemoveList.empty()) { ResourceRequest *res_req = reqRemoveList.front(); - DPRINTF(RefCount, "[tid:%i]: Removing Request, " - "[sn:%lli] [slot:%i] [stage_num:%i] [res:%s] [refcount:%i].\n", + DPRINTF(InOrderCPU, "[tid:%i] [sn:%lli]: Removing Request " + "[stage_num:%i] [res:%s] [slot:%i] [completed:%i].\n", res_req->inst->threadNumber, res_req->inst->seqNum, - res_req->getSlot(), res_req->getStageNum(), res_req->res->name(), - 0/*res_req->inst->curCount()*/); + (res_req->isCompleted()) ? 
res_req->getComplSlot() : res_req->getSlot(), + res_req->isCompleted()); reqRemoveList.pop(); delete res_req; - - DPRINTF(RefCount, "after remove request: [sn:%i]: Refcount = %i.\n", - res_req->inst->seqNum, - 0/*res_req->inst->curCount()*/); } } diff --git a/src/cpu/inorder/cpu.hh b/src/cpu/inorder/cpu.hh index 253b5b18f..0c42f349e 100644 --- a/src/cpu/inorder/cpu.hh +++ b/src/cpu/inorder/cpu.hh @@ -247,6 +247,8 @@ class InOrderCPU : public BaseCPU /** Instruction used to signify that there is no *real* instruction in buffer slot */ DynInstPtr dummyInst[ThePipeline::MaxThreads]; + DynInstPtr dummyBufferInst; + DynInstPtr dummyReqInst; /** Used by resources to signify a denied access to a resource. */ ResourceRequest *dummyReq[ThePipeline::MaxThreads]; diff --git a/src/cpu/inorder/inorder_dyn_inst.cc b/src/cpu/inorder/inorder_dyn_inst.cc index c0e5aa69b..75e1c570f 100644 --- a/src/cpu/inorder/inorder_dyn_inst.cc +++ b/src/cpu/inorder/inorder_dyn_inst.cc @@ -164,7 +164,7 @@ InOrderDynInst::initVars() // Update Instruction Count for this instruction ++instcount; - if (instcount > 500) { + if (instcount > 100) { fatal("Number of Active Instructions in CPU is too high. " "(Not Dereferencing Ptrs. Correctly?)\n"); } @@ -175,6 +175,12 @@ InOrderDynInst::initVars() threadNumber, seqNum, instcount); } +void +InOrderDynInst::resetInstCount() +{ + instcount = 0; +} + InOrderDynInst::~InOrderDynInst() { diff --git a/src/cpu/inorder/inorder_dyn_inst.hh b/src/cpu/inorder/inorder_dyn_inst.hh index ad4da9aab..8a5f9cf25 100644 --- a/src/cpu/inorder/inorder_dyn_inst.hh +++ b/src/cpu/inorder/inorder_dyn_inst.hh @@ -1032,14 +1032,15 @@ class InOrderDynInst : public FastAlloc, public RefCounted /** Count of total number of dynamic instructions. */ static int instcount; + void resetInstCount(); + /** Dumps out contents of this BaseDynInst. */ void dump(); /** Dumps out contents of this BaseDynInst into given string. */ void dump(std::string &outstring); - - //inline int curCount() { return curCount(); } + //inline int curCount() { return curCount(); } }; diff --git a/src/cpu/inorder/pipeline_stage.cc b/src/cpu/inorder/pipeline_stage.cc index c991fe1bd..571cf10bb 100644 --- a/src/cpu/inorder/pipeline_stage.cc +++ b/src/cpu/inorder/pipeline_stage.cc @@ -101,8 +101,6 @@ PipelineStage::setCPU(InOrderCPU *cpu_ptr) { cpu = cpu_ptr; - dummyBufferInst = new InOrderDynInst(cpu_ptr, NULL, 0, 0, 0); - DPRINTF(InOrderStage, "Set CPU pointer.\n"); tracer = dynamic_cast(cpu->getTracer()); @@ -388,6 +386,8 @@ PipelineStage::squashPrevStageInsts(InstSeqNum squash_seq_num, ThreadID tid) prevStage->insts[i]->seqNum, prevStage->insts[i]->readPC()); prevStage->insts[i]->setSquashed(); + + prevStage->insts[i] = cpu->dummyBufferInst; } } } @@ -609,7 +609,7 @@ PipelineStage::sortInsts() skidBuffer[tid].push(prevStage->insts[i]); - prevStage->insts[i] = dummyBufferInst; + prevStage->insts[i] = cpu->dummyBufferInst; } } @@ -816,7 +816,7 @@ PipelineStage::processThread(bool &status_change, ThreadID tid) // call processInsts() // If status is Unblocking, // buffer any instructions coming from fetch - // continue trying to empty skid buffer + // continue trying to empty skid buffer // check if stall conditions have passed // Stage should try to process as many instructions as its bandwidth @@ -960,6 +960,8 @@ PipelineStage::processInstSchedule(DynInstPtr inst,int &reqs_processed) } reqs_processed++; + + req->stagePasses++; } else { DPRINTF(InOrderStage, "[tid:%i]: [sn:%i] request to %s failed." 
"\n", tid, inst->seqNum, cpu->resPool->name(res_num)); @@ -969,7 +971,7 @@ PipelineStage::processInstSchedule(DynInstPtr inst,int &reqs_processed) if (req->isMemStall() && cpu->threadModel == InOrderCPU::SwitchOnCacheMiss) { // Save Stalling Instruction - DPRINTF(ThreadModel, "[tid:%i] Detected cache miss.\n", tid); + DPRINTF(ThreadModel, "[tid:%i] [sn:%i] Detected cache miss.\n", tid, inst->seqNum); DPRINTF(InOrderStage, "Inserting [tid:%i][sn:%i] into switch out buffer.\n", tid, inst->seqNum); @@ -994,6 +996,20 @@ PipelineStage::processInstSchedule(DynInstPtr inst,int &reqs_processed) cpu->activateNextReadyContext(); } + // Mark request for deletion + // if it isnt currently being used by a resource + if (!req->hasSlot()) { + DPRINTF(InOrderStage, "[sn:%i] Deleting Request, has no slot in resource.\n", + inst->seqNum); + + cpu->reqRemoveList.push(req); + } else { + DPRINTF(InOrderStage, "[sn:%i] Ignoring Request Deletion, in resource [slot:%i].\n", + inst->seqNum, req->getSlot()); + //req = cpu->dummyReq[tid]; + } + + break; } diff --git a/src/cpu/inorder/reg_dep_map.cc b/src/cpu/inorder/reg_dep_map.cc index 51782a588..7fac0a905 100644 --- a/src/cpu/inorder/reg_dep_map.cc +++ b/src/cpu/inorder/reg_dep_map.cc @@ -235,3 +235,27 @@ RegDepMap::findBypassInst(unsigned idx) return NULL; } + +void +RegDepMap::dump() +{ + + for (int idx=0; idx < regMap.size(); idx++) { + + if (regMap[idx].size() > 0) { + cprintf("Reg #%i (size:%i): ", idx, regMap[idx].size()); + + std::list::iterator list_it = regMap[idx].begin(); + std::list::iterator list_end = regMap[idx].end(); + + while (list_it != list_end) { + cprintf("[sn:%i] ", (*list_it)->seqNum); + + list_it++; + } + + cprintf("\n"); + } + + } +} diff --git a/src/cpu/inorder/reg_dep_map.hh b/src/cpu/inorder/reg_dep_map.hh index b78e211bb..cb9d35bf4 100644 --- a/src/cpu/inorder/reg_dep_map.hh +++ b/src/cpu/inorder/reg_dep_map.hh @@ -88,6 +88,8 @@ class RegDepMap /** Size of Dependency of Map */ int depSize(unsigned idx); + void dump(); + protected: // Eventually make this a map of lists for // efficiency sake! diff --git a/src/cpu/inorder/resource.cc b/src/cpu/inorder/resource.cc index dcf5f3117..e63925fe8 100644 --- a/src/cpu/inorder/resource.cc +++ b/src/cpu/inorder/resource.cc @@ -101,12 +101,6 @@ Resource::slotsInUse() void Resource::freeSlot(int slot_idx) { - DPRINTF(RefCount, "Removing [tid:%i] [sn:%i]'s request from resource " - "[slot:%i].\n", - reqMap[slot_idx]->inst->readTid(), - reqMap[slot_idx]->inst->seqNum, - slot_idx); - // Put slot number on this resource's free list availSlots.push_back(slot_idx); @@ -181,7 +175,7 @@ Resource::request(DynInstPtr inst) // See if the resource is already serving this instruction. 
// If so, use that request; bool try_request = false; - int slot_num; + int slot_num = -1; int stage_num; ResReqPtr inst_req = findRequest(inst); @@ -440,6 +434,10 @@ ResourceRequest::ResourceRequest(Resource *_res, DynInstPtr _inst, } #endif + + stagePasses = 0; + complSlotNum = -1; + } ResourceRequest::~ResourceRequest() @@ -454,17 +452,29 @@ ResourceRequest::~ResourceRequest() void ResourceRequest::done(bool completed) { - DPRINTF(Resource, "%s done with request from [sn:%i] [tid:%i].\n", - res->name(), inst->seqNum, inst->readTid()); + DPRINTF(Resource, "%s [slot:%i] done with request from [sn:%i] [tid:%i].\n", + res->name(), slotNum, inst->seqNum, inst->readTid()); setCompleted(completed); - // Add to remove list - res->cpu->reqRemoveList.push(res->reqMap[slotNum]); - + // Used for debugging purposes + if (completed) { + complSlotNum = slotNum; + + // Would like to start a convention such as all requests deleted in resources/pipeline + // but a little more complex then it seems... + // For now, all COMPLETED requests deleted in resource.. + // all FAILED requests deleted in pipeline stage + // *all SQUASHED requests deleted in resource + res->cpu->reqRemoveList.push(res->reqMap[slotNum]); + } + // Free Slot So Another Instruction Can Use This Resource res->freeSlot(slotNum); + // change slot # to -1, since we check slotNum to see if request is still valid + slotNum = -1; + res->instReqsProcessed++; } diff --git a/src/cpu/inorder/resource.hh b/src/cpu/inorder/resource.hh index 383340df2..b9650df18 100644 --- a/src/cpu/inorder/resource.hh +++ b/src/cpu/inorder/resource.hh @@ -331,6 +331,8 @@ class ResourceRequest */ void done(bool completed = true); + short stagePasses; + ///////////////////////////////////////////// // // GET RESOURCE REQUEST IDENTIFICATION / INFO @@ -339,8 +341,11 @@ class ResourceRequest /** Get Resource Index */ int getResIdx() { return resIdx; } + /** Get Slot Number */ int getSlot() { return slotNum; } + int getComplSlot() { return complSlotNum; } + bool hasSlot() { return slotNum >= 0; } /** Get Stage Number */ int getStageNum() { return stageNum; } @@ -363,6 +368,9 @@ class ResourceRequest /** Instruction being used */ DynInstPtr inst; + /** Not guaranteed to be set, used for debugging */ + InstSeqNum seqNum; + /** Fault Associated With This Resource Request */ Fault fault; @@ -396,7 +404,8 @@ class ResourceRequest int stageNum; int resIdx; int slotNum; - + int complSlotNum; + /** Resource Request Status */ bool completed; bool squashed; diff --git a/src/cpu/inorder/resources/cache_unit.cc b/src/cpu/inorder/resources/cache_unit.cc index 3fa1ed180..00058163f 100644 --- a/src/cpu/inorder/resources/cache_unit.cc +++ b/src/cpu/inorder/resources/cache_unit.cc @@ -155,14 +155,11 @@ CacheUnit::getSlot(DynInstPtr inst) return -1; inst->memTime = curTick; - addrList[tid].push_back(req_addr); - addrMap[tid][req_addr] = inst->seqNum; - DPRINTF(InOrderCachePort, - "[tid:%i]: [sn:%i]: Address %08p added to dependency list\n", - inst->readTid(), inst->seqNum, req_addr); + setAddrDependency(inst); return new_slot; } else { // Allow same instruction multiple accesses to same address + // should only happen maybe after a squashed inst. 
needs to replay if (addrMap[tid][req_addr] == inst->seqNum) { int new_slot = Resource::getSlot(inst); @@ -183,31 +180,45 @@ CacheUnit::getSlot(DynInstPtr inst) } void -CacheUnit::freeSlot(int slot_num) +CacheUnit::setAddrDependency(DynInstPtr inst) { - ThreadID tid = reqMap[slot_num]->inst->readTid(); - - vector::iterator vect_it = - find(addrList[tid].begin(), addrList[tid].end(), - reqMap[slot_num]->inst->getMemAddr()); - - assert(vect_it != addrList[tid].end() || - reqMap[slot_num]->inst->splitInst); + Addr req_addr = inst->getMemAddr(); + ThreadID tid = inst->readTid(); + addrList[tid].push_back(req_addr); + addrMap[tid][req_addr] = inst->seqNum; DPRINTF(InOrderCachePort, - "[tid:%i]: Address %08p removed from dependency list\n", - reqMap[slot_num]->inst->readTid(), (*vect_it)); + "[tid:%i]: [sn:%i]: Address %08p added to dependency list\n", + inst->readTid(), inst->seqNum, req_addr); + DPRINTF(AddrDep, + "[tid:%i]: [sn:%i]: Address %08p added to dependency list\n", + inst->readTid(), inst->seqNum, req_addr); +} + +void +CacheUnit::removeAddrDependency(DynInstPtr inst) +{ + ThreadID tid = inst->readTid(); + + Addr mem_addr = inst->getMemAddr(); + + // Erase from Address List + vector::iterator vect_it = find(addrList[tid].begin(), addrList[tid].end(), + mem_addr); + assert(vect_it != addrList[tid].end() || inst->splitInst); if (vect_it != addrList[tid].end()) { - - DPRINTF(InOrderCachePort, - "[tid:%i]: Address %08p removed from dependency list\n", - reqMap[slot_num]->inst->readTid(), (*vect_it)); - + DPRINTF(AddrDep, + "[tid:%i]: [sn:%i] Address %08p removed from dependency list\n", + inst->readTid(), inst->seqNum, (*vect_it)); + addrList[tid].erase(vect_it); - } - Resource::freeSlot(slot_num); + // Erase From Address Map (Used for Debugging) + addrMap[tid].erase(addrMap[tid].find(mem_addr)); + } + + } ResReqPtr @@ -687,8 +698,14 @@ CacheUnit::execute(int slot_num) DPRINTF(InOrderCachePort, "[tid:%i]: Instruction [sn:%i] is: %s\n", tid, seq_num, inst->staticInst->disassemble(inst->PC)); + removeAddrDependency(inst); + delete cache_req->dataPkt; - //cache_req->setMemStall(false); + + // Do not stall and switch threads for fetch... for now.. 
+ // TODO: We need to detect cache misses for latencies > 1 + // cache_req->setMemStall(false); + cache_req->done(); } else { DPRINTF(InOrderCachePort, @@ -711,6 +728,7 @@ CacheUnit::execute(int slot_num) if (cache_req->isMemAccComplete() || inst->isDataPrefetch() || inst->isInstPrefetch()) { + removeAddrDependency(inst); cache_req->setMemStall(false); cache_req->done(); } else { @@ -729,6 +747,7 @@ CacheUnit::execute(int slot_num) if (cache_req->isMemAccComplete() || inst->isDataPrefetch() || inst->isInstPrefetch()) { + removeAddrDependency(inst); cache_req->setMemStall(false); cache_req->done(); } else { @@ -747,6 +766,7 @@ CacheUnit::execute(int slot_num) if (cache_req->isMemAccComplete() || inst->isDataPrefetch() || inst->isInstPrefetch()) { + removeAddrDependency(inst); cache_req->setMemStall(false); cache_req->done(); } else { @@ -911,6 +931,10 @@ CacheUnit::processCacheCompletion(PacketPtr pkt) "Ignoring completion of squashed access, [tid:%i] [sn:%i]\n", cache_pkt->cacheReq->getInst()->readTid(), cache_pkt->cacheReq->getInst()->seqNum); + DPRINTF(RefCount, + "Ignoring completion of squashed access, [tid:%i] [sn:%i]\n", + cache_pkt->cacheReq->getTid(), + cache_pkt->cacheReq->seqNum); cache_pkt->cacheReq->done(); delete cache_pkt; @@ -1154,6 +1178,14 @@ CacheUnit::squash(DynInstPtr inst, int stage_num, "[tid:%i] Squashing request from [sn:%i]\n", req_ptr->getInst()->readTid(), req_ptr->getInst()->seqNum); + if (req_ptr->isSquashed()) { + DPRINTF(AddrDep, "Request for [tid:%i] [sn:%i] already squashed, ignoring squash process.\n", + req_ptr->getInst()->readTid(), + req_ptr->getInst()->seqNum); + map_it++; + continue; + } + req_ptr->setSquashed(); req_ptr->getInst()->setSquashed(); @@ -1178,7 +1210,29 @@ CacheUnit::squash(DynInstPtr inst, int stage_num, // Mark slot for removal from resource slot_remove_list.push_back(req_ptr->getSlot()); + + DPRINTF(InOrderCachePort, + "[tid:%i] Squashing request from [sn:%i]\n", + req_ptr->getInst()->readTid(), req_ptr->getInst()->seqNum); + } else { + DPRINTF(InOrderCachePort, + "[tid:%i] Request from [sn:%i] squashed, but still pending completion.\n", + req_ptr->getInst()->readTid(), req_ptr->getInst()->seqNum); + DPRINTF(RefCount, + "[tid:%i] Request from [sn:%i] squashed (split:%i), but still pending completion.\n", + req_ptr->getInst()->readTid(), req_ptr->getInst()->seqNum, + req_ptr->getInst()->splitInst); + } + + if (req_ptr->getInst()->validMemAddr()) { + DPRINTF(AddrDep, "Squash of [tid:%i] [sn:%i], attempting to remove addr. %08p dependencies.\n", + req_ptr->getInst()->readTid(), + req_ptr->getInst()->seqNum, + req_ptr->getInst()->getMemAddr()); + + removeAddrDependency(req_ptr->getInst()); } + } map_it++; @@ -1320,3 +1374,4 @@ CacheUnit::write(DynInstPtr inst, int32_t data, Addr addr, unsigned flags, { return write(inst, (uint32_t)data, addr, flags, res); } + diff --git a/src/cpu/inorder/resources/cache_unit.hh b/src/cpu/inorder/resources/cache_unit.hh index 8200ace87..9004f3b93 100644 --- a/src/cpu/inorder/resources/cache_unit.hh +++ b/src/cpu/inorder/resources/cache_unit.hh @@ -135,8 +135,6 @@ class CacheUnit : public Resource int getSlot(DynInstPtr inst); - void freeSlot(int slot_num); - /** Execute the function of this resource. The Default is action * is to do nothing. More specific models will derive from this * class and define their own execute function. 
@@ -184,6 +182,9 @@ class CacheUnit : public Resource uint64_t getMemData(Packet *packet); + void setAddrDependency(DynInstPtr inst); + void removeAddrDependency(DynInstPtr inst); + protected: /** Cache interface. */ CachePort *cachePort; diff --git a/src/cpu/inorder/resources/graduation_unit.cc b/src/cpu/inorder/resources/graduation_unit.cc index 2d7cd5c8c..2dad9889a 100644 --- a/src/cpu/inorder/resources/graduation_unit.cc +++ b/src/cpu/inorder/resources/graduation_unit.cc @@ -79,8 +79,6 @@ GraduationUnit::execute(int slot_num) "[tid:%i] Graduating instruction [sn:%i].\n", tid, inst->seqNum); - DPRINTF(RefCount, "Refcount = %i.\n", 0/*inst->curCount()*/); - // Release Non-Speculative "Block" on instructions that could not execute // because there was a non-speculative inst. active. // @TODO: Fix this functionality. Probably too conservative. diff --git a/src/cpu/inorder/resources/use_def.cc b/src/cpu/inorder/resources/use_def.cc index a4f3a0d21..5fd6a4724 100644 --- a/src/cpu/inorder/resources/use_def.cc +++ b/src/cpu/inorder/resources/use_def.cc @@ -191,6 +191,7 @@ UseDefUnit::execute(int slot_idx) DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting for " "[sn:%i] to write\n", tid, outReadSeqNum[tid]); + ud_req->done(false); } } else { @@ -249,6 +250,7 @@ UseDefUnit::execute(int slot_idx) DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting for " "[sn:%i] to forward\n", tid, outReadSeqNum[tid]); + ud_req->done(false); } } else { DPRINTF(InOrderUseDef, "[tid:%i]: Source register idx: %i" @@ -258,6 +260,7 @@ UseDefUnit::execute(int slot_idx) "register (idx=%i)\n", tid, reg_idx); outReadSeqNum[tid] = inst->seqNum; + ud_req->done(false); } } } @@ -360,6 +363,7 @@ UseDefUnit::execute(int slot_idx) DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting for " "[sn:%i] to read\n", tid, outReadSeqNum); + ud_req->done(false); } } else { DPRINTF(InOrderUseDef, "[tid:%i]: Dest. 
register idx: %i is " @@ -369,6 +373,7 @@ "register (idx=%i)\n", tid, reg_idx); outWriteSeqNum[tid] = inst->seqNum; + ud_req->done(false); } } break; @@ -402,12 +407,16 @@ UseDefUnit::squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, req_ptr->getInst()->readTid(), req_ptr->getInst()->seqNum); - regDepMap[tid]->remove(req_ptr->getInst()); - int req_slot_num = req_ptr->getSlot(); - if (latency > 0) + if (latency > 0) { + assert(0); + unscheduleEvent(req_slot_num); + } + + // Mark request for later removal + cpu->reqRemoveList.push(req_ptr); // Mark slot for removal from resource slot_remove_list.push_back(req_ptr->getSlot()); -- cgit v1.2.3 From c7f6e2661c958d996479ae9fe8c8cf2c8a9482f6 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 31 Jan 2010 18:30:59 -0500 Subject: inorder: double delete inst bug Make sure that instructions are not dereferenced/deleted twice by marking that they are on the remove list --- src/cpu/inorder/cpu.cc | 48 +++++++++++++---- src/cpu/inorder/inorder_dyn_inst.cc | 1 + src/cpu/inorder/inorder_dyn_inst.hh | 10 +++- src/cpu/inorder/pipeline_stage.cc | 1 + src/cpu/inorder/resources/cache_unit.cc | 95 +++++++++++++++++++-------------- 5 files changed, 104 insertions(+), 51 deletions(-) (limited to 'src') diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc index e28af9e7a..7342f9bc5 100644 --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -1190,8 +1190,18 @@ void InOrderCPU::addToRemoveList(DynInstPtr &inst) { removeInstsThisCycle = true; - - removeList.push(inst->getInstListIt()); + if (!inst->isRemoveList()) { + DPRINTF(InOrderCPU, "Pushing instruction [tid:%i] PC %#x " + "[sn:%lli] to remove list\n", + inst->threadNumber, inst->readPC(), inst->seqNum); + inst->setRemoveList(); + removeList.push(inst->getInstListIt()); + } else { + DPRINTF(InOrderCPU, "Ignoring instruction removal for [tid:%i] PC %#x " + "[sn:%lli], already on remove list\n", + inst->threadNumber, inst->readPC(), inst->seqNum); + } + } void @@ -1204,11 +1214,18 @@ InOrderCPU::removeInst(DynInstPtr &inst) removeInstsThisCycle = true; // Remove the instruction.
+ if (!inst->isRemoveList()) { + DPRINTF(InOrderCPU, "Pushing instruction [tid:%i] PC %#x " + "[sn:%lli] to remove list\n", + inst->threadNumber, inst->readPC(), inst->seqNum); + inst->setRemoveList(); + removeList.push(inst->getInstListIt()); + } else { + DPRINTF(InOrderCPU, "Ignoring instruction removal for [tid:%i] PC %#x " + "[sn:%lli], already on remove list\n", + inst->threadNumber, inst->readPC(), inst->seqNum); + } - DPRINTF(RefCount, "Pushing instruction [tid:%i] PC %#x " - "[sn:%lli] to remove list\n", - inst->threadNumber, inst->readPC(), inst->seqNum); - removeList.push(inst->getInstListIt()); } void @@ -1252,11 +1269,22 @@ InOrderCPU::squashInstIt(const ListIt &instIt, ThreadID tid) (*instIt)->setSquashed(); - DPRINTF(RefCount, "Pushing instruction [tid:%i] PC %#x " - "[sn:%lli] to remove list\n", - (*instIt)->threadNumber, (*instIt)->readPC(), (*instIt)->seqNum); - removeList.push(instIt); + if (!(*instIt)->isRemoveList()) { + DPRINTF(InOrderCPU, "Pushing instruction [tid:%i] PC %#x " + "[sn:%lli] to remove list\n", + (*instIt)->threadNumber, (*instIt)->readPC(), + (*instIt)->seqNum); + (*instIt)->setRemoveList(); + removeList.push(instIt); + } else { + DPRINTF(InOrderCPU, "Ignoring instruction removal for [tid:%i] PC %#x " + "[sn:%lli], already on remove list\n", + (*instIt)->threadNumber, (*instIt)->readPC(), + (*instIt)->seqNum); + } + } + } diff --git a/src/cpu/inorder/inorder_dyn_inst.cc b/src/cpu/inorder/inorder_dyn_inst.cc index 75e1c570f..1b55c90e0 100644 --- a/src/cpu/inorder/inorder_dyn_inst.cc +++ b/src/cpu/inorder/inorder_dyn_inst.cc @@ -115,6 +115,7 @@ InOrderDynInst::initVars() split2ndAddr = 0; split2ndAccess = false; splitInst = false; + splitInstSked = false; splitFinishCnt = 0; effAddr = 0; diff --git a/src/cpu/inorder/inorder_dyn_inst.hh b/src/cpu/inorder/inorder_dyn_inst.hh index 8a5f9cf25..8c9cd69e0 100644 --- a/src/cpu/inorder/inorder_dyn_inst.hh +++ b/src/cpu/inorder/inorder_dyn_inst.hh @@ -164,6 +164,7 @@ class InOrderDynInst : public FastAlloc, public RefCounted /// instructions ahead of it SerializeAfter, /// Needs to serialize instructions behind it SerializeHandled, /// Serialization has been handled + RemoveList, /// Is Instruction on Remove List? NumStatus }; @@ -342,7 +343,8 @@ class InOrderDynInst : public FastAlloc, public RefCounted bool splitInst; int splitFinishCnt; uint64_t *split2ndStoreDataPtr; - + bool splitInstSked; + //////////////////////////////////////////////////////////// // // BASE INSTRUCTION INFORMATION. @@ -915,6 +917,12 @@ class InOrderDynInst : public FastAlloc, public RefCounted /** Returns whether or not the entry is on the CPU Reg Dep Map */ bool isRegDepEntry() const { return status[RegDepMapEntry]; } + /** Sets this instruction as entered on the CPU Reg Dep Map */ + void setRemoveList() { status.set(RemoveList); } + + /** Returns whether or not the entry is on the CPU Reg Dep Map */ + bool isRemoveList() const { return status[RemoveList]; } + /** Sets this instruction as completed. 
*/ void setCompleted() { status.set(Completed); } diff --git a/src/cpu/inorder/pipeline_stage.cc b/src/cpu/inorder/pipeline_stage.cc index 571cf10bb..dcf4d81bf 100644 --- a/src/cpu/inorder/pipeline_stage.cc +++ b/src/cpu/inorder/pipeline_stage.cc @@ -380,6 +380,7 @@ PipelineStage::squashPrevStageInsts(InstSeqNum squash_seq_num, ThreadID tid) for (int i=0; i < prevStage->size; i++) { if (prevStage->insts[i]->threadNumber == tid && prevStage->insts[i]->seqNum > squash_seq_num) { + // Change Comment to Annulling previous instruction DPRINTF(InOrderStage, "[tid:%i]: Squashing instruction, " "[sn:%i] PC %08p.\n", tid, diff --git a/src/cpu/inorder/resources/cache_unit.cc b/src/cpu/inorder/resources/cache_unit.cc index 00058163f..cb1861ea9 100644 --- a/src/cpu/inorder/resources/cache_unit.cc +++ b/src/cpu/inorder/resources/cache_unit.cc @@ -140,7 +140,8 @@ CacheUnit::getSlot(DynInstPtr inst) // For a Split-Load, the instruction would have processed once already // causing the address to be unset. if (!inst->validMemAddr() && !inst->splitInst) { - panic("Mem. Addr. must be set before requesting cache access\n"); + panic("[tid:%i][sn:%i] Mem. Addr. must be set before requesting cache access\n", + inst->readTid(), inst->seqNum); } Addr req_addr = inst->getMemAddr(); @@ -439,7 +440,7 @@ CacheUnit::read(DynInstPtr inst, Addr addr, T &data, unsigned flags) cache_req->splitAccess = true; cache_req->split2ndAccess = true; - DPRINTF(InOrderCachePort, "%i: sn[%i] Split Read Access (2 of 2) for (%#x, %#x).\n", curTick, inst->seqNum, + DPRINTF(InOrderCachePort, "[sn:%i] Split Read Access (2 of 2) for (%#x, %#x).\n", inst->seqNum, inst->getMemAddr(), inst->split2ndAddr); } @@ -459,27 +460,31 @@ CacheUnit::read(DynInstPtr inst, Addr addr, T &data, unsigned flags) inst->splitMemData = new uint8_t[dataSize]; inst->splitTotalSize = dataSize; - - // Schedule Split Read/Complete for Instruction - // ============================== - int stage_num = cache_req->getStageNum(); + if (!inst->splitInstSked) { + // Schedule Split Read/Complete for Instruction + // ============================== + int stage_num = cache_req->getStageNum(); - int stage_pri = ThePipeline::getNextPriority(inst, stage_num); + int stage_pri = ThePipeline::getNextPriority(inst, stage_num); - inst->resSched.push(new ScheduleEntry(stage_num, - stage_pri, - cpu->resPool->getResIdx(DCache), - CacheUnit::InitSecondSplitRead, - 1) - ); - - inst->resSched.push(new ScheduleEntry(stage_num + 1, - 1/*stage_pri*/, - cpu->resPool->getResIdx(DCache), - CacheUnit::CompleteSecondSplitRead, - 1) - ); - + inst->resSched.push(new ScheduleEntry(stage_num, + stage_pri, + cpu->resPool->getResIdx(DCache), + CacheUnit::InitSecondSplitRead, + 1) + ); + + inst->resSched.push(new ScheduleEntry(stage_num + 1, + 1/*stage_pri*/, + cpu->resPool->getResIdx(DCache), + CacheUnit::CompleteSecondSplitRead, + 1) + ); + inst->splitInstSked = true; + } else { + DPRINTF(InOrderCachePort, "[tid:%i] [sn:%i] Retrying Split Read Access (1 of 2) for (%#x, %#x).\n", + inst->readTid(), inst->seqNum, addr, secondAddr); + } // Split Information for First Access // ============================== @@ -533,7 +538,7 @@ CacheUnit::write(DynInstPtr inst, T data, Addr addr, unsigned flags, cache_req->splitAccess = true; cache_req->split2ndAccess = true; - DPRINTF(InOrderCachePort, "%i: sn[%i] Split Write Access (2 of 2) for (%#x, %#x).\n", curTick, inst->seqNum, + DPRINTF(InOrderCachePort, "[sn:%i] Split Write Access (2 of 2) for (%#x, %#x).\n", inst->seqNum, inst->getMemAddr(), inst->split2ndAddr); } 
@@ -542,7 +547,8 @@ CacheUnit::write(DynInstPtr inst, T data, Addr addr, unsigned flags, Addr secondAddr = roundDown(addr + dataSize - 1, blockSize); if (secondAddr > addr && !inst->split2ndAccess) { - DPRINTF(InOrderCachePort, "%i: sn[%i] Split Write Access (1 of 2) for (%#x, %#x).\n", curTick, inst->seqNum, + + DPRINTF(InOrderCachePort, "[sn:%i] Split Write Access (1 of 2) for (%#x, %#x).\n", inst->seqNum, addr, secondAddr); // Save All "Total" Split Information @@ -550,25 +556,33 @@ CacheUnit::write(DynInstPtr inst, T data, Addr addr, unsigned flags, inst->splitInst = true; inst->splitTotalSize = dataSize; - // Schedule Split Read/Complete for Instruction - // ============================== - int stage_num = cache_req->getStageNum(); + if (!inst->splitInstSked) { + // Schedule Split Read/Complete for Instruction + // ============================== + int stage_num = cache_req->getStageNum(); + + int stage_pri = ThePipeline::getNextPriority(inst, stage_num); + + inst->resSched.push(new ScheduleEntry(stage_num, + stage_pri, + cpu->resPool->getResIdx(DCache), + CacheUnit::InitSecondSplitWrite, + 1) + ); + + inst->resSched.push(new ScheduleEntry(stage_num + 1, + 1/*stage_pri*/, + cpu->resPool->getResIdx(DCache), + CacheUnit::CompleteSecondSplitWrite, + 1) + ); + inst->splitInstSked = true; + } else { + DPRINTF(InOrderCachePort, "[tid:%i] [sn:%i] Retrying Split Write Access (1 of 2) for (%#x, %#x).\n", + inst->readTid(), inst->seqNum, addr, secondAddr); + } - int stage_pri = ThePipeline::getNextPriority(inst, stage_num); inst->resSched.push(new ScheduleEntry(stage_num, stage_pri, cpu->resPool->getResIdx(DCache), CacheUnit::InitSecondSplitWrite, 1) ); - - inst->resSched.push(new ScheduleEntry(stage_num + 1, - 1/*stage_pri*/, - cpu->resPool->getResIdx(DCache), - CacheUnit::CompleteSecondSplitWrite, - 1) - ); // Split Information for First Access // ============================== @@ -582,6 +596,7 @@ CacheUnit::write(DynInstPtr inst, T data, Addr addr, unsigned flags, inst->split2ndStoreDataPtr = &cache_req->inst->storeData; inst->split2ndStoreDataPtr += dataSize; inst->split2ndFlags = flags; + inst->splitInstSked = true; } doTLBAccess(inst, cache_req, dataSize, flags, TheISA::TLB::Write); -- cgit v1.2.3
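
The double-delete fix in the last patch works by adding a RemoveList status bit to InOrderDynInst and checking it before every push onto the CPU's removeList, so that normal removal and a later squash cannot queue (and eventually dereference/delete) the same instruction twice. What follows is a minimal, self-contained C++ sketch of that guard pattern, not gem5 code: the Inst and Cpu types, the plain std::queue, and the printf tracing are simplified stand-ins for InOrderDynInst, InOrderCPU's instList/removeList, and DPRINTF.

#include <bitset>
#include <cstdio>
#include <list>
#include <queue>

// Simplified stand-in for InOrderDynInst: just a sequence number and the
// RemoveList status bit that the patch above introduces.
struct Inst {
    enum Status { RemoveList, NumStatus };
    std::bitset<NumStatus> status;
    unsigned long seqNum;

    explicit Inst(unsigned long sn) : seqNum(sn) {}
    void setRemoveList() { status.set(RemoveList); }
    bool isRemoveList() const { return status[RemoveList]; }
};

// Simplified stand-in for the CPU's instList/removeList pair: removal is
// deferred by queueing list iterators, and the guard ensures an iterator
// is queued at most once even if removal is requested twice.
struct Cpu {
    std::list<Inst> instList;
    std::queue<std::list<Inst>::iterator> removeList;

    void addToRemoveList(std::list<Inst>::iterator it) {
        if (!it->isRemoveList()) {
            std::printf("[sn:%lu] pushed to remove list\n", it->seqNum);
            it->setRemoveList();
            removeList.push(it);
        } else {
            std::printf("[sn:%lu] already on remove list, ignoring\n",
                        it->seqNum);
        }
    }

    void cleanUpRemovedInsts() {
        while (!removeList.empty()) {
            instList.erase(removeList.front()); // each iterator erased once
            removeList.pop();
        }
    }
};

int main() {
    Cpu cpu;
    cpu.instList.push_back(Inst(42));
    std::list<Inst>::iterator it = cpu.instList.begin();

    cpu.addToRemoveList(it);    // normal removal path
    cpu.addToRemoveList(it);    // a later squash; the guard makes this a no-op
    cpu.cleanUpRemovedInsts();  // erases [sn:42] exactly once
    return 0;
}

Because removal is deferred (iterators are queued during the cycle and only erased later in cleanUpRemovedInsts), the status bit is what guarantees each list iterator is queued, and therefore erased, exactly once.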