From 55a45d364421e50300d64ac02b4d09d975a39eff Mon Sep 17 00:00:00 2001
From: Kevin Lim <ktlim@umich.edu>
Date: Fri, 23 Mar 2007 11:22:43 -0400
Subject: A couple of minor fixes. 1. Set CPU ID in all modes for the O3 CPU.
 2. Use nextCycle() function to prevent phase drift in O3 CPU. 3. Remove
 assertion in rename map that is no longer true.

src/cpu/o3/alpha/cpu_builder.cc:
    Allow for CPU id in all modes, not just full system.  Also include a parameter that was left out by accident.
src/cpu/o3/alpha/cpu_impl.hh:
    Set the CPU ID properly.
src/cpu/o3/cpu.cc:
src/cpu/o3/cpu.hh:
    Use nextCycle() function so that the CPU does not get out of phase when starting up from quiesces.
src/cpu/o3/rename_map.cc:
    Remove assertion that is no longer true.
tests/configs/o3-timing.py:
    Set CPU's id to 0.

--HG--
extra : convert_revision : 2b69c19adfce2adcc2d1939e89d702bd6674d5d5
---
 src/cpu/o3/alpha/cpu_builder.cc |  7 ++++---
 src/cpu/o3/alpha/cpu_impl.hh    |  1 +
 src/cpu/o3/cpu.cc               | 10 +++++-----
 src/cpu/o3/cpu.hh               | 16 ++++++++++------
 src/cpu/o3/rename_map.cc        |  2 --
 5 files changed, 20 insertions(+), 16 deletions(-)

(limited to 'src/cpu/o3')
diff --git a/src/cpu/o3/alpha/cpu_builder.cc b/src/cpu/o3/alpha/cpu_builder.cc
index 5a375a4b8..34754d3c5 100644
--- a/src/cpu/o3/alpha/cpu_builder.cc
+++ b/src/cpu/o3/alpha/cpu_builder.cc
@@ -50,11 +50,11 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivO3CPU)
     Param<int> clock;
     Param<int> phase;
     Param<int> numThreads;
+Param<int> cpu_id;
 Param<int> activity;
 
 #if FULL_SYSTEM
 SimObjectParam<System *> system;
-Param<int> cpu_id;
 SimObjectParam<AlphaISA::ITB *> itb;
 SimObjectParam<AlphaISA::DTB *> dtb;
 Param<Tick> profile;
@@ -161,11 +161,11 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU)
     INIT_PARAM(clock, "clock speed"),
     INIT_PARAM_DFLT(phase, "clock phase", 0),
     INIT_PARAM(numThreads, "number of HW thread contexts"),
+    INIT_PARAM(cpu_id, "processor ID"),
     INIT_PARAM_DFLT(activity, "Initial activity count", 0),
 
 #if FULL_SYSTEM
     INIT_PARAM(system, "System object"),
-    INIT_PARAM(cpu_id, "processor ID"),
     INIT_PARAM(itb, "Instruction translation buffer"),
     INIT_PARAM(dtb, "Data translation buffer"),
     INIT_PARAM(profile, ""),
@@ -305,14 +305,15 @@ CREATE_SIM_OBJECT(DerivO3CPU)
     AlphaSimpleParams *params = new AlphaSimpleParams;
 
     params->clock = clock;
+    params->phase = phase;
 
     params->name = getInstanceName();
     params->numberOfThreads = actual_num_threads;
+    params->cpu_id = cpu_id;
     params->activity = activity;
 
 #if FULL_SYSTEM
     params->system = system;
-    params->cpu_id = cpu_id;
     params->itb = itb;
     params->dtb = dtb;
     params->profile = profile;
diff --git a/src/cpu/o3/alpha/cpu_impl.hh b/src/cpu/o3/alpha/cpu_impl.hh
index 41f149963..7799d8f05 100644
--- a/src/cpu/o3/alpha/cpu_impl.hh
+++ b/src/cpu/o3/alpha/cpu_impl.hh
@@ -114,6 +114,7 @@ AlphaO3CPU<Impl>::AlphaO3CPU(Params *params) : FullO3CPU<Impl>(params)
 #endif
         // Give the thread the TC.
         this->thread[i]->tc = tc;
+        this->thread[i]->setCpuId(params->cpu_id);
 
         // Add the TC to the CPU's list of TC's.
         this->threadContexts.push_back(tc);
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index 66c75a12d..f9e094d75 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -466,7 +466,7 @@ FullO3CPU<Impl>::tick()
             lastRunningCycle = curTick;
             timesIdled++;
         } else {
-            tickEvent.schedule(curTick + cycles(1));
+            tickEvent.schedule(nextCycle(curTick + cycles(1)));
             DPRINTF(O3CPU, "Scheduling next tick!\n");
         }
     }
@@ -880,7 +880,7 @@ FullO3CPU<Impl>::resume()
 #endif
 
     if (!tickEvent.scheduled())
-        tickEvent.schedule(curTick);
+        tickEvent.schedule(nextCycle());
     _status = Running;
 }
 
@@ -973,11 +973,11 @@ FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
         ThreadContext *tc = threadContexts[i];
         if (tc->status() == ThreadContext::Active && _status != Running) {
             _status = Running;
-            tickEvent.schedule(curTick);
+            tickEvent.schedule(nextCycle());
         }
     }
     if (!tickEvent.scheduled())
-        tickEvent.schedule(curTick);
+        tickEvent.schedule(nextCycle());
 
     Port *peer;
     Port *icachePort = fetch.getIcachePort();
@@ -1406,7 +1406,7 @@ FullO3CPU<Impl>::wakeCPU()
 
     idleCycles += (curTick - 1) - lastRunningCycle;
 
-    tickEvent.schedule(curTick);
+    tickEvent.schedule(nextCycle());
 }
 
 template <class Impl>
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
index d217a3e85..b47c2a494 100644
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -146,9 +146,9 @@ class FullO3CPU : public BaseO3CPU
     void scheduleTickEvent(int delay)
     {
         if (tickEvent.squashed())
-            tickEvent.reschedule(curTick + cycles(delay));
+            tickEvent.reschedule(nextCycle(curTick + cycles(delay)));
         else if (!tickEvent.scheduled())
-            tickEvent.schedule(curTick + cycles(delay));
+            tickEvent.schedule(nextCycle(curTick + cycles(delay)));
     }
 
     /** Unschedule tick event, regardless of its current state. */
@@ -186,9 +186,11 @@ class FullO3CPU : public BaseO3CPU
     {
         // Schedule thread to activate, regardless of its current state.
         if (activateThreadEvent[tid].squashed())
-            activateThreadEvent[tid].reschedule(curTick + cycles(delay));
+            activateThreadEvent[tid].
+                reschedule(nextCycle(curTick + cycles(delay)));
         else if (!activateThreadEvent[tid].scheduled())
-            activateThreadEvent[tid].schedule(curTick + cycles(delay));
+            activateThreadEvent[tid].
+                schedule(nextCycle(curTick + cycles(delay)));
     }
 
     /** Unschedule actiavte thread event, regardless of its current state. */
@@ -235,9 +237,11 @@ class FullO3CPU : public BaseO3CPU
     {
         // Schedule thread to activate, regardless of its current state.
         if (deallocateContextEvent[tid].squashed())
-            deallocateContextEvent[tid].reschedule(curTick + cycles(delay));
+            deallocateContextEvent[tid].
+                reschedule(nextCycle(curTick + cycles(delay)));
         else if (!deallocateContextEvent[tid].scheduled())
-            deallocateContextEvent[tid].schedule(curTick + cycles(delay));
+            deallocateContextEvent[tid].
+                schedule(nextCycle(curTick + cycles(delay)));
     }
 
     /** Unschedule thread deallocation in CPU */
diff --git a/src/cpu/o3/rename_map.cc b/src/cpu/o3/rename_map.cc
index 620daf691..b436ec1c3 100644
--- a/src/cpu/o3/rename_map.cc
+++ b/src/cpu/o3/rename_map.cc
@@ -192,8 +192,6 @@ SimpleRenameMap::rename(RegIndex arch_reg)
         // known that the prev reg was outside the range of normal registers
         // so the free list can avoid adding it.
         prev_reg = renamed_reg;
-
-        assert(renamed_reg < numPhysicalRegs + numMiscRegs);
     }
 
     DPRINTF(Rename, "Renamed reg %d to physical reg %d old mapping was %d\n",
-- 
cgit v1.2.3


From 31e78b0b92b0a1e066c85622173eb866ded45709 Mon Sep 17 00:00:00 2001
From: Kevin Lim <ktlim@umich.edu>
Date: Fri, 23 Mar 2007 11:33:08 -0400
Subject: Two fixes: 1. Requests are handled more properly now.  They assume
 the memory system takes control of the request upon sending out an access. 2.
 load-load ordering is maintained.

src/cpu/base_dyn_inst.hh:
    Update how requests are handled.  The BaseDynInst should not be able to hold a pointer to the request because the request becomes owned by the memory system once it is sent out.

    Also include some functions to allow certain status bits to be cleared.
src/cpu/base_dyn_inst_impl.hh:
    Update how requests are handled.  The BaseDynInst should not be able to hold a pointer to the request because the request becomes owned by the memory system once it is sent out.
src/cpu/o3/fetch_impl.hh:
    General correctness fixes.  retryPkt is not necessarily always set, so handle it properly.  Also consider the cache unblocked only when recvRetry is called.
src/cpu/o3/lsq_unit.hh:
    Handle requests a little more correctly.  Now that the requests aren't pointed to by the DynInst, be sure to delete the request if it's not being used by the memory system.

    Also be sure to not store-load forward from an uncacheable store.
src/cpu/o3/lsq_unit_impl.hh:
    Check to make sure load-load ordering was maintained.

    Also handle requests a little more correctly.

--HG--
extra : convert_revision : e86bead2886d02443cf77bf7a7a1492845e1690f
---
 src/cpu/o3/fetch_impl.hh    |  14 ++++---
 src/cpu/o3/lsq_unit.hh      |  25 ++++++++++-
 src/cpu/o3/lsq_unit_impl.hh | 100 +++++++++++++++++++++++++++++++++-----------
 3 files changed, 107 insertions(+), 32 deletions(-)

(limited to 'src/cpu/o3')

diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index b80fc72e1..a8727a425 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -619,6 +619,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
                 fault = TheISA::genMachineCheckFault();
                 delete mem_req;
                 memReq[tid] = NULL;
+                warn("Bad address!\n");
             }
             assert(retryPkt == NULL);
             assert(retryTid == -1);
@@ -669,11 +670,12 @@ DefaultFetch<Impl>::doSquash(const Addr &new_PC,
     // Get rid of the retrying packet if it was from this thread.
     if (retryTid == tid) {
         assert(cacheBlocked);
-        cacheBlocked = false;
-        retryTid = -1;
-        delete retryPkt->req;
-        delete retryPkt;
+        if (retryPkt) {
+            delete retryPkt->req;
+            delete retryPkt;
+        }
         retryPkt = NULL;
+        retryTid = -1;
     }
 
     fetchStatus[tid] = Squashing;
@@ -1152,7 +1154,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
 
             ///FIXME This needs to be more robust in dealing with delay slots
 #if !ISA_HAS_DELAY_SLOT
-            predicted_branch |=
+//	    predicted_branch |=
 #endif
             lookupAndUpdateNextPC(instruction, next_PC, next_NPC);
             predicted_branch |= (next_PC != fetch_NPC);
@@ -1223,7 +1225,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
         // until commit handles the fault.  The only other way it can
         // wake up is if a squash comes along and changes the PC.
 #if FULL_SYSTEM
-        assert(numInst != fetchWidth);
+        assert(numInst < fetchWidth);
         // Get a sequence number.
         inst_seq = cpu->getAndIncrementInstSeq();
         // We will use a nop in order to carry the fault.
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index 9c7eb7780..704d2183d 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -497,6 +497,11 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
         (load_idx != loadHead || !load_inst->isAtCommit())) {
         iewStage->rescheduleMemInst(load_inst);
         ++lsqRescheduledLoads;
+
+        // Must delete request now that it wasn't handed off to
+        // memory.  This is quite ugly.  @todo: Figure out the proper
+        // place to really handle request deletes.
+        delete req;
         return TheISA::genMachineCheckFault();
     }
 
@@ -534,6 +539,10 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
 
         if (store_size == 0)
             continue;
+        else if (storeQueue[store_idx].inst->uncacheable())
+            continue;
+
+        assert(storeQueue[store_idx].inst->effAddrValid);
 
         // Check if the store data is within the lower and upper bounds of
         // addresses that the request needs.
@@ -550,7 +559,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
             storeQueue[store_idx].inst->effAddr;
 
         // If the store's data has all of the data needed, we can forward.
-        if (store_has_lower_limit && store_has_upper_limit) {
+        if ((store_has_lower_limit && store_has_upper_limit)) {
             // Get shift amount for offset into the store's data.
             int shift_amt = req->getVaddr() & (store_size - 1);
             // @todo: Magic number, assumes byte addressing
@@ -595,6 +604,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
             // If it's already been written back, then don't worry about
             // stalling on it.
             if (storeQueue[store_idx].completed) {
+                panic("Should not check one of these");
                 continue;
             }
 
@@ -613,6 +623,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
             // rescheduled eventually
             iewStage->rescheduleMemInst(load_inst);
             iewStage->decrWb(load_inst->seqNum);
+            load_inst->clearIssued();
             ++lsqRescheduledLoads;
 
             // Do not generate a writeback event as this instruction is not
@@ -621,7 +632,11 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
                     "Store idx %i to load addr %#x\n",
                     store_idx, req->getVaddr());
 
-            ++lsqBlockedLoads;
+            // Must delete request now that it wasn't handed off to
+            // memory.  This is quite ugly.  @todo: Figure out the
+            // proper place to really handle request deletes.
+            delete req;
+
             return NoFault;
         }
     }
@@ -653,8 +668,11 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
             // Delete state and data packet because a load retry
             // initiates a pipeline restart; it does not retry.
             delete state;
+            delete data_pkt->req;
             delete data_pkt;
 
+            req = NULL;
+
             if (result == Packet::BadAddress) {
                 return TheISA::genMachineCheckFault();
             }
@@ -668,6 +686,9 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
     // If the cache was blocked, or has become blocked due to the access,
     // handle it.
     if (lsq->cacheBlocked()) {
+        if (req)
+            delete req;
+
         ++lsqCacheBlocked;
 
         iewStage->decrWb(load_inst->seqNum);
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index ebd9301f6..ed331386b 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -81,6 +81,7 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
     if (isSwitchedOut() || inst->isSquashed()) {
         iewStage->decrWb(inst->seqNum);
         delete state;
+        delete pkt->req;
         delete pkt;
         return;
     } else {
@@ -94,6 +95,7 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
     }
 
     delete state;
+    delete pkt->req;
     delete pkt;
 }
 
@@ -403,12 +405,15 @@ template <class Impl>
 Fault
 LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
 {
+    using namespace TheISA;
     // Execute a specific load.
     Fault load_fault = NoFault;
 
     DPRINTF(LSQUnit, "Executing load PC %#x, [sn:%lli]\n",
             inst->readPC(),inst->seqNum);
 
+    assert(!inst->isSquashed());
+
     load_fault = inst->initiateAcc();
 
     // If the instruction faulted, then we need to send it along to commit
@@ -418,12 +423,44 @@ LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
         // realizes there is activity.
         // Mark it as executed unless it is an uncached load that
         // needs to hit the head of commit.
-        if (!(inst->req && inst->req->isUncacheable()) ||
+        if (!(inst->hasRequest() && inst->uncacheable()) ||
             inst->isAtCommit()) {
             inst->setExecuted();
         }
         iewStage->instToCommit(inst);
         iewStage->activityThisCycle();
+    } else if (!loadBlocked()) {
+        assert(inst->effAddrValid);
+        int load_idx = inst->lqIdx;
+        incrLdIdx(load_idx);
+        while (load_idx != loadTail) {
+            // Really only need to check loads that have actually executed
+
+            // @todo: For now this is extra conservative, detecting a
+            // violation if the addresses match assuming all accesses
+            // are quad word accesses.
+
+            // @todo: Fix this, magic number being used here
+            if (loadQueue[load_idx]->effAddrValid &&
+                (loadQueue[load_idx]->effAddr >> 8) ==
+                (inst->effAddr >> 8)) {
+                // A load incorrectly passed this load.  Squash and refetch.
+                // For now return a fault to show that it was unsuccessful.
+                DynInstPtr violator = loadQueue[load_idx];
+                if (!memDepViolator ||
+                    (violator->seqNum < memDepViolator->seqNum)) {
+                    memDepViolator = violator;
+                } else {
+                    break;
+                }
+
+                ++lsqMemOrderViolation;
+
+                return genMachineCheckFault();
+            }
+
+            incrLdIdx(load_idx);
+        }
     }
 
     return load_fault;
@@ -442,6 +479,8 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
     DPRINTF(LSQUnit, "Executing store PC %#x [sn:%lli]\n",
             store_inst->readPC(), store_inst->seqNum);
 
+    assert(!store_inst->isSquashed());
+
     // Check the recently completed loads to see if any match this store's
     // address.  If so, then we have a memory ordering violation.
     int load_idx = store_inst->lqIdx;
@@ -465,32 +504,36 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
         ++storesToWB;
     }
 
-    if (!memDepViolator) {
-        while (load_idx != loadTail) {
-            // Really only need to check loads that have actually executed
-            // It's safe to check all loads because effAddr is set to
-            // InvalAddr when the dyn inst is created.
-
-            // @todo: For now this is extra conservative, detecting a
-            // violation if the addresses match assuming all accesses
-            // are quad word accesses.
-
-            // @todo: Fix this, magic number being used here
-            if ((loadQueue[load_idx]->effAddr >> 8) ==
-                (store_inst->effAddr >> 8)) {
-                // A load incorrectly passed this store.  Squash and refetch.
-                // For now return a fault to show that it was unsuccessful.
-                memDepViolator = loadQueue[load_idx];
-                ++lsqMemOrderViolation;
-
-                return genMachineCheckFault();
+    assert(store_inst->effAddrValid);
+    while (load_idx != loadTail) {
+        // Really only need to check loads that have actually executed
+        // It's safe to check all loads because effAddr is set to
+        // InvalAddr when the dyn inst is created.
+
+        // @todo: For now this is extra conservative, detecting a
+        // violation if the addresses match assuming all accesses
+        // are quad word accesses.
+
+        // @todo: Fix this, magic number being used here
+        if (loadQueue[load_idx]->effAddrValid &&
+            (loadQueue[load_idx]->effAddr >> 8) ==
+            (store_inst->effAddr >> 8)) {
+            // A load incorrectly passed this store.  Squash and refetch.
+            // For now return a fault to show that it was unsuccessful.
+            DynInstPtr violator = loadQueue[load_idx];
+            if (!memDepViolator ||
+                (violator->seqNum < memDepViolator->seqNum)) {
+                memDepViolator = violator;
+            } else {
+                break;
             }
 
-            incrLdIdx(load_idx);
+            ++lsqMemOrderViolation;
+
+            return genMachineCheckFault();
         }
 
-        // If we've reached this point, there was no violation.
-        memDepViolator = NULL;
+        incrLdIdx(load_idx);
     }
 
     return store_fault;
@@ -659,7 +702,7 @@ LSQUnit<Impl>::writebackStores()
                 panic("LSQ sent out a bad address for a completed store!");
             }
             // Need to handle becoming blocked on a store.
-            DPRINTF(IEW, "D-Cache became blcoked when writing [sn:%lli], will"
+            DPRINTF(IEW, "D-Cache became blocked when writing [sn:%lli], will"
                     "retry later\n",
                     inst->seqNum);
             isStoreBlocked = true;
@@ -734,6 +777,10 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
         }
     }
 
+    if (memDepViolator && squashed_num < memDepViolator->seqNum) {
+        memDepViolator = NULL;
+    }
+
     int store_idx = storeTail;
     decrStIdx(store_idx);
 
@@ -763,6 +810,11 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
         storeQueue[store_idx].inst = NULL;
         storeQueue[store_idx].canWB = 0;
 
+        // Must delete request now that it wasn't handed off to
+        // memory.  This is quite ugly.  @todo: Figure out the proper
+        // place to really handle request deletes.
+        delete storeQueue[store_idx].req;
+
         storeQueue[store_idx].req = NULL;
         --stores;
 
-- 
cgit v1.2.3


From e21878c3f2fe2f4f2c1dc9a1da18b7b1566e0686 Mon Sep 17 00:00:00 2001
From: Kevin Lim <ktlim@umich.edu>
Date: Fri, 23 Mar 2007 11:40:53 -0400
Subject: Handle status bits a little better, as well as non-speculative
 instructions.

src/cpu/o3/iew_impl.hh:
    Allow for slightly more flexible handling of non-speculative instructions.  They can be other classes now, such as loads or stores.

    Also be sure to clear the state associated with squashes that are not used.  i.e. if a squash due to a memory ordering violation happens on the same cycle as an older branch squashing, clear the state associated with the memory ordering violation.

    Lastly don't consider uncached loads to officially be "at commit" until IEW receives the signal back from commit about the load.
src/cpu/o3/inst_queue_impl.hh:
    Don't consider non-speculative instructions to be "at commit" until the IQ has received a signal from commit about the instruction.  This prevents non-speculative instructions from being issued too early.
src/cpu/o3/mem_dep_unit_impl.hh:
    Clear instruction's ability to issue if it's replayed.

--HG--
extra : convert_revision : d69dae878a30821222885485f4dee87170d56eb3
---
 src/cpu/o3/iew_impl.hh          | 62 ++++++++++++++++++++++++++++++-----------
 src/cpu/o3/inst_queue_impl.hh   | 18 ++++++++----
 src/cpu/o3/mem_dep_unit_impl.hh | 10 ++-----
 3 files changed, 62 insertions(+), 28 deletions(-)

(limited to 'src/cpu/o3')

diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh
index f24eaf2c4..4883e5a5c 100644
--- a/src/cpu/o3/iew_impl.hh
+++ b/src/cpu/o3/iew_impl.hh
@@ -1152,19 +1152,6 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid)
             // Same as non-speculative stores.
             inst->setCanCommit();
             instQueue.insertBarrier(inst);
-            add_to_iq = false;
-        } else if (inst->isNonSpeculative()) {
-            DPRINTF(IEW, "[tid:%i]: Issue: Nonspeculative instruction "
-                    "encountered, skipping.\n", tid);
-
-            // Same as non-speculative stores.
-            inst->setCanCommit();
-
-            // Specifically insert it as nonspeculative.
-            instQueue.insertNonSpec(inst);
-
-            ++iewDispNonSpecInsts;
-
             add_to_iq = false;
         } else if (inst->isNop()) {
             DPRINTF(IEW, "[tid:%i]: Issue: Nop instruction encountered, "
@@ -1193,6 +1180,20 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid)
         } else {
             add_to_iq = true;
         }
+        if (inst->isNonSpeculative()) {
+            DPRINTF(IEW, "[tid:%i]: Issue: Nonspeculative instruction "
+                    "encountered, skipping.\n", tid);
+
+            // Same as non-speculative stores.
+            inst->setCanCommit();
+
+            // Specifically insert it as nonspeculative.
+            instQueue.insertNonSpec(inst);
+
+            ++iewDispNonSpecInsts;
+
+            add_to_iq = false;
+        }
 
         // If the instruction queue is not full, then add the
         // instruction.
@@ -1379,6 +1380,7 @@ DefaultIEW<Impl>::executeInsts()
                     predictedNotTakenIncorrect++;
                 }
             } else if (ldstQueue.violation(tid)) {
+                assert(inst->isMemRef());
                 // If there was an ordering violation, then get the
                 // DynInst that caused the violation.  Note that this
                 // clears the violation signal.
@@ -1391,10 +1393,10 @@ DefaultIEW<Impl>::executeInsts()
 
                 // Ensure the violating instruction is older than
                 // current squash
-                if (fetchRedirect[tid] &&
-                    violator->seqNum >= toCommit->squashedSeqNum[tid])
+/*                if (fetchRedirect[tid] &&
+                    violator->seqNum >= toCommit->squashedSeqNum[tid] + 1)
                     continue;
-
+*/
                 fetchRedirect[tid] = true;
 
                 // Tell the instruction queue that a violation has occured.
@@ -1414,6 +1416,33 @@ DefaultIEW<Impl>::executeInsts()
 
                 squashDueToMemBlocked(inst, tid);
             }
+        } else {
+            // Reset any state associated with redirects that will not
+            // be used.
+            if (ldstQueue.violation(tid)) {
+                assert(inst->isMemRef());
+
+                DynInstPtr violator = ldstQueue.getMemDepViolator(tid);
+
+                DPRINTF(IEW, "LDSTQ detected a violation.  Violator PC: "
+                        "%#x, inst PC: %#x.  Addr is: %#x.\n",
+                        violator->readPC(), inst->readPC(), inst->physEffAddr);
+                DPRINTF(IEW, "Violation will not be handled because "
+                        "already squashing\n");
+
+                ++memOrderViolationEvents;
+            }
+            if (ldstQueue.loadBlocked(tid) &&
+                !ldstQueue.isLoadBlockedHandled(tid)) {
+                DPRINTF(IEW, "Load operation couldn't execute because the "
+                        "memory system is blocked.  PC: %#x [sn:%lli]\n",
+                        inst->readPC(), inst->seqNum);
+                DPRINTF(IEW, "Blocked load will not be handled because "
+                        "already squashing\n");
+
+                ldstQueue.setLoadBlockedHandled(tid);
+            }
+
         }
     }
 
@@ -1563,6 +1592,7 @@ DefaultIEW<Impl>::tick()
             //DPRINTF(IEW,"NonspecInst from thread %i",tid);
             if (fromCommit->commitInfo[tid].uncached) {
                 instQueue.replayMemInst(fromCommit->commitInfo[tid].uncachedLoad);
+                fromCommit->commitInfo[tid].uncachedLoad->setAtCommit();
             } else {
                 instQueue.scheduleNonSpec(
                     fromCommit->commitInfo[tid].nonSpecSeqNum);
diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh
index 98b8fa900..87bf60dfa 100644
--- a/src/cpu/o3/inst_queue_impl.hh
+++ b/src/cpu/o3/inst_queue_impl.hh
@@ -829,6 +829,8 @@ InstructionQueue<Impl>::scheduleNonSpec(const InstSeqNum &inst)
 
     unsigned tid = (*inst_it).second->threadNumber;
 
+    (*inst_it).second->setAtCommit();
+
     (*inst_it).second->setCanIssue();
 
     if (!(*inst_it).second->isMemRef()) {
@@ -960,6 +962,8 @@ template <class Impl>
 void
 InstructionQueue<Impl>::rescheduleMemInst(DynInstPtr &resched_inst)
 {
+    DPRINTF(IQ, "Rescheduling mem inst [sn:%lli]\n", resched_inst->seqNum);
+    resched_inst->clearCanIssue();
     memDepUnit[resched_inst->threadNumber].reschedule(resched_inst);
 }
 
@@ -984,7 +988,6 @@ InstructionQueue<Impl>::completeMemInst(DynInstPtr &completed_inst)
     completed_inst->memOpDone = true;
 
     memDepUnit[tid].completed(completed_inst);
-
     count[tid]--;
 }
 
@@ -1084,16 +1087,21 @@ InstructionQueue<Impl>::doSquash(unsigned tid)
 
                     ++iqSquashedOperandsExamined;
                 }
-            } else if (!squashed_inst->isStoreConditional() || !squashed_inst->isCompleted()) {
+            } else if (!squashed_inst->isStoreConditional() ||
+                       !squashed_inst->isCompleted()) {
                 NonSpecMapIt ns_inst_it =
                     nonSpecInsts.find(squashed_inst->seqNum);
                 assert(ns_inst_it != nonSpecInsts.end());
+                if (ns_inst_it == nonSpecInsts.end()) {
+                    assert(squashed_inst->getFault() != NoFault);
+                } else {
 
-                (*ns_inst_it).second = NULL;
+                    (*ns_inst_it).second = NULL;
 
-                nonSpecInsts.erase(ns_inst_it);
+                    nonSpecInsts.erase(ns_inst_it);
 
-                ++iqSquashedNonSpecRemoved;
+                    ++iqSquashedNonSpecRemoved;
+                }
             }
 
             // Might want to also clear out the head of the dependency graph.
diff --git a/src/cpu/o3/mem_dep_unit_impl.hh b/src/cpu/o3/mem_dep_unit_impl.hh
index f19980fd5..64558efaa 100644
--- a/src/cpu/o3/mem_dep_unit_impl.hh
+++ b/src/cpu/o3/mem_dep_unit_impl.hh
@@ -214,6 +214,9 @@ MemDepUnit<MemDepPred, Impl>::insert(DynInstPtr &inst)
             inst_entry->regsReady = true;
         }
 
+        // Clear the bit saying this instruction can issue.
+        inst->clearCanIssue();
+
         // Add this instruction to the list of dependents.
         store_entry->dependInsts.push_back(inst_entry);
 
@@ -357,7 +360,6 @@ void
 MemDepUnit<MemDepPred, Impl>::replay(DynInstPtr &inst)
 {
     DynInstPtr temp_inst;
-    bool found_inst = false;
 
     // For now this replay function replays all waiting memory ops.
     while (!instsToReplay.empty()) {
@@ -371,14 +373,8 @@ MemDepUnit<MemDepPred, Impl>::replay(DynInstPtr &inst)
 
         moveToReady(inst_entry);
 
-        if (temp_inst == inst) {
-            found_inst = true;
-        }
-
         instsToReplay.pop_front();
     }
-
-    assert(found_inst);
 }
 
 template <class MemDepPred, class Impl>
-- 
cgit v1.2.3


From 941d3168d02347728181ddb02d486d2fafa14131 Mon Sep 17 00:00:00 2001
From: Kevin Lim <ktlim@umich.edu>
Date: Fri, 23 Mar 2007 13:13:10 -0400
Subject: Updates for commit. 1. Move interrupt handling to a separate function
 to clean up main commit() function a bit.  Also gate the function call off
 properly based on whether or not there are outstanding interrupts, and the
 system is not in PAL mode. 2. Better handling of updating instruction's
 status bits.  Instructions are not marked "atCommit" until other stages view
 it (pushed off to IEW/IQ), and they have been properly handled (faults). 3.
 Don't consider the ROB "empty" for the purpose of other stages until the ROB
 is empty, all stores have written back, and there was no store commits this
 cycle.  The last is necessary in case a store committed, in which case it
 would look like all stores have written back but in actuality have not.

src/cpu/o3/commit.hh:
    Slightly modify how interrupts are handled.  Also include some extra bools to keep track of state properly.
src/cpu/o3/commit_impl.hh:
    Slightly modify how interrupts are handled.  Also include some extra bools to keep track of state.

    General correctness updates, most specifically for when commit broadcasts to other stages that the ROB is empty.

--HG--
extra : convert_revision : 682ec6ccf4ee6ed0c8a030ceaba1c90a3619d102
---
 src/cpu/o3/commit.hh      |  15 ++++++
 src/cpu/o3/commit_impl.hh | 125 +++++++++++++++++++++++++++++++---------------
 2 files changed, 101 insertions(+), 39 deletions(-)

(limited to 'src/cpu/o3')

diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh
index 0d7d82529..e2ad23954 100644
--- a/src/cpu/o3/commit.hh
+++ b/src/cpu/o3/commit.hh
@@ -247,6 +247,11 @@ class DefaultCommit
     /** Handles squashing due to an TC write. */
     void squashFromTC(unsigned tid);
 
+#if FULL_SYSTEM
+    /** Handles processing an interrupt. */
+    void handleInterrupt();
+#endif // FULL_SYSTEM
+
     /** Commits as many instructions as possible. */
     void commitInsts();
 
@@ -409,6 +414,16 @@ class DefaultCommit
     /** The sequence number of the youngest valid instruction in the ROB. */
     InstSeqNum youngestSeqNum[Impl::MaxThreads];
 
+    /** Records if there is a trap currently in flight. */
+    bool trapInFlight[Impl::MaxThreads];
+
+    /** Records if there were any stores committed this cycle. */
+    bool committedStores[Impl::MaxThreads];
+
+    /** Records if commit should check if the ROB is truly empty (see
+        commit_impl.hh). */
+    bool checkEmptyROB[Impl::MaxThreads];
+
     /** Pointer to the list of active threads. */
     std::list<unsigned> *activeThreads;
 
diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh
index f1457922c..b42a6f523 100644
--- a/src/cpu/o3/commit_impl.hh
+++ b/src/cpu/o3/commit_impl.hh
@@ -118,6 +118,9 @@ DefaultCommit<Impl>::DefaultCommit(Params *params)
     for (int i=0; i < numThreads; i++) {
         commitStatus[i] = Idle;
         changedROBNumEntries[i] = false;
+        checkEmptyROB[i] = false;
+        trapInFlight[i] = false;
+        committedStores[i] = false;
         trapSquash[i] = false;
         tcSquash[i] = false;
         PC[i] = nextPC[i] = nextNPC[i] = 0;
@@ -335,6 +338,7 @@ DefaultCommit<Impl>::initStage()
     for (int i=0; i < numThreads; i++) {
         toIEW->commitInfo[i].usedROB = true;
         toIEW->commitInfo[i].freeROBEntries = rob->numFreeEntries(i);
+        toIEW->commitInfo[i].emptyROB = true;
     }
 
     cpu->activityThisCycle();
@@ -473,14 +477,14 @@ DefaultCommit<Impl>::generateTrapEvent(unsigned tid)
     TrapEvent *trap = new TrapEvent(this, tid);
 
     trap->schedule(curTick + trapLatency);
-
-    thread[tid]->trapPending = true;
+    trapInFlight[tid] = true;
 }
 
 template <class Impl>
 void
 DefaultCommit<Impl>::generateTCEvent(unsigned tid)
 {
+    assert(!trapInFlight[tid]);
     DPRINTF(Commit, "Generating TC squash event for [tid:%i]\n", tid);
 
     tcSquash[tid] = true;
@@ -495,7 +499,7 @@ DefaultCommit<Impl>::squashAll(unsigned tid)
     // Hopefully this doesn't mess things up.  Basically I want to squash
     // all instructions of this thread.
     InstSeqNum squashed_inst = rob->isEmpty() ?
-        0 : rob->readHeadInst(tid)->seqNum - 1;;
+        0 : rob->readHeadInst(tid)->seqNum - 1;
 
     // All younger instructions will be squashed. Set the sequence
     // number as the youngest instruction in the ROB (0 in this case.
@@ -532,6 +536,7 @@ DefaultCommit<Impl>::squashFromTrap(unsigned tid)
 
     thread[tid]->trapPending = false;
     thread[tid]->inSyscall = false;
+    trapInFlight[tid] = false;
 
     trapSquash[tid] = false;
 
@@ -580,6 +585,10 @@ DefaultCommit<Impl>::tick()
     while (threads != end) {
         unsigned tid = *threads++;
 
+        // Clear the bit saying if the thread has committed stores
+        // this cycle.
+        committedStores[tid] = false;
+
         if (commitStatus[tid] == ROBSquashing) {
 
             if (rob->isDoneSquashing(tid)) {
@@ -635,16 +644,11 @@ DefaultCommit<Impl>::tick()
     updateStatus();
 }
 
+#if FULL_SYSTEM
 template <class Impl>
 void
-DefaultCommit<Impl>::commit()
+DefaultCommit<Impl>::handleInterrupt()
 {
-
-    //////////////////////////////////////
-    // Check for interrupts
-    //////////////////////////////////////
-
-#if FULL_SYSTEM
     if (interrupt != NoFault) {
         // Wait until the ROB is empty and all stores have drained in
         // order to enter the interrupt.
@@ -653,6 +657,12 @@ DefaultCommit<Impl>::commit()
             // an interrupt needed to be handled.
             DPRINTF(Commit, "Interrupt detected.\n");
 
+            Fault new_interrupt = cpu->getInterrupts();
+            assert(new_interrupt == interrupt);
+
+            // Clear the interrupt now that it's going to be handled
+            toIEW->commitInfo[0].clearInterrupt = true;
+
             assert(!thread[0]->inSyscall);
             thread[0]->inSyscall = true;
 
@@ -666,16 +676,14 @@ DefaultCommit<Impl>::commit()
             // Generate trap squash event.
             generateTrapEvent(0);
 
-            // Clear the interrupt now that it's been handled
-            toIEW->commitInfo[0].clearInterrupt = true;
             interrupt = NoFault;
         } else {
             DPRINTF(Commit, "Interrupt pending, waiting for ROB to empty.\n");
         }
-    } else if (cpu->check_interrupts(cpu->tcBase(0)) &&
-        commitStatus[0] != TrapPending &&
-        !trapSquash[0] &&
-        !tcSquash[0]) {
+    } else if (commitStatus[0] != TrapPending &&
+               cpu->check_interrupts(cpu->tcBase(0)) &&
+               !trapSquash[0] &&
+               !tcSquash[0]) {
         // Process interrupts if interrupts are enabled, not in PAL
         // mode, and no other traps or external squashes are currently
         // pending.
@@ -691,7 +699,21 @@ DefaultCommit<Impl>::commit()
             toIEW->commitInfo[0].interruptPending = true;
         }
     }
+}
+#endif // FULL_SYSTEM
+
+template <class Impl>
+void
+DefaultCommit<Impl>::commit()
+{
 
+#if FULL_SYSTEM
+    // Check for any interrupt, and start processing it.  Or if we
+    // have an outstanding interrupt and are at a point when it is
+    // valid to take an interrupt, process it.
+    if (cpu->check_interrupts(cpu->tcBase(0))) {
+        handleInterrupt();
+    }
 #endif // FULL_SYSTEM
 
     ////////////////////////////////////
@@ -709,6 +731,7 @@ DefaultCommit<Impl>::commit()
             assert(!tcSquash[tid]);
             squashFromTrap(tid);
         } else if (tcSquash[tid] == true) {
+            assert(commitStatus[tid] != TrapPending);
             squashFromTC(tid);
         }
 
@@ -753,6 +776,7 @@ DefaultCommit<Impl>::commit()
                 bdelay_done_seq_num--;
 #endif
             }
+
             // All younger instructions will be squashed. Set the sequence
             // number as the youngest instruction in the ROB.
             youngestSeqNum[tid] = squashed_inst;
@@ -817,13 +841,29 @@ DefaultCommit<Impl>::commit()
             toIEW->commitInfo[tid].usedROB = true;
             toIEW->commitInfo[tid].freeROBEntries = rob->numFreeEntries(tid);
 
-            if (rob->isEmpty(tid)) {
-                toIEW->commitInfo[tid].emptyROB = true;
-            }
-
             wroteToTimeBuffer = true;
             changedROBNumEntries[tid] = false;
+            if (rob->isEmpty(tid))
+                checkEmptyROB[tid] = true;
         }
+
+        // ROB is only considered "empty" for previous stages if: a)
+        // ROB is empty, b) there are no outstanding stores, c) IEW
+        // stage has received any information regarding stores that
+        // committed.
+        // c) is checked by making sure to not consider the ROB empty
+        // on the same cycle as when stores have been committed.
+        // @todo: Make this handle multi-cycle communication between
+        // commit and IEW.
+        if (checkEmptyROB[tid] && rob->isEmpty(tid) &&
+            !iewStage->hasStoresToWB() && !committedStores[tid]) {
+            checkEmptyROB[tid] = false;
+            toIEW->commitInfo[tid].usedROB = true;
+            toIEW->commitInfo[tid].emptyROB = true;
+            toIEW->commitInfo[tid].freeROBEntries = rob->numFreeEntries(tid);
+            wroteToTimeBuffer = true;
+        }
+
     }
 }
 
@@ -966,8 +1006,6 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
         // and committed this instruction.
         thread[tid]->funcExeInst--;
 
-        head_inst->setAtCommit();
-
         if (head_inst->isNonSpeculative() ||
             head_inst->isStoreConditional() ||
             head_inst->isMemBarrier() ||
@@ -977,19 +1015,9 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
                     "instruction [sn:%lli] at the head of the ROB, PC %#x.\n",
                     head_inst->seqNum, head_inst->readPC());
 
-            // Hack to make sure syscalls/memory barriers/quiesces
-            // aren't executed until all stores write back their data.
-            // This direct communication shouldn't be used for
-            // anything other than this.
-            if ((head_inst->isMemBarrier() || head_inst->isWriteBarrier() ||
-                    head_inst->isQuiesce()) &&
-                iewStage->hasStoresToWB())
-            {
+            if (inst_num > 0 || iewStage->hasStoresToWB()) {
                 DPRINTF(Commit, "Waiting for all stores to writeback.\n");
                 return false;
-            } else if (inst_num > 0 || iewStage->hasStoresToWB()) {
-                DPRINTF(Commit, "Waiting to become head of commit.\n");
-                return false;
             }
 
             toIEW->commitInfo[tid].nonSpecSeqNum = head_inst->seqNum;
@@ -1002,6 +1030,12 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
 
             return false;
         } else if (head_inst->isLoad()) {
+            if (inst_num > 0 || iewStage->hasStoresToWB()) {
+                DPRINTF(Commit, "Waiting for all stores to writeback.\n");
+                return false;
+            }
+
+            assert(head_inst->uncacheable());
             DPRINTF(Commit, "[sn:%lli]: Uncached load, PC %#x.\n",
                     head_inst->seqNum, head_inst->readPC());
 
@@ -1025,8 +1059,11 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
         panic("Thread sync instructions are not handled yet.\n");
     }
 
+    // Check if the instruction caused a fault.  If so, trap.
+    Fault inst_fault = head_inst->getFault();
+
     // Stores mark themselves as completed.
-    if (!head_inst->isStore()) {
+    if (!head_inst->isStore() && inst_fault == NoFault) {
         head_inst->setCompleted();
     }
 
@@ -1038,9 +1075,6 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
     }
 #endif
 
-    // Check if the instruction caused a fault.  If so, trap.
-    Fault inst_fault = head_inst->getFault();
-
     // DTB will sometimes need the machine instruction for when
     // faults happen.  So we will set it here, prior to the DTB
     // possibly needing it for its fault.
@@ -1048,7 +1082,6 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
         static_cast<TheISA::MachInst>(head_inst->staticInst->machInst));
 
     if (inst_fault != NoFault) {
-        head_inst->setCompleted();
         DPRINTF(Commit, "Inst [sn:%lli] PC %#x has a fault\n",
                 head_inst->seqNum, head_inst->readPC());
 
@@ -1057,6 +1090,8 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
             return false;
         }
 
+        head_inst->setCompleted();
+
 #if USE_CHECKER
         if (cpu->checker && head_inst->isStore()) {
             cpu->checker->verify(head_inst);
@@ -1082,6 +1117,13 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
 
         commitStatus[tid] = TrapPending;
 
+        if (head_inst->traceData) {
+            head_inst->traceData->setFetchSeq(head_inst->seqNum);
+            head_inst->traceData->setCPSeq(thread[tid]->numInst);
+            head_inst->traceData->finalize();
+            head_inst->traceData = NULL;
+        }
+
         // Generate trap squash event.
         generateTrapEvent(tid);
 //        warn("%lli fault (%d) handled @ PC %08p", curTick, inst_fault->name(), head_inst->readPC());
@@ -1122,6 +1164,10 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
     // Finally clear the head ROB entry.
     rob->retireHead(tid);
 
+    // If this was a store, record it for this cycle.
+    if (head_inst->isStore())
+        committedStores[tid] = true;
+
     // Return true to indicate that we have committed an instruction.
     return true;
 }
@@ -1166,7 +1212,8 @@ DefaultCommit<Impl>::getInsts()
         int tid = inst->threadNumber;
 
         if (!inst->isSquashed() &&
-            commitStatus[tid] != ROBSquashing) {
+            commitStatus[tid] != ROBSquashing &&
+            commitStatus[tid] != TrapPending) {
             changedROBNumEntries[tid] = true;
 
             DPRINTF(Commit, "Inserting PC %#x [sn:%i] [tid:%i] into ROB.\n",
-- 
cgit v1.2.3