Merge zizzer.eecs.umich.edu:/bk/newmem

into zeep.eecs.umich.edu:/home/gblack/m5/newmem

--HG--
extra : convert_revision : 30b2475ba034550376455e1bc0e52e19a200fd5a
author: Gabe Black <gblack@eecs.umich.edu> 2006-10-12 10:58:45 -0400
committer: Gabe Black <gblack@eecs.umich.edu> 2006-10-12 10:58:45 -0400
commit: 866cfaf9dc596d8547e14bc2133fb962776572a7 (patch)
tree: 19b82a8021533e8bc2e35f14fb0b6a0440756814 /src/cpu/o3
parent: 6a31898a88a9ecced399ccf50636831c21d4a75e (diff)
parent: 78aec04b660544ea7af80d76912b4422c4426602 (diff)
download: gem5-866cfaf9dc596d8547e14bc2133fb962776572a7.tar.xz
11 files changed, 138 insertions, 65 deletions
diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh
index c80e4d8c1..ecf6ed632 100644
--- a/src/cpu/o3/commit_impl.hh
+++ b/src/cpu/o3/commit_impl.hh
@@ -342,12 +342,6 @@ DefaultCommit<Impl>::drain()
 {
     drainPending = true;
 
-    // If it's already drained, return true.
-    if (rob->isEmpty() && !iewStage->hasStoresToWB()) {
-        cpu->signalDrained();
-        return true;
-    }
-
     return false;
 }
 
@@ -1218,16 +1212,16 @@ DefaultCommit<Impl>::skidInsert()
 
     for (int inst_num = 0; inst_num < fromRename->size; ++inst_num) {
         DynInstPtr inst = fromRename->insts[inst_num];
-        int tid = inst->threadNumber;
 
         if (!inst->isSquashed()) {
             DPRINTF(Commit, "Inserting PC %#x [sn:%i] [tid:%i] into ",
-                    "skidBuffer.\n", inst->readPC(), inst->seqNum, tid);
+                    "skidBuffer.\n", inst->readPC(), inst->seqNum,
+                    inst->threadNumber);
             skidBuffer.push(inst);
         } else {
             DPRINTF(Commit, "Instruction PC %#x [sn:%i] [tid:%i] was "
                     "squashed, skipping.\n",
-                    inst->readPC(), inst->seqNum, tid);
+                    inst->readPC(), inst->seqNum, inst->threadNumber);
         }
     }
 }
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index 7386dfadd..4c9a8e91f 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -88,7 +88,7 @@ FullO3CPU<Impl>::TickEvent::description()
 
 template <class Impl>
 FullO3CPU<Impl>::ActivateThreadEvent::ActivateThreadEvent()
-    : Event(&mainEventQueue, CPU_Tick_Pri)
+    : Event(&mainEventQueue, CPU_Switch_Pri)
 {
 }
 
@@ -135,7 +135,8 @@ void
 FullO3CPU<Impl>::DeallocateContextEvent::process()
 {
     cpu->deactivateThread(tid);
-    cpu->removeThread(tid);
+    if (remove)
+        cpu->removeThread(tid);
 }
 
 template <class Impl>
@@ -191,7 +192,11 @@ FullO3CPU<Impl>::FullO3CPU(Params *params)
       deferRegistration(params->deferRegistration),
       numThreads(number_of_threads)
 {
-    _status = Idle;
+    if (!deferRegistration) {
+        _status = Running;
+    } else {
+        _status = Idle;
+    }
 
     checker = NULL;
 
@@ -304,6 +309,9 @@ FullO3CPU<Impl>::FullO3CPU(Params *params)
 
                             tid,
                             bindRegs);
+
+        activateThreadEvent[tid].init(tid, this);
+        deallocateContextEvent[tid].init(tid, this);
     }
 
     rename.setRenameMap(renameMap);
@@ -447,13 +455,16 @@ FullO3CPU<Impl>::tick()
     if (!tickEvent.scheduled()) {
         if (_status == SwitchedOut ||
             getState() == SimObject::Drained) {
+            DPRINTF(O3CPU, "Switched out!\n");
             // increment stat
             lastRunningCycle = curTick;
-        } else if (!activityRec.active()) {
+        } else if (!activityRec.active() || _status == Idle) {
+            DPRINTF(O3CPU, "Idle!\n");
             lastRunningCycle = curTick;
             timesIdled++;
         } else {
             tickEvent.schedule(curTick + cycles(1));
+            DPRINTF(O3CPU, "Scheduling next tick!\n");
         }
     }
 
@@ -512,6 +523,8 @@ FullO3CPU<Impl>::activateThread(unsigned tid)
     list<unsigned>::iterator isActive = find(
         activeThreads.begin(), activeThreads.end(), tid);
 
+    DPRINTF(O3CPU, "[tid:%i]: Calling activate thread.\n", tid);
+
     if (isActive == activeThreads.end()) {
         DPRINTF(O3CPU, "[tid:%i]: Adding to active threads list\n",
                 tid);
@@ -528,6 +541,8 @@ FullO3CPU<Impl>::deactivateThread(unsigned tid)
     list<unsigned>::iterator thread_it =
         find(activeThreads.begin(), activeThreads.end(), tid);
 
+    DPRINTF(O3CPU, "[tid:%i]: Calling deactivate thread.\n", tid);
+
     if (thread_it != activeThreads.end()) {
         DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n",
                 tid);
@@ -548,7 +563,7 @@ FullO3CPU<Impl>::activateContext(int tid, int delay)
         activateThread(tid);
     }
 
-    if(lastActivatedCycle < curTick) {
+    if (lastActivatedCycle < curTick) {
         scheduleTickEvent(delay);
 
         // Be sure to signal that there's some activity so the CPU doesn't
@@ -563,17 +578,20 @@ FullO3CPU<Impl>::activateContext(int tid, int delay)
 }
 
 template <class Impl>
-void
-FullO3CPU<Impl>::deallocateContext(int tid, int delay)
+bool
+FullO3CPU<Impl>::deallocateContext(int tid, bool remove, int delay)
 {
     // Schedule removal of thread data from CPU
     if (delay){
         DPRINTF(O3CPU, "[tid:%i]: Scheduling thread context to deallocate "
                 "on cycle %d\n", tid, curTick + cycles(delay));
-        scheduleDeallocateContextEvent(tid, delay);
+        scheduleDeallocateContextEvent(tid, remove, delay);
+        return false;
     } else {
         deactivateThread(tid);
-        removeThread(tid);
+        if (remove)
+            removeThread(tid);
+        return true;
     }
 }
 
@@ -582,8 +600,9 @@ void
 FullO3CPU<Impl>::suspendContext(int tid)
 {
     DPRINTF(O3CPU,"[tid: %i]: Suspending Thread Context.\n", tid);
-    deactivateThread(tid);
-    if (activeThreads.size() == 0)
+    bool deallocated = deallocateContext(tid, false, 1);
+    // If this was the last thread then unschedule the tick event.
+    if ((activeThreads.size() == 1 && !deallocated) || activeThreads.size() == 0)
         unscheduleTickEvent();
     _status = Idle;
 }
@@ -594,7 +613,7 @@ FullO3CPU<Impl>::haltContext(int tid)
 {
     //For now, this is the same as deallocate
     DPRINTF(O3CPU,"[tid:%i]: Halt Context called. Deallocating", tid);
-    deallocateContext(tid, 1);
+    deallocateContext(tid, true, 1);
 }
 
 template <class Impl>
@@ -682,10 +701,17 @@ FullO3CPU<Impl>::removeThread(unsigned tid)
     assert(iew.ldstQueue.getCount(tid) == 0);
 
     // Reset ROB/IQ/LSQ Entries
+
+    // Commented out for now.  This should be possible to do by
+    // telling all the pipeline stages to drain first, and then
+    // checking until the drain completes.  Once the pipeline is
+    // drained, call resetEntries(). - 10-09-06 ktlim
+/*
     if (activeThreads.size() >= 1) {
         commit.rob->resetEntries();
         iew.resetEntries();
     }
+*/
 }
 
 
@@ -824,7 +850,9 @@ template <class Impl>
 void
 FullO3CPU<Impl>::resume()
 {
+#if FULL_SYSTEM
     assert(system->getMemoryMode() == System::Timing);
+#endif
     fetch.resume();
     decode.resume();
     rename.resume();
@@ -935,6 +963,25 @@ FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
     }
     if (!tickEvent.scheduled())
         tickEvent.schedule(curTick);
+
+    Port *peer;
+    Port *icachePort = fetch.getIcachePort();
+    if (icachePort->getPeer() == NULL) {
+        peer = oldCPU->getPort("icache_port")->getPeer();
+        icachePort->setPeer(peer);
+    } else {
+        peer = icachePort->getPeer();
+    }
+    peer->setPeer(icachePort);
+
+    Port *dcachePort = iew.getDcachePort();
+    if (dcachePort->getPeer() == NULL) {
+        peer = oldCPU->getPort("dcache_port")->getPeer();
+        dcachePort->setPeer(peer);
+    } else {
+        peer = dcachePort->getPeer();
+    }
+    peer->setPeer(dcachePort);
 }
 
 template <class Impl>
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
index dcdcd1fe6..fe510519c 100644
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -202,9 +202,12 @@ class FullO3CPU : public BaseO3CPU
     class DeallocateContextEvent : public Event
     {
       private:
-        /** Number of Thread to Activate */
+        /** Number of Thread to deactivate */
         int tid;
 
+        /** Should the thread be removed from the CPU? */
+        bool remove;
+
         /** Pointer to the CPU. */
         FullO3CPU<Impl> *cpu;
 
@@ -218,12 +221,15 @@ class FullO3CPU : public BaseO3CPU
         /** Processes the event, calling activateThread() on the CPU. */
         void process();
 
+        /** Sets whether the thread should also be removed from the CPU. */
+        void setRemove(bool _remove) { remove = _remove; }
+
         /** Returns the description of the event. */
         const char *description();
     };
 
     /** Schedule cpu to deallocate thread context.*/
-    void scheduleDeallocateContextEvent(int tid, int delay)
+    void scheduleDeallocateContextEvent(int tid, bool remove, int delay)
     {
         // Schedule thread to activate, regardless of its current state.
         if (deallocateContextEvent[tid].squashed())
@@ -296,9 +302,9 @@ class FullO3CPU : public BaseO3CPU
     void suspendContext(int tid);
 
     /** Remove Thread from Active Threads List &&
-     *  Remove Thread Context from CPU.
+     *  Possibly Remove Thread Context from CPU.
      */
-    void deallocateContext(int tid, int delay = 1);
+    bool deallocateContext(int tid, bool remove, int delay = 1);
 
     /** Remove Thread from Active Threads List &&
      *  Remove Thread Context from CPU.
@@ -626,11 +632,6 @@ class FullO3CPU : public BaseO3CPU
     /** Pointers to all of the threads in the CPU. */
     std::vector<Thread *> thread;
 
-    /** Pointer to the icache interface. */
-    MemInterface *icacheInterface;
-    /** Pointer to the dcache interface. */
-    MemInterface *dcacheInterface;
-
     /** Whether or not the CPU should defer its registration. */
     bool deferRegistration;
 
diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh
index 1a2ca32a4..280bf0e71 100644
--- a/src/cpu/o3/fetch.hh
+++ b/src/cpu/o3/fetch.hh
@@ -96,7 +96,7 @@ class DefaultFetch
         /** Returns the address ranges of this device. */
         virtual void getDeviceAddressRanges(AddrRangeList &resp,
                                             AddrRangeList &snoop)
-        { resp.clear(); snoop.clear(); }
+        { resp.clear(); snoop.clear(); snoop.push_back(RangeSize(0,-1)); }
 
         /** Timing version of receive.  Handles setting fetch to the
          * proper status to start fetching. */
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index 3c47c39fa..072580af7 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -63,7 +63,7 @@ template<class Impl>
 void
 DefaultFetch<Impl>::IcachePort::recvFunctional(PacketPtr pkt)
 {
-    panic("DefaultFetch doesn't expect recvFunctional callback!");
+    warn("Default fetch doesn't update it's state from a functional call.");
 }
 
 template<class Impl>
@@ -599,7 +599,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
     if (fault == NoFault) {
 #if 0
         if (cpu->system->memctrl->badaddr(memReq[tid]->paddr) ||
-            memReq[tid]->flags & UNCACHEABLE) {
+            memReq[tid]->isUncacheable()) {
             DPRINTF(Fetch, "Fetch: Bad address %#x (hopefully on a "
                     "misspeculating path)!",
                     memReq[tid]->paddr);
@@ -623,6 +623,11 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
         // Now do the timing access to see whether or not the instruction
         // exists within the cache.
         if (!icachePort->sendTiming(data_pkt)) {
+            if (data_pkt->result == Packet::BadAddress) {
+                fault = TheISA::genMachineCheckFault();
+                delete mem_req;
+                memReq[tid] = NULL;
+            }
             assert(retryPkt == NULL);
             assert(retryTid == -1);
             DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid);
diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh
index b2baae296..ba5260fe2 100644
--- a/src/cpu/o3/iew_impl.hh
+++ b/src/cpu/o3/iew_impl.hh
@@ -600,6 +600,11 @@ template<class Impl>
 void
 DefaultIEW<Impl>::instToCommit(DynInstPtr &inst)
 {
+    // This function should not be called after writebackInsts in a
+    // single cycle.  That will cause problems with an instruction
+    // being added to the queue to commit without being processed by
+    // writebackInsts prior to being sent to commit.
+
     // First check the time slot that this instruction will write
     // to.  If there are free write ports at the time, then go ahead
     // and write the instruction to that time.  If there are not,
@@ -1286,6 +1291,7 @@ DefaultIEW<Impl>::executeInsts()
                 } else if (fault != NoFault) {
                     // If the instruction faulted, then we need to send it along to commit
                     // without the instruction completing.
+                    DPRINTF(IEW, "Store has fault! [sn:%lli]\n", inst->seqNum);
 
                     // Send this instruction to commit, also make sure iew stage
                     // realizes there is activity.
diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh
index 190734dc2..6b12d75b4 100644
--- a/src/cpu/o3/lsq.hh
+++ b/src/cpu/o3/lsq.hh
@@ -311,7 +311,7 @@ class LSQ {
         /** Returns the address ranges of this device. */
         virtual void getDeviceAddressRanges(AddrRangeList &resp,
                                             AddrRangeList &snoop)
-        { resp.clear(); snoop.clear(); }
+        { resp.clear(); snoop.clear(); snoop.push_back(RangeSize(0,-1)); }
 
         /** Timing version of receive.  Handles writing back and
          * completing the load or store that has returned from
diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh
index 2bbab71f0..7b7d1eb8e 100644
--- a/src/cpu/o3/lsq_impl.hh
+++ b/src/cpu/o3/lsq_impl.hh
@@ -46,7 +46,7 @@ template <class Impl>
 void
 LSQ<Impl>::DcachePort::recvFunctional(PacketPtr pkt)
 {
-    panic("O3CPU doesn't expect recvFunctional callback!");
+    warn("O3CPU doesn't update things on a recvFunctional.");
 }
 
 template <class Impl>
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index 90d1a3d53..11a02e7c7 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -492,7 +492,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
     // A bit of a hackish way to get uncached accesses to work only if they're
     // at the head of the LSQ and are ready to commit (at the head of the ROB
     // too).
-    if (req->getFlags() & UNCACHEABLE &&
+    if (req->isUncacheable() &&
         (load_idx != loadHead || !load_inst->isAtCommit())) {
         iewStage->rescheduleMemInst(load_inst);
         ++lsqRescheduledLoads;
@@ -509,7 +509,7 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
             load_idx, store_idx, storeHead, req->getPaddr());
 
 #if FULL_SYSTEM
-    if (req->getFlags() & LOCKED) {
+    if (req->isLocked()) {
         cpu->lockAddr = req->getPaddr();
         cpu->lockFlag = true;
     }
@@ -626,18 +626,30 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
 
     ++usedPorts;
 
-    PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast);
-    data_pkt->dataStatic(load_inst->memData);
-
-    LSQSenderState *state = new LSQSenderState;
-    state->isLoad = true;
-    state->idx = load_idx;
-    state->inst = load_inst;
-    data_pkt->senderState = state;
-
     // if we the cache is not blocked, do cache access
     if (!lsq->cacheBlocked()) {
+        PacketPtr data_pkt =
+            new Packet(req, Packet::ReadReq, Packet::Broadcast);
+        data_pkt->dataStatic(load_inst->memData);
+
+        LSQSenderState *state = new LSQSenderState;
+        state->isLoad = true;
+        state->idx = load_idx;
+        state->inst = load_inst;
+        data_pkt->senderState = state;
+
         if (!dcachePort->sendTiming(data_pkt)) {
+            Packet::Result result = data_pkt->result;
+
+            // Delete state and data packet because a load retry
+            // initiates a pipeline restart; it does not retry.
+            delete state;
+            delete data_pkt;
+
+            if (result == Packet::BadAddress) {
+                return TheISA::genMachineCheckFault();
+            }
+
             // If the access didn't succeed, tell the LSQ by setting
             // the retry thread id.
             lsq->setRetryTid(lsqID);
@@ -664,16 +676,6 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
         return NoFault;
     }
 
-    if (data_pkt->result != Packet::Success) {
-        DPRINTF(LSQUnit, "LSQUnit: D-cache miss!\n");
-        DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n",
-                load_inst->seqNum);
-    } else {
-        DPRINTF(LSQUnit, "LSQUnit: D-cache hit!\n");
-        DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n",
-                load_inst->seqNum);
-    }
-
     return NoFault;
 }
 
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index 98bea74fb..3f9db912f 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -416,7 +416,7 @@ LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
         // realizes there is activity.
         // Mark it as executed unless it is an uncached load that
         // needs to hit the head of commit.
-        if (!(inst->req->getFlags() & UNCACHEABLE) || inst->isAtCommit()) {
+        if (!(inst->req->isUncacheable()) || inst->isAtCommit()) {
             inst->setExecuted();
         }
         iewStage->instToCommit(inst);
@@ -608,21 +608,30 @@ LSQUnit<Impl>::writebackStores()
 
         DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%#x "
                 "to Addr:%#x, data:%#x [sn:%lli]\n",
-                storeWBIdx, storeQueue[storeWBIdx].inst->readPC(),
+                storeWBIdx, inst->readPC(),
                 req->getPaddr(), *(inst->memData),
-                storeQueue[storeWBIdx].inst->seqNum);
+                inst->seqNum);
 
         // @todo: Remove this SC hack once the memory system handles it.
-        if (req->getFlags() & LOCKED) {
-            if (req->getFlags() & UNCACHEABLE) {
+        if (req->isLocked()) {
+            if (req->isUncacheable()) {
                 req->setScResult(2);
             } else {
                 if (cpu->lockFlag) {
                     req->setScResult(1);
+                    DPRINTF(LSQUnit, "Store conditional [sn:%lli] succeeded.",
+                            inst->seqNum);
                 } else {
                     req->setScResult(0);
                     // Hack: Instantly complete this store.
-                    completeDataAccess(data_pkt);
+//                    completeDataAccess(data_pkt);
+                    DPRINTF(LSQUnit, "Store conditional [sn:%lli] failed.  "
+                            "Instantly completing it.\n",
+                            inst->seqNum);
+                    WritebackEvent *wb = new WritebackEvent(inst, data_pkt, this);
+                    wb->schedule(curTick + 1);
+                    delete state;
+                    completeStore(storeWBIdx);
                     incrStIdx(storeWBIdx);
                     continue;
                 }
@@ -633,7 +642,13 @@ LSQUnit<Impl>::writebackStores()
         }
 
         if (!dcachePort->sendTiming(data_pkt)) {
+            if (data_pkt->result == Packet::BadAddress) {
+                panic("LSQ sent out a bad address for a completed store!");
+            }
             // Need to handle becoming blocked on a store.
+            DPRINTF(IEW, "D-Cache became blcoked when writing [sn:%lli], will"
+                    "retry later\n",
+                    inst->seqNum);
             isStoreBlocked = true;
             ++lsqCacheBlocked;
             assert(retryPkt == NULL);
@@ -880,6 +895,9 @@ LSQUnit<Impl>::recvRetry()
         assert(retryPkt != NULL);
 
         if (dcachePort->sendTiming(retryPkt)) {
+            if (retryPkt->result == Packet::BadAddress) {
+                panic("LSQ sent out a bad address for a completed store!");
+            }
             storePostSend(retryPkt);
             retryPkt = NULL;
             isStoreBlocked = false;
diff --git a/src/cpu/o3/thread_context_impl.hh b/src/cpu/o3/thread_context_impl.hh
index 25e1db21c..2bc194d53 100755
--- a/src/cpu/o3/thread_context_impl.hh
+++ b/src/cpu/o3/thread_context_impl.hh
@@ -165,14 +165,14 @@ template <class Impl>
 void
 O3ThreadContext<Impl>::deallocate(int delay)
 {
-    DPRINTF(O3CPU, "Calling deallocate on Thread Context %d\n",
-            getThreadNum());
+    DPRINTF(O3CPU, "Calling deallocate on Thread Context %d delay %d\n",
+            getThreadNum(), delay);
 
     if (thread->status() == ThreadContext::Unallocated)
         return;
 
     thread->setStatus(ThreadContext::Unallocated);
-    cpu->deallocateContext(thread->readTid(), delay);
+    cpu->deallocateContext(thread->readTid(), true, delay);
 }
 
 template <class Impl>
author	Gabe Black <gblack@eecs.umich.edu>	2006-10-12 10:58:45 -0400
committer	Gabe Black <gblack@eecs.umich.edu>	2006-10-12 10:58:45 -0400
commit	866cfaf9dc596d8547e14bc2133fb962776572a7 (patch)
tree	19b82a8021533e8bc2e35f14fb0b6a0440756814 /src/cpu/o3
parent	6a31898a88a9ecced399ccf50636831c21d4a75e (diff)
parent	78aec04b660544ea7af80d76912b4422c4426602 (diff)
download	gem5-866cfaf9dc596d8547e14bc2133fb962776572a7.tar.xz