Fixes to get new CPU model working for simple test case. The CPU does not yet support retrying accesses.

src/cpu/base_dyn_inst.cc: Delete the allocated request and memory data in the destructor. src/cpu/base_dyn_inst.hh: Only copy the addresses if the translation succeeded. src/cpu/o3/alpha_cpu.hh: Return the actual translating port. Don't panic on setNextNPC(), as it's always called, regardless of the architecture, when the process initializes. src/cpu/o3/alpha_cpu_impl.hh: Pass in the MemObject to the thread state in SE mode. src/cpu/o3/commit_impl.hh: Initialize all variables. src/cpu/o3/decode_impl.hh: Handle early resolution of branches properly. src/cpu/o3/fetch.hh: Switch the per-thread memory access structure back from packets to requests. src/cpu/o3/fetch_impl.hh: Initialize all variables, create/delete requests properly. src/cpu/o3/lsq_unit.hh: Include sender state along with the packet. Also include a more generic writeback event that's only used for stores forwarding data to loads. src/cpu/o3/lsq_unit_impl.hh: Redo writeback code to support the response path of the memory system. src/cpu/o3/mem_dep_unit.cc: src/cpu/o3/mem_dep_unit_impl.hh: Wrap the memdep_* counter variables and their uses in #ifdef DEBUG. src/cpu/o3/store_set.cc: Include base/misc.hh to get the panic() function. src/cpu/o3/thread_state.hh: Create with MemObject as well. src/cpu/thread_state.hh: Have a translating port in the thread state object. src/python/m5/objects/AlphaFullCPU.py: The mem parameter is no longer needed. --HG-- extra : convert_revision : a99381fb25cb183322882ce20935a6f3d1f2b64d
author: Kevin Lim <ktlim@umich.edu> 2006-06-05 18:14:39 -0400
committer: Kevin Lim <ktlim@umich.edu> 2006-06-05 18:14:39 -0400
commit: 090496bf2d4c0f55f7f5869a374b4ec3826bccbc (patch)
tree: 4be899992389661b5cd60f2f067e39e719577430
parent: 295c7a908cfeecc7276f559ff53282a177f4eb66 (diff)
download: gem5-090496bf2d4c0f55f7f5869a374b4ec3826bccbc.tar.xz
16 files changed, 229 insertions, 126 deletions
diff --git a/src/cpu/base_dyn_inst.cc b/src/cpu/base_dyn_inst.cc
index 66e425d5c..a62930560 100644
--- a/src/cpu/base_dyn_inst.cc
+++ b/src/cpu/base_dyn_inst.cc
@@ -96,12 +96,14 @@ void
 BaseDynInst<Impl>::initVars()
 {
     req = NULL;
+    memData = NULL;
     effAddr = 0;
     physEffAddr = 0;
     storeSize = 0;
 
     readyRegs = 0;
 
+    // May want to turn this into a bit vector or something.
     completed = false;
     resultReady = false;
     canIssue = false;
@@ -161,7 +163,11 @@ template <class Impl>
 BaseDynInst<Impl>::~BaseDynInst()
 {
     if (req) {
-        req = NULL;
+        delete req;
+    }
+
+    if (memData) {
+        delete [] memData;
     }
 
     if (traceData) {
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh
index a250427ce..1f2b44e02 100644
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -660,11 +660,11 @@ BaseDynInst<Impl>::read(Addr addr, T &data, unsigned flags)
 
     fault = cpu->translateDataReadReq(req);
 
-    effAddr = req->getVaddr();
-    physEffAddr = req->getPaddr();
-    memReqFlags = req->getFlags();
-
     if (fault == NoFault) {
+        effAddr = req->getVaddr();
+        physEffAddr = req->getPaddr();
+        memReqFlags = req->getFlags();
+
 #if FULL_SYSTEM
         if (cpu->system->memctrl->badaddr(physEffAddr)) {
             fault = TheISA::genMachineCheckFault();
@@ -715,11 +715,10 @@ BaseDynInst<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res)
 
     fault = cpu->translateDataWriteReq(req);
 
-    effAddr = req->getVaddr();
-    physEffAddr = req->getPaddr();
-    memReqFlags = req->getFlags();
-
     if (fault == NoFault) {
+        effAddr = req->getVaddr();
+        physEffAddr = req->getPaddr();
+        memReqFlags = req->getFlags();
 #if FULL_SYSTEM
         if (cpu->system->memctrl->badaddr(physEffAddr)) {
             fault = TheISA::genMachineCheckFault();
diff --git a/src/cpu/o3/alpha_cpu.hh b/src/cpu/o3/alpha_cpu.hh
index 2e5c856a8..3c16c3b2e 100644
--- a/src/cpu/o3/alpha_cpu.hh
+++ b/src/cpu/o3/alpha_cpu.hh
@@ -96,7 +96,7 @@ class AlphaFullCPU : public FullO3CPU<Impl>
         /** Reads this CPU's ID. */
         virtual int readCpuId() { return cpu->cpu_id; }
 
-        virtual TranslatingPort *getMemPort() { return /*thread->port*/ NULL; }
+        virtual TranslatingPort *getMemPort() { return thread->port; }
 
 #if FULL_SYSTEM
         /** Returns a pointer to the system. */
@@ -226,7 +226,7 @@ class AlphaFullCPU : public FullO3CPU<Impl>
         }
 
         virtual void setNextNPC(uint64_t val)
-        { panic("Alpha has no NextNPC!"); }
+        { }
 
         /** Reads a miscellaneous register. */
         virtual MiscReg readMiscReg(int misc_reg)
diff --git a/src/cpu/o3/alpha_cpu_impl.hh b/src/cpu/o3/alpha_cpu_impl.hh
index ad4401f7e..7c136638d 100644
--- a/src/cpu/o3/alpha_cpu_impl.hh
+++ b/src/cpu/o3/alpha_cpu_impl.hh
@@ -73,7 +73,8 @@ AlphaFullCPU<Impl>::AlphaFullCPU(Params *params)
         if (i < params->workload.size()) {
             DPRINTF(FullCPU, "FullCPU: Workload[%i] process is %#x",
                     i, this->thread[i]);
-            this->thread[i] = new Thread(this, i, params->workload[i], i);
+            this->thread[i] = new Thread(this, i, params->workload[i],
+                                         i, params->mem);
 
             this->thread[i]->setStatus(ExecContext::Suspended);
             //usedTids[i] = true;
@@ -83,7 +84,7 @@ AlphaFullCPU<Impl>::AlphaFullCPU(Params *params)
             //when scheduling threads to CPU
             Process* dummy_proc = NULL;
 
-            this->thread[i] = new Thread(this, i, dummy_proc, i);
+            this->thread[i] = new Thread(this, i, dummy_proc, i, params->mem);
             //usedTids[i] = false;
         }
 #endif // !FULL_SYSTEM
diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh
index f8a252b87..629acb310 100644
--- a/src/cpu/o3/commit_impl.hh
+++ b/src/cpu/o3/commit_impl.hh
@@ -75,6 +75,7 @@ DefaultCommit<Impl>::DefaultCommit(Params *params)
       iewWidth(params->executeWidth),
       commitWidth(params->commitWidth),
       numThreads(params->numberOfThreads),
+      switchPending(false),
       switchedOut(false),
       trapLatency(params->trapLatency),
       fetchTrapLatency(params->fetchTrapLatency)
@@ -115,6 +116,7 @@ DefaultCommit<Impl>::DefaultCommit(Params *params)
         changedROBNumEntries[i] = false;
         trapSquash[i] = false;
         xcSquash[i] = false;
+        PC[i] = nextPC[i] = 0;
     }
 
     fetchFaultTick = 0;
diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh
index 64b04bc3d..8a6ea6626 100644
--- a/src/cpu/o3/decode_impl.hh
+++ b/src/cpu/o3/decode_impl.hh
@@ -280,7 +280,7 @@ DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid)
     toFetch->decodeInfo[tid].doneSeqNum = inst->seqNum;
     toFetch->decodeInfo[tid].predIncorrect = true;
     toFetch->decodeInfo[tid].squash = true;
-    toFetch->decodeInfo[tid].nextPC = inst->readNextPC();
+    toFetch->decodeInfo[tid].nextPC = inst->branchTarget();
     toFetch->decodeInfo[tid].branchTaken =
         inst->readNextPC() != (inst->readPC() + sizeof(TheISA::MachInst));
 
@@ -723,9 +723,8 @@ DefaultDecode<Impl>::decodeInsts(unsigned tid)
         // Go ahead and compute any PC-relative branches.
         if (inst->isDirectCtrl() && inst->isUncondCtrl()) {
             ++decodeBranchResolved;
-            inst->setNextPC(inst->branchTarget());
 
-            if (inst->mispredicted()) {
+            if (inst->branchTarget() != inst->readPredTarg()) {
                 ++decodeBranchMispred;
 
                 // Might want to set some sort of boolean and just do
diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh
index 23328c534..9e8aeb8fb 100644
--- a/src/cpu/o3/fetch.hh
+++ b/src/cpu/o3/fetch.hh
@@ -323,8 +323,8 @@ class DefaultFetch
     /** Per-thread next PC. */
     Addr nextPC[Impl::MaxThreads];
 
-    /** Memory packet used to access cache. */
-    PacketPtr memPkt[Impl::MaxThreads];
+    /** Memory request used to access cache. */
+    RequestPtr memReq[Impl::MaxThreads];
 
     /** Variable that tracks if fetch has written to the time buffer this
      * cycle. Used to tell CPU if there is activity this cycle.
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index 69c43a6a2..84f2c3b7e 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -105,7 +105,8 @@ DefaultFetch<Impl>::IcachePort::recvRetry()
 
 template<class Impl>
 DefaultFetch<Impl>::DefaultFetch(Params *params)
-    : branchPred(params),
+    : mem(params->mem),
+      branchPred(params),
       decodeToFetchDelay(params->decodeToFetchDelay),
       renameToFetchDelay(params->renameToFetchDelay),
       iewToFetchDelay(params->iewToFetchDelay),
@@ -113,7 +114,8 @@ DefaultFetch<Impl>::DefaultFetch(Params *params)
       fetchWidth(params->fetchWidth),
       numThreads(params->numberOfThreads),
       numFetchingThreads(params->smtNumFetchingThreads),
-      interruptPending(false)
+      interruptPending(false),
+      switchedOut(false)
 {
     if (numThreads > Impl::MaxThreads)
         fatal("numThreads is not a valid value\n");
@@ -161,7 +163,7 @@ DefaultFetch<Impl>::DefaultFetch(Params *params)
 
         priorityList.push_back(tid);
 
-        memPkt[tid] = NULL;
+        memReq[tid] = NULL;
 
         // Create space to store a cache line.
         cacheData[tid] = new uint8_t[cacheBlkSize];
@@ -283,6 +285,10 @@ DefaultFetch<Impl>::setCPU(FullCPU *cpu_ptr)
     // Name is finally available, so create the port.
     icachePort = new IcachePort(this);
 
+    Port *mem_dport = mem->getPort("");
+    icachePort->setPeer(mem_dport);
+    mem_dport->setPeer(icachePort);
+
     // Fetch needs to start fetching instructions at the very beginning,
     // so it must start up in active state.
     switchToActive();
@@ -355,10 +361,12 @@ DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt)
     // Only change the status if it's still waiting on the icache access
     // to return.
     if (fetchStatus[tid] != IcacheWaitResponse ||
-        pkt != memPkt[tid] ||
+        pkt->req != memReq[tid] ||
         isSwitchedOut()) {
         ++fetchIcacheSquashes;
+        delete pkt->req;
         delete pkt;
+        memReq[tid] = NULL;
         return;
     }
 
@@ -383,7 +391,7 @@ DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt)
     // Reset the mem req to NULL.
     delete pkt->req;
     delete pkt;
-    memPkt[tid] = NULL;
+    memReq[tid] = NULL;
 }
 
 template <class Impl>
@@ -514,7 +522,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
     RequestPtr mem_req = new Request(tid, fetch_PC, cacheBlkSize, flags,
                                      fetch_PC, cpu->readCpuId(), tid);
 
-    memPkt[tid] = NULL;
+    memReq[tid] = mem_req;
 
     // Translate the instruction request.
 //#if FULL_SYSTEM
@@ -565,6 +573,9 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
                 "response.\n", tid);
 
         fetchStatus[tid] = IcacheWaitResponse;
+    } else {
+        delete mem_req;
+        memReq[tid] = NULL;
     }
 
     ret_fault = fault;
@@ -585,8 +596,9 @@ DefaultFetch<Impl>::doSquash(const Addr &new_PC, unsigned tid)
     if (fetchStatus[tid] == IcacheWaitResponse) {
         DPRINTF(Fetch, "[tid:%i]: Squashing outstanding Icache miss.\n",
                 tid);
-        delete memPkt[tid];
-        memPkt[tid] = NULL;
+        // Should I delete this here or when it comes back from the cache?
+//        delete memReq[tid];
+        memReq[tid] = NULL;
     }
 
     fetchStatus[tid] = Squashing;
@@ -1083,7 +1095,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
 
         warn("%lli fault (%d) detected @ PC %08p", curTick, fault, PC[tid]);
 #else // !FULL_SYSTEM
-        fatal("fault (%d) detected @ PC %08p", fault, PC[tid]);
+        warn("%lli fault (%d) detected @ PC %08p", curTick, fault, PC[tid]);
 #endif // FULL_SYSTEM
     }
 }
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index 393d8947d..414309679 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -130,8 +130,6 @@ class LSQUnit {
 
     void completeDataAccess(PacketPtr pkt);
 
-    void completeStoreDataAccess(DynInstPtr &inst);
-
     // @todo: Include stats in the LSQ unit.
     //void regStats();
 
@@ -206,10 +204,12 @@ class LSQUnit {
 
     /** Returns if the LSQ unit will writeback on this cycle. */
     bool willWB() { return storeQueue[storeWBIdx].canWB &&
-                        !storeQueue[storeWBIdx].completed/* &&
-                                                            !dcacheInterface->isBlocked()*/; }
+                        !storeQueue[storeWBIdx].completed &&
+                        !isStoreBlocked; }
 
   private:
+    void writeback(DynInstPtr &inst, PacketPtr pkt);
+
     /** Completes the store at the specified index. */
     void completeStore(int store_idx);
 
@@ -265,9 +265,43 @@ class LSQUnit {
     /** Pointer to the D-cache. */
     DcachePort *dcachePort;
 
+    class LSQSenderState : public Packet::SenderState
+    {
+      public:
+        LSQSenderState()
+            : noWB(false)
+        { }
+
+//      protected:
+        DynInstPtr inst;
+        bool isLoad;
+        int idx;
+        bool noWB;
+    };
+
     /** Pointer to the page table. */
 //    PageTable *pTable;
 
+    class WritebackEvent : public Event {
+      public:
+        /** Constructs a writeback event. */
+        WritebackEvent(DynInstPtr &_inst, PacketPtr pkt, LSQUnit *lsq_ptr);
+
+        /** Processes the writeback event. */
+        void process();
+
+        /** Returns the description of this event. */
+        const char *description();
+
+      private:
+        DynInstPtr inst;
+
+        PacketPtr pkt;
+
+        /** The pointer to the LSQ unit that issued the store. */
+        LSQUnit<Impl> *lsqPtr;
+    };
+
   public:
     struct SQEntry {
         /** Constructs an empty store queue entry. */
@@ -362,6 +396,8 @@ class LSQUnit {
     /** The index of the above store. */
     int stallingLoadIdx;
 
+    bool isStoreBlocked;
+
     /** Whether or not a load is blocked due to the memory system. */
     bool isLoadBlocked;
 
@@ -521,16 +557,17 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
             DPRINTF(LSQUnit, "Forwarding from store idx %i to load to "
                     "addr %#x, data %#x\n",
                     store_idx, req->getVaddr(), *(load_inst->memData));
-/*
-            typename LdWritebackEvent *wb =
-                new typename LdWritebackEvent(load_inst,
-                                              iewStage);
+
+            PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast);
+            data_pkt->dataStatic(load_inst->memData);
+
+            WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt, this);
 
             // We'll say this has a 1 cycle load-store forwarding latency
             // for now.
             // @todo: Need to make this a parameter.
             wb->schedule(curTick);
-*/
+
             // Should keep track of stat for forwarded data
             return NoFault;
         } else if ((store_has_lower_limit && lower_load_has_store_part) ||
@@ -585,6 +622,12 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
     PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast);
     data_pkt->dataStatic(load_inst->memData);
 
+    LSQSenderState *state = new LSQSenderState;
+    state->isLoad = true;
+    state->idx = load_idx;
+    state->inst = load_inst;
+    data_pkt->senderState = state;
+
     // if we have a cache, do cache access too
     if (!dcachePort->sendTiming(data_pkt)) {
         // There's an older load that's already going to squash.
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index 1ad561dc0..5398426e2 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -32,65 +32,57 @@
 #include "mem/request.hh"
 
 template<class Impl>
-void
-LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
+LSQUnit<Impl>::WritebackEvent::WritebackEvent(DynInstPtr &_inst, PacketPtr _pkt,
+                                              LSQUnit *lsq_ptr)
+    : Event(&mainEventQueue), inst(_inst), pkt(_pkt), lsqPtr(lsq_ptr)
 {
-/*
-    DPRINTF(IEW, "Load writeback event [sn:%lli]\n", inst->seqNum);
-    DPRINTF(Activity, "Activity: Ld Writeback event [sn:%lli]\n", inst->seqNum);
-
-    //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum);
-
-    if (iewStage->isSwitchedOut()) {
-        inst = NULL;
-        return;
-    } else if (inst->isSquashed()) {
-        iewStage->wakeCPU();
-        inst = NULL;
-        return;
-    }
-
-    iewStage->wakeCPU();
-
-    if (!inst->isExecuted()) {
-        inst->setExecuted();
+    this->setFlags(Event::AutoDelete);
+}
 
-        // Complete access to copy data to proper place.
-        inst->completeAcc();
+template<class Impl>
+void
+LSQUnit<Impl>::WritebackEvent::process()
+{
+    if (!lsqPtr->isSwitchedOut()) {
+        lsqPtr->writeback(inst, pkt);
     }
+    delete pkt;
+}
 
-    // Need to insert instruction into queue to commit
-    iewStage->instToCommit(inst);
-
-    iewStage->activityThisCycle();
-
-    inst = NULL;
-*/
+template<class Impl>
+const char *
+LSQUnit<Impl>::WritebackEvent::description()
+{
+    return "Store writeback event";
 }
 
 template<class Impl>
 void
-LSQUnit<Impl>::completeStoreDataAccess(DynInstPtr &inst)
+LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
 {
-/*
-    DPRINTF(LSQ, "Cache miss complete for store idx:%i\n", storeIdx);
-    DPRINTF(Activity, "Activity: st writeback event idx:%i\n", storeIdx);
+    LSQSenderState *state = dynamic_cast<LSQSenderState *>(pkt->senderState);
+    DynInstPtr inst = state->inst;
+    DPRINTF(IEW, "Writeback event [sn:%lli]\n", inst->seqNum);
+//    DPRINTF(Activity, "Activity: Ld Writeback event [sn:%lli]\n", inst->seqNum);
 
-    //lsqPtr->removeMSHR(lsqPtr->storeQueue[storeIdx].inst->seqNum);
-
-    if (lsqPtr->isSwitchedOut()) {
-        if (wbEvent)
-            delete wbEvent;
+    //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum);
 
+    if (isSwitchedOut() || inst->isSquashed()) {
+        delete state;
+        delete pkt;
         return;
-    }
+    } else {
+        if (!state->noWB) {
+            writeback(inst, pkt);
+        }
 
-    lsqPtr->cpu->wakeCPU();
+        if (inst->isStore()) {
+            completeStore(state->idx);
+        }
+    }
 
-    if (wb)
-        lsqPtr->completeDataAccess(storeIdx);
-    lsqPtr->completeStore(storeIdx);
-*/
+    delete state;
+    delete pkt;
 }
 
 template <class Impl>
@@ -146,7 +138,8 @@ LSQUnit<Impl>::DcachePort::recvRetry()
 
 template <class Impl>
 LSQUnit<Impl>::LSQUnit()
-    : loads(0), stores(0), storesToWB(0), stalled(false), isLoadBlocked(false),
+    : loads(0), stores(0), storesToWB(0), stalled(false),
+      isStoreBlocked(false), isLoadBlocked(false),
       loadBlockedHandled(false)
 {
 }
@@ -176,9 +169,7 @@ LSQUnit<Impl>::init(Params *params, unsigned maxLQEntries,
     usedPorts = 0;
     cachePorts = params->cachePorts;
 
-    Port *mem_dport = params->mem->getPort("");
-    dcachePort->setPeer(mem_dport);
-    mem_dport->setPeer(dcachePort);
+    mem = params->mem;
 
     memDepViolator = NULL;
 
@@ -191,6 +182,10 @@ LSQUnit<Impl>::setCPU(FullCPU *cpu_ptr)
 {
     cpu = cpu_ptr;
     dcachePort = new DcachePort(cpu, this);
+
+    Port *mem_dport = mem->getPort("");
+    dcachePort->setPeer(mem_dport);
+    mem_dport->setPeer(dcachePort);
 }
 
 template<class Impl>
@@ -446,7 +441,6 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
     int load_idx = store_inst->lqIdx;
 
     Fault store_fault = store_inst->initiateAcc();
-//    Fault store_fault = store_inst->execute();
 
     if (storeQueue[store_idx].size == 0) {
         DPRINTF(LSQUnit,"Fault on Store PC %#x, [sn:%lli],Size = 0\n",
@@ -562,6 +556,12 @@ LSQUnit<Impl>::writebackStores()
            storeQueue[storeWBIdx].canWB &&
            usedPorts < cachePorts) {
 
+        if (isStoreBlocked) {
+            DPRINTF(LSQUnit, "Unable to write back any more stores, cache"
+                    " is blocked!\n");
+            break;
+        }
+
         // Store didn't write any data so no need to write it back to
         // memory.
         if (storeQueue[storeWBIdx].size == 0) {
@@ -571,13 +571,7 @@ LSQUnit<Impl>::writebackStores()
 
             continue;
         }
-/*
-        if (dcacheInterface && dcacheInterface->isBlocked()) {
-            DPRINTF(LSQUnit, "Unable to write back any more stores, cache"
-                    " is blocked!\n");
-            break;
-        }
-*/
+
         ++usedPorts;
 
         if (storeQueue[storeWBIdx].inst->isDataPrefetch()) {
@@ -596,11 +590,18 @@ LSQUnit<Impl>::writebackStores()
 
         assert(!inst->memData);
         inst->memData = new uint8_t[64];
-        memcpy(inst->memData, (uint8_t *)&storeQueue[storeWBIdx].data, req->getSize());
+        memcpy(inst->memData, (uint8_t *)&storeQueue[storeWBIdx].data,
+               req->getSize());
 
         PacketPtr data_pkt = new Packet(req, Packet::WriteReq, Packet::Broadcast);
         data_pkt->dataStatic(inst->memData);
 
+        LSQSenderState *state = new LSQSenderState;
+        state->isLoad = false;
+        state->idx = storeWBIdx;
+        state->inst = inst;
+        data_pkt->senderState = state;
+
         DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%#x "
                 "to Addr:%#x, data:%#x [sn:%lli]\n",
                 storeWBIdx, storeQueue[storeWBIdx].inst->readPC(),
@@ -609,11 +610,8 @@ LSQUnit<Impl>::writebackStores()
 
         if (!dcachePort->sendTiming(data_pkt)) {
             // Need to handle becoming blocked on a store.
+            isStoreBlocked = true;
         } else {
-            /*
-            StoreCompletionEvent *store_event = new
-                StoreCompletionEvent(storeWBIdx, NULL, this);
-            */
             if (isStalled() &&
                 storeQueue[storeWBIdx].inst->seqNum == stallingStoreIsn) {
                 DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] "
@@ -623,18 +621,13 @@ LSQUnit<Impl>::writebackStores()
                 stallingStoreIsn = 0;
                 iewStage->replayMemInst(loadQueue[stallingLoadIdx]);
             }
-/*
-            typename LdWritebackEvent *wb = NULL;
-            if (req->flags & LOCKED) {
-                // Stx_C should not generate a system port transaction
-                // if it misses in the cache, but that might be hard
-                // to accomplish without explicit cache support.
-                wb = new typename
-                    LdWritebackEvent(storeQueue[storeWBIdx].inst,
-                                     iewStage);
-                store_event->wbEvent = wb;
+
+            if (!(req->getFlags() & LOCKED)) {
+                assert(!storeQueue[storeWBIdx].inst->isStoreConditional());
+                // Non-store conditionals do not need a writeback.
+                state->noWB = true;
             }
-*/
+
             if (data_pkt->result != Packet::Success) {
                 DPRINTF(LSQUnit,"D-Cache Write Miss on idx:%i!\n",
                         storeWBIdx);
@@ -761,6 +754,31 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
 
 template <class Impl>
 void
+LSQUnit<Impl>::writeback(DynInstPtr &inst, PacketPtr pkt)
+{
+    iewStage->wakeCPU();
+
+    // Squashed instructions do not need to complete their access.
+    if (inst->isSquashed()) {
+        assert(!inst->isStore());
+        return;
+    }
+
+    if (!inst->isExecuted()) {
+        inst->setExecuted();
+
+        // Complete access to copy data to proper place.
+        inst->completeAcc(pkt);
+    }
+
+    // Need to insert instruction into queue to commit
+    iewStage->instToCommit(inst);
+
+    iewStage->activityThisCycle();
+}
+
+template <class Impl>
+void
 LSQUnit<Impl>::completeStore(int store_idx)
 {
     assert(storeQueue[store_idx].inst);
diff --git a/src/cpu/o3/mem_dep_unit.cc b/src/cpu/o3/mem_dep_unit.cc
index 1284361cc..a95103266 100644
--- a/src/cpu/o3/mem_dep_unit.cc
+++ b/src/cpu/o3/mem_dep_unit.cc
@@ -37,6 +37,7 @@
 // AlphaSimpleImpl.
 template class MemDepUnit<StoreSet, AlphaSimpleImpl>;
 
+#ifdef DEBUG
 template <>
 int
 MemDepUnit<StoreSet, AlphaSimpleImpl>::MemDepEntry::memdep_count = 0;
@@ -46,3 +47,4 @@ MemDepUnit<StoreSet, AlphaSimpleImpl>::MemDepEntry::memdep_insert = 0;
 template <>
 int
 MemDepUnit<StoreSet, AlphaSimpleImpl>::MemDepEntry::memdep_erase = 0;
+#endif
diff --git a/src/cpu/o3/mem_dep_unit_impl.hh b/src/cpu/o3/mem_dep_unit_impl.hh
index 50ad1e2c8..16f67a4e0 100644
--- a/src/cpu/o3/mem_dep_unit_impl.hh
+++ b/src/cpu/o3/mem_dep_unit_impl.hh
@@ -61,7 +61,9 @@ MemDepUnit<MemDepPred, Impl>::~MemDepUnit()
         }
     }
 
+#ifdef DEBUG
     assert(MemDepEntry::memdep_count == 0);
+#endif
 }
 
 template <class MemDepPred, class Impl>
@@ -143,7 +145,9 @@ MemDepUnit<MemDepPred, Impl>::insert(DynInstPtr &inst)
     // Add the MemDepEntry to the hash.
     memDepHash.insert(
         std::pair<InstSeqNum, MemDepEntryPtr>(inst->seqNum, inst_entry));
+#ifdef DEBUG
     MemDepEntry::memdep_insert++;
+#endif
 
     instList[tid].push_back(inst);
 
@@ -229,7 +233,9 @@ MemDepUnit<MemDepPred, Impl>::insertNonSpec(DynInstPtr &inst)
     // Insert the MemDepEntry into the hash.
     memDepHash.insert(
         std::pair<InstSeqNum, MemDepEntryPtr>(inst->seqNum, inst_entry));
+#ifdef DEBUG
     MemDepEntry::memdep_insert++;
+#endif
 
     // Add the instruction to the list.
     instList[tid].push_back(inst);
@@ -277,7 +283,9 @@ MemDepUnit<MemDepPred, Impl>::insertBarrier(DynInstPtr &barr_inst)
     // Add the MemDepEntry to the hash.
     memDepHash.insert(
         std::pair<InstSeqNum, MemDepEntryPtr>(barr_sn, inst_entry));
+#ifdef DEBUG
     MemDepEntry::memdep_insert++;
+#endif
 
     // Add the instruction to the instruction list.
     instList[tid].push_back(barr_inst);
@@ -377,7 +385,9 @@ MemDepUnit<MemDepPred, Impl>::completed(DynInstPtr &inst)
     (*hash_it).second = NULL;
 
     memDepHash.erase(hash_it);
+#ifdef DEBUG
     MemDepEntry::memdep_erase++;
+#endif
 }
 
 template <class MemDepPred, class Impl>
@@ -472,7 +482,9 @@ MemDepUnit<MemDepPred, Impl>::squash(const InstSeqNum &squashed_num,
         (*hash_it).second = NULL;
 
         memDepHash.erase(hash_it);
+#ifdef DEBUG
         MemDepEntry::memdep_erase++;
+#endif
 
         instList[tid].erase(squash_it--);
     }
@@ -553,5 +565,7 @@ MemDepUnit<MemDepPred, Impl>::dumpLists()
 
     cprintf("Memory dependence hash size: %i\n", memDepHash.size());
 
+#ifdef DEBUG
     cprintf("Memory dependence entries: %i\n", MemDepEntry::memdep_count);
+#endif
 }
diff --git a/src/cpu/o3/store_set.cc b/src/cpu/o3/store_set.cc
index 0023cee36..2d28b617f 100644
--- a/src/cpu/o3/store_set.cc
+++ b/src/cpu/o3/store_set.cc
@@ -29,6 +29,7 @@
  */
 
 #include "base/intmath.hh"
+#include "base/misc.hh"
 #include "base/trace.hh"
 #include "cpu/o3/store_set.hh"
 
diff --git a/src/cpu/o3/thread_state.hh b/src/cpu/o3/thread_state.hh
index dfb1530d0..3fa60f093 100644
--- a/src/cpu/o3/thread_state.hh
+++ b/src/cpu/o3/thread_state.hh
@@ -86,14 +86,9 @@ struct O3ThreadState : public ThreadState {
           inSyscall(0), trapPending(0)
     { }
 #else
-    O3ThreadState(FullCPU *_cpu, int _thread_num, Process *_process, int _asid)
-        : ThreadState(-1, _thread_num, NULL, _process, _asid),
-          cpu(_cpu), inSyscall(0), trapPending(0)
-    { }
-
-    O3ThreadState(FullCPU *_cpu, int _thread_num, FunctionalMemory *_mem,
-                  int _asid)
-        : ThreadState(-1, _thread_num, _mem, NULL, _asid),
+    O3ThreadState(FullCPU *_cpu, int _thread_num, Process *_process, int _asid,
+                  MemObject *mem)
+        : ThreadState(-1, _thread_num, mem, _process, _asid),
           cpu(_cpu), inSyscall(0), trapPending(0)
     { }
 #endif
diff --git a/src/cpu/thread_state.hh b/src/cpu/thread_state.hh
index e09cb12fd..a96884d5b 100644
--- a/src/cpu/thread_state.hh
+++ b/src/cpu/thread_state.hh
@@ -31,6 +31,10 @@
 
 #include "cpu/exec_context.hh"
 
+#if !FULL_SYSTEM
+#include "mem/translating_port.hh"
+#endif
+
 #if FULL_SYSTEM
 class EndQuiesceEvent;
 class FunctionProfile;
@@ -51,17 +55,27 @@ class Process;
  */
 struct ThreadState {
 #if FULL_SYSTEM
-    ThreadState(int _cpuId, int _tid, FunctionalMemory *_mem)
-        : cpuId(_cpuId), tid(_tid), mem(_mem), lastActivate(0), lastSuspend(0),
+    ThreadState(int _cpuId, int _tid)
+        : cpuId(_cpuId), tid(_tid), lastActivate(0), lastSuspend(0),
           profile(NULL), profileNode(NULL), profilePC(0), quiesceEvent(NULL)
 #else
-    ThreadState(int _cpuId, int _tid, FunctionalMemory *_mem,
+    ThreadState(int _cpuId, int _tid, MemObject *mem,
                 Process *_process, short _asid)
-        : cpuId(_cpuId), tid(_tid), mem(_mem), process(_process), asid(_asid)
+        : cpuId(_cpuId), tid(_tid), process(_process), asid(_asid)
 #endif
     {
         funcExeInst = 0;
         storeCondFailures = 0;
+#if !FULL_SYSTEM
+        /* Use this port for syscall emulation writes to memory. */
+        Port *mem_port;
+        port = new TranslatingPort(csprintf("%d-funcport",
+                                            tid),
+                                   process->pTable, false);
+        mem_port = mem->getPort("functional");
+        mem_port->setPeer(port);
+        port->setPeer(mem_port);
+#endif
     }
 
     ExecContext::Status status;
@@ -79,8 +93,6 @@ struct ThreadState {
     Counter numLoad;
     Counter startNumLoad;
 
-    FunctionalMemory *mem;	// functional storage for process address space
-
 #if FULL_SYSTEM
     Tick lastActivate;
     Tick lastSuspend;
@@ -93,6 +105,8 @@ struct ThreadState {
 
     Kernel::Statistics *kernelStats;
 #else
+    TranslatingPort *port;
+
     Process *process;
 
     // Address space ID.  Note that this is used for TIMING cache
diff --git a/src/python/m5/objects/AlphaFullCPU.py b/src/python/m5/objects/AlphaFullCPU.py
index 043c3c08f..7c772d3f2 100644
--- a/src/python/m5/objects/AlphaFullCPU.py
+++ b/src/python/m5/objects/AlphaFullCPU.py
@@ -6,9 +6,6 @@ class DerivAlphaFullCPU(BaseCPU):
     activity = Param.Unsigned("Initial count")
     numThreads = Param.Unsigned("number of HW thread contexts")
 
-    if not build_env['FULL_SYSTEM']:
-        mem = Param.FunctionalMemory(NULL, "memory")
-
     checker = Param.BaseCPU(NULL, "checker")
 
     cachePorts = Param.Unsigned("Cache Ports")
author	Kevin Lim <ktlim@umich.edu>	2006-06-05 18:14:39 -0400
committer	Kevin Lim <ktlim@umich.edu>	2006-06-05 18:14:39 -0400
commit	090496bf2d4c0f55f7f5869a374b4ec3826bccbc (patch)
tree	4be899992389661b5cd60f2f067e39e719577430
parent	295c7a908cfeecc7276f559ff53282a177f4eb66 (diff)
download	gem5-090496bf2d4c0f55f7f5869a374b4ec3826bccbc.tar.xz