inorder: add a fetch buffer to fetch unit

Give fetch unit its own parameterizable fetch buffer to read from. It is very inefficient (architecturally and in simulation) to continually fetch at the granularity of the word size. As expected, the number of fetch memory requests drops dramatically.
author: Korey Sewell <ksewell@umich.edu> 2011-02-04 00:08:22 -0500
committer: Korey Sewell <ksewell@umich.edu> 2011-02-04 00:08:22 -0500
commit: 68d962f8aff7d2fcc2f8ee77878dd5cab73b69f2 (patch)
tree: c4bb7d88b864e5ee353b743553bbea0efd34cbb8 /src/cpu
parent: 56ce8acd412747b728b7ad02537a3afd202ae8e8 (diff)
download: gem5-68d962f8aff7d2fcc2f8ee77878dd5cab73b69f2.tar.xz
5 files changed, 373 insertions, 139 deletions
diff --git a/src/cpu/inorder/InOrderCPU.py b/src/cpu/inorder/InOrderCPU.py
index 5d24ae4fd..4766a1ac1 100644
--- a/src/cpu/inorder/InOrderCPU.py
+++ b/src/cpu/inorder/InOrderCPU.py
@@ -48,6 +48,9 @@ class InOrderCPU(BaseCPU):
     dcache_port = Port("Data Port")
     _cached_ports = ['icache_port', 'dcache_port']
 
+    fetchBuffSize = Param.Unsigned(4, "Fetch Buffer Size (Number of Cache Blocks Stored)")
+    memBlockSize = Param.Unsigned(64, "Memory Block Size")
+
     predType = Param.String("tournament", "Branch predictor type ('local', 'tournament')")
     localPredictorSize = Param.Unsigned(2048, "Size of local predictor")
     localCtrBits = Param.Unsigned(2, "Bits per counter")
@@ -69,8 +72,6 @@ class InOrderCPU(BaseCPU):
     functionTraceStart = Param.Tick(0, "Cycle to start function trace")
     stageTracing = Param.Bool(False, "Enable tracing of each stage in CPU")
 
-    memBlockSize = Param.Unsigned(64, "Memory Block Size")
-
     multLatency = Param.Unsigned(1, "Latency for Multiply Operations")
     multRepeatRate = Param.Unsigned(1, "Repeat Rate for Multiply Operations")
     div8Latency = Param.Unsigned(1, "Latency for 8-bit Divide Operations")
diff --git a/src/cpu/inorder/resources/cache_unit.cc b/src/cpu/inorder/resources/cache_unit.cc
index 03f1c1527..6c9da67f5 100644
--- a/src/cpu/inorder/resources/cache_unit.cc
+++ b/src/cpu/inorder/resources/cache_unit.cc
@@ -97,7 +97,7 @@ CacheUnit::CachePort::recvRetry()
 CacheUnit::CacheUnit(string res_name, int res_id, int res_width,
         int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params)
     : Resource(res_name, res_id, res_width, res_latency, _cpu),
-      cachePortBlocked(false), predecoder(NULL)
+      cachePortBlocked(false)
 {
     cachePort = new CachePort(this);
 
@@ -137,6 +137,9 @@ CacheUnit::init()
     // Switch to Timing TLB translations.
     resourceEvent = new CacheUnitEvent[width];
 
+    cacheBlkSize = this->cachePort->peerBlockSize();
+    cacheBlkMask = cacheBlkSize  - 1;
+
     initSlots();
 }
 
@@ -375,28 +378,20 @@ CacheUnit::requestAgain(DynInstPtr inst, bool &service_request)
     }
 }
 
-Fault
-CacheUnit::doTLBAccess(DynInstPtr inst, CacheReqPtr cache_req, int acc_size,
-                       int flags, TheISA::TLB::Mode tlb_mode)
+void
+CacheUnit::setupMemRequest(DynInstPtr inst, CacheReqPtr cache_req,
+                           int acc_size, int flags)
 {
     ThreadID tid = inst->readTid();
     Addr aligned_addr = inst->getMemAddr();
-    unsigned stage_num = cache_req->getStageNum();
-    unsigned slot_idx = cache_req->getSlot();
 
-    if (tlb_mode == TheISA::TLB::Execute) {
-        inst->fetchMemReq =
-            new Request(inst->readTid(), aligned_addr, acc_size, flags,
-                        inst->instAddr(), cpu->readCpuId(), inst->readTid());
-        cache_req->memReq = inst->fetchMemReq;
-    } else {
-        if (!cache_req->is2ndSplit()) {            
+    if (!cache_req->is2ndSplit()) {
             inst->dataMemReq =
                 new Request(cpu->asid[tid], aligned_addr, acc_size, flags,
                             inst->instAddr(), cpu->readCpuId(),
-                            inst->readTid());
+                            tid);
             cache_req->memReq = inst->dataMemReq;
-        } else {
+    } else {
             assert(inst->splitInst);
             
             inst->splitMemReq = new Request(cpu->asid[tid], 
@@ -407,9 +402,19 @@ CacheUnit::doTLBAccess(DynInstPtr inst, CacheReqPtr cache_req, int acc_size,
                                             cpu->readCpuId(), 
                                             tid);
             cache_req->memReq = inst->splitMemReq;            
-        }
     }
-    
+}
+
+Fault
+CacheUnit::doTLBAccess(DynInstPtr inst, CacheReqPtr cache_req, int acc_size,
+                       int flags, TheISA::TLB::Mode tlb_mode)
+{
+    ThreadID tid = inst->readTid();
+    //Addr aligned_addr = inst->getMemAddr();
+    unsigned stage_num = cache_req->getStageNum();
+    unsigned slot_idx = cache_req->getSlot();
+
+    setupMemRequest(inst, cache_req, acc_size, flags);
 
     cache_req->fault =
         _tlb->translateAtomic(cache_req->memReq,
@@ -842,8 +847,8 @@ CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res,
     }
 
     DPRINTF(InOrderCachePort,
-            "[tid:%i] [sn:%i] attempting to access cache\n",
-            tid, inst->seqNum);
+            "[tid:%i] [sn:%i] attempting to access cache for addr %08p\n",
+            tid, inst->seqNum, cache_req->dataPkt->getAddr());
 
     if (do_access) {
         if (!cachePort->sendTiming(cache_req->dataPkt)) {
@@ -1086,6 +1091,24 @@ CacheUnit::squashDueToMemStall(DynInstPtr inst, int stage_num,
     squash(inst, stage_num, squash_seq_num + 1, tid);    
 }
 
+void
+CacheUnit::squashCacheRequest(CacheReqPtr req_ptr)
+{
+    DynInstPtr inst =  req_ptr->getInst();
+
+    req_ptr->setSquashed();
+    inst->setSquashed();
+    if (inst->validMemAddr()) {
+        DPRINTF(AddrDep, "Squash of [tid:%i] [sn:%i], attempting to "
+                "remove addr. %08p dependencies.\n",
+                inst->readTid(),
+                inst->seqNum,
+                inst->getMemAddr());
+
+        removeAddrDependency(inst);
+    }
+}
+
 
 void
 CacheUnit::squash(DynInstPtr inst, int stage_num,
@@ -1115,14 +1138,12 @@ CacheUnit::squash(DynInstPtr inst, int stage_num,
                 map_it++;                
                 continue;                
             }
-            
-            req_ptr->setSquashed();
-
-            req_ptr->getInst()->setSquashed();
 
             CacheReqPtr cache_req = dynamic_cast<CacheReqPtr>(req_ptr);
             assert(cache_req);
 
+            squashCacheRequest(cache_req);
+
             int req_slot_num = req_ptr->getSlot();
 
             if (cache_req->tlbStall) {
@@ -1152,15 +1173,6 @@ CacheUnit::squash(DynInstPtr inst, int stage_num,
                         req_ptr->getInst()->splitInst);
             }
 
-            if (req_ptr->getInst()->validMemAddr()) {                    
-                DPRINTF(AddrDep, "Squash of [tid:%i] [sn:%i], attempting to "
-                        "remove addr. %08p dependencies.\n",
-                        req_ptr->getInst()->readTid(),
-                        req_ptr->getInst()->seqNum, 
-                        req_ptr->getInst()->getMemAddr());
-                
-                removeAddrDependency(req_ptr->getInst());
-            }            
         }
 
         map_it++;
diff --git a/src/cpu/inorder/resources/cache_unit.hh b/src/cpu/inorder/resources/cache_unit.hh
index ece06be83..0d911999d 100644
--- a/src/cpu/inorder/resources/cache_unit.hh
+++ b/src/cpu/inorder/resources/cache_unit.hh
@@ -139,10 +139,16 @@ class CacheUnit : public Resource
     void squashDueToMemStall(DynInstPtr inst, int stage_num,
                              InstSeqNum squash_seq_num, ThreadID tid);
 
+    virtual void squashCacheRequest(CacheReqPtr req_ptr);
+
     /** After memory request is completedd in the cache, then do final
         processing to complete the request in the CPU.
     */
-   virtual void processCacheCompletion(PacketPtr pkt);
+    virtual void processCacheCompletion(PacketPtr pkt);
+
+    /** Create request that will interface w/TLB and Memory objects */
+    virtual void setupMemRequest(DynInstPtr inst, CacheReqPtr cache_req,
+                                 int acc_size, int flags);
 
     void recvRetry();
 
@@ -167,7 +173,7 @@ class CacheUnit : public Resource
     uint64_t getMemData(Packet *packet);
 
     void setAddrDependency(DynInstPtr inst);
-    void removeAddrDependency(DynInstPtr inst);
+    virtual void removeAddrDependency(DynInstPtr inst);
     
   protected:
     /** Cache interface. */
@@ -190,8 +196,6 @@ class CacheUnit : public Resource
         return (addr & ~(cacheBlkMask));
     }
 
-    TheISA::Predecoder predecoder;
-
     bool tlbBlocked[ThePipeline::MaxThreads];
 
     TheISA::TLB* tlb();
@@ -225,7 +229,7 @@ class CacheRequest : public ResourceRequest
           pktCmd(pkt_cmd), memReq(NULL), reqData(NULL), dataPkt(NULL),
           retryPkt(NULL), memAccComplete(false), memAccPending(false),
           tlbStall(false), splitAccess(false), splitAccessNum(-1),
-          split2ndAccess(false), instIdx(idx)
+          split2ndAccess(false), instIdx(idx), fetchBufferFill(false)
     { }
 
 
@@ -270,7 +274,9 @@ class CacheRequest : public ResourceRequest
     int splitAccessNum;
     bool split2ndAccess;
     int instIdx;    
-    
+
+    /** Should we expect block from cache access or fetch buffer? */
+    bool fetchBufferFill;
 };
 
 class CacheReqPacket : public Packet
diff --git a/src/cpu/inorder/resources/fetch_unit.cc b/src/cpu/inorder/resources/fetch_unit.cc
index 877d23375..7bbeffadd 100644
--- a/src/cpu/inorder/resources/fetch_unit.cc
+++ b/src/cpu/inorder/resources/fetch_unit.cc
@@ -37,6 +37,7 @@
 #include "arch/utility.hh"
 #include "arch/predecoder.hh"
 #include "config/the_isa.hh"
+#include "cpu/inorder/resources/cache_unit.hh"
 #include "cpu/inorder/resources/fetch_unit.hh"
 #include "cpu/inorder/pipeline_traits.hh"
 #include "cpu/inorder/cpu.hh"
@@ -50,10 +51,42 @@ using namespace ThePipeline;
 FetchUnit::FetchUnit(string res_name, int res_id, int res_width,
                      int res_latency, InOrderCPU *_cpu,
                      ThePipeline::Params *params)
-    : CacheUnit(res_name, res_id, res_width, res_latency, _cpu,
-                params)
+    : CacheUnit(res_name, res_id, res_width, res_latency, _cpu, params),
+      instSize(sizeof(TheISA::MachInst)), fetchBuffSize(params->fetchBuffSize),
+      predecoder(NULL)
 { }
 
+void
+FetchUnit::createMachInst(std::list<FetchBlock*>::iterator fetch_it,
+                          DynInstPtr inst)
+{
+    ExtMachInst ext_inst;
+    Addr block_addr = cacheBlockAlign(inst->getMemAddr());
+    Addr fetch_addr = inst->getMemAddr();
+    unsigned fetch_offset = (fetch_addr - block_addr) / instSize;
+    ThreadID tid = inst->readTid();
+    TheISA::PCState instPC = inst->pcState();
+
+
+    DPRINTF(InOrderCachePort, "Creating instruction [sn:%i] w/fetch data @"
+            "addr:%08p block:%08p\n", inst->seqNum, fetch_addr, block_addr);
+
+    assert((*fetch_it)->valid);
+
+    TheISA::MachInst *fetchInsts =
+        reinterpret_cast<TheISA::MachInst *>((*fetch_it)->block);
+
+    MachInst mach_inst =
+        TheISA::gtoh(fetchInsts[fetch_offset]);
+
+    predecoder.setTC(cpu->thread[tid]->getTC());
+    predecoder.moreBytes(instPC, inst->instAddr(), mach_inst);
+    ext_inst = predecoder.getExtMachInst(instPC);
+
+    inst->pcState(instPC);
+    inst->setMachInst(ext_inst);
+}
+
 int
 FetchUnit::getSlot(DynInstPtr inst)
 {
@@ -119,15 +152,64 @@ FetchUnit::setupMemRequest(DynInstPtr inst, CacheReqPtr cache_req,
                            int acc_size, int flags)
 {
     ThreadID tid = inst->readTid();
-    Addr aligned_addr = inst->getMemAddr();
+    Addr aligned_addr = cacheBlockAlign(inst->getMemAddr());
 
     inst->fetchMemReq =
-            new Request(inst->readTid(), aligned_addr, acc_size, flags,
-                        inst->instAddr(), cpu->readCpuId(), inst->readTid());
+            new Request(tid, aligned_addr, acc_size, flags,
+                        inst->instAddr(), cpu->readCpuId(), tid);
 
     cache_req->memReq = inst->fetchMemReq;
 }
 
+std::list<FetchUnit::FetchBlock*>::iterator
+FetchUnit::findBlock(std::list<FetchBlock*> &fetch_blocks, int asid,
+                     Addr block_addr)
+{
+    std::list<FetchBlock*>::iterator fetch_it = fetch_blocks.begin();
+    std::list<FetchBlock*>::iterator end_it = fetch_blocks.end();
+
+    while (fetch_it != end_it) {
+        if ((*fetch_it)->asid == asid &&
+            (*fetch_it)->addr == block_addr) {
+            return fetch_it;
+        }
+
+        fetch_it++;
+    }
+
+    return fetch_it;
+}
+
+std::list<FetchUnit::FetchBlock*>::iterator
+FetchUnit::findReplacementBlock()
+{
+    std::list<FetchBlock*>::iterator fetch_it = fetchBuffer.begin();
+    std::list<FetchBlock*>::iterator end_it = fetchBuffer.end();
+
+    while (fetch_it != end_it) {
+        if ((*fetch_it)->cnt == 0) {
+            return fetch_it;
+        } else {
+            DPRINTF(InOrderCachePort, "Block %08p has %i insts pending.\n",
+                    (*fetch_it)->addr, (*fetch_it)->cnt);
+        }
+        fetch_it++;
+    }
+
+    return fetch_it;
+}
+
+void
+FetchUnit::markBlockUsed(std::list<FetchBlock*>::iterator block_it)
+{
+    // Move block from whatever location it is in fetch buffer
+    // to the back (represents most-recently-used location)
+    if (block_it != fetchBuffer.end()) {
+        FetchBlock *mru_blk = *block_it;
+        fetchBuffer.erase(block_it);
+        fetchBuffer.push_back(mru_blk);
+    }
+}
 
 void
 FetchUnit::execute(int slot_num)
@@ -142,54 +224,157 @@ FetchUnit::execute(int slot_num)
     }
 
     DynInstPtr inst = cache_req->inst;
-#if TRACING_ON
     ThreadID tid = inst->readTid();
-    int seq_num = inst->seqNum;
-    std::string acc_type = "write";
-#endif
-
+    Addr block_addr = cacheBlockAlign(inst->getMemAddr());
+    int asid = cpu->asid[tid];
     cache_req->fault = NoFault;
 
     switch (cache_req->cmd)
     {
       case InitiateFetch:
         {
+            // Check to see if we've already got this request buffered
+            // or pending to be buffered
+            bool do_fetch = true;
+            std::list<FetchBlock*>::iterator pending_it;
+            pending_it = findBlock(pendingFetch, asid, block_addr);
+            if (pending_it != pendingFetch.end()) {
+                (*pending_it)->cnt++;
+                do_fetch = false;
+
+                DPRINTF(InOrderCachePort, "%08p is a pending fetch block "
+                        "(pending:%i).\n", block_addr,
+                        (*pending_it)->cnt);
+            } else if (pendingFetch.size() < fetchBuffSize) {
+                std::list<FetchBlock*>::iterator buff_it;
+                buff_it = findBlock(fetchBuffer, asid, block_addr);
+                if (buff_it  != fetchBuffer.end()) {
+                    (*buff_it)->cnt++;
+                    do_fetch = false;
+
+                    DPRINTF(InOrderCachePort, "%08p is in fetch buffer"
+                            "(pending:%i).\n", block_addr, (*buff_it)->cnt);
+                }
+            }
+
+            if (!do_fetch) {
+                DPRINTF(InOrderCachePort, "Inst. [sn:%i] marked to be filled "
+                        "through fetch buffer.\n", inst->seqNum);
+                cache_req->fetchBufferFill = true;
+                cache_req->setCompleted(true);
+                return;
+            }
+
+            // Check to see if there is room in the fetchbuffer for this instruction.
+            // If not, block this request.
+            if (pendingFetch.size() >= fetchBuffSize) {
+                DPRINTF(InOrderCachePort, "No room available in fetch buffer.\n");
+                cache_req->setCompleted(false);
+                return;
+            }
+
             doTLBAccess(inst, cache_req, cacheBlkSize, 0, TheISA::TLB::Execute);
 
             if (cache_req->fault == NoFault) {
                 DPRINTF(InOrderCachePort,
-                    "[tid:%u]: Initiating fetch access to %s for addr. %08p\n",
-                    tid, name(), cache_req->inst->getMemAddr());
+                        "[tid:%u]: Initiating fetch access to %s for "
+                        "addr:%#x (block:%#x)\n", tid, name(),
+                        cache_req->inst->getMemAddr(), block_addr);
 
-                cache_req->reqData = new uint8_t[cacheBlksize];
+                cache_req->reqData = new uint8_t[cacheBlkSize];
 
                 inst->setCurResSlot(slot_num);
 
                 doCacheAccess(inst);
+
+                if (cache_req->isMemAccPending()) {
+                    pendingFetch.push_back(new FetchBlock(asid, block_addr));
+                }
             }
 
             break;
         }
 
       case CompleteFetch:
+        if (cache_req->fetchBufferFill) {
+            // Block request if it's depending on a previous fetch, but it hasnt made it yet
+            std::list<FetchBlock*>::iterator fetch_it = findBlock(fetchBuffer, asid, block_addr);
+            if (fetch_it == fetchBuffer.end()) {
+                DPRINTF(InOrderCachePort, "%#x not available yet\n",
+                        block_addr);
+                cache_req->setCompleted(false);
+                return;
+            }
+
+            // Make New Instruction
+            createMachInst(fetch_it, inst);
+            if (inst->traceData) {
+                inst->traceData->setStaticInst(inst->staticInst);
+                inst->traceData->setPC(inst->pcState());
+            }
+
+            // FetchBuffer Book-Keeping
+            (*fetch_it)->cnt--;
+            assert((*fetch_it)->cnt >= 0);
+            markBlockUsed(fetch_it);
+
+            cache_req->done();
+            return;
+        }
+
         if (cache_req->isMemAccComplete()) {
+            if (fetchBuffer.size() >= fetchBuffSize) {
+                // If there is no replacement block, then we'll just have
+                // to wait till that gets cleared before satisfying the fetch
+                // for this instruction
+                std::list<FetchBlock*>::iterator repl_it  =
+                    findReplacementBlock();
+                if (repl_it == fetchBuffer.end()) {
+                    DPRINTF(InOrderCachePort, "Unable to find replacement block"
+                            " and complete fetch.\n");
+                    cache_req->setCompleted(false);
+                    return;
+                }
+
+                fetchBuffer.erase(repl_it);
+            }
+
             DPRINTF(InOrderCachePort,
                     "[tid:%i]: Completing Fetch Access for [sn:%i]\n",
                     tid, inst->seqNum);
 
+            // Make New Instruction
+            std::list<FetchBlock*>::iterator fetch_it  =
+                findBlock(pendingFetch, asid, block_addr);
+
+            assert(fetch_it != pendingFetch.end());
+            assert((*fetch_it)->valid);
+
+            createMachInst(fetch_it, inst);
+            if (inst->traceData) {
+                inst->traceData->setStaticInst(inst->staticInst);
+                inst->traceData->setPC(inst->pcState());
+            }
+
+
+            // Update instructions waiting on new fetch block
+            FetchBlock *new_block = (*fetch_it);
+            new_block->cnt--;
+            assert(new_block->cnt >= 0);
+
+            // Finally, update FetchBuffer w/Pending Block into the
+            // MRU location
+            pendingFetch.erase(fetch_it);
+            fetchBuffer.push_back(new_block);
 
             DPRINTF(InOrderCachePort, "[tid:%i]: Instruction [sn:%i] is: %s\n",
-                    tid, seq_num,
+                    tid, inst->seqNum,
                     inst->staticInst->disassemble(inst->instAddr()));
 
-            removeAddrDependency(inst);
+            inst->unsetMemAddr();
 
             delete cache_req->dataPkt;
 
-            // Do not stall and switch threads for fetch... for now..
-            // TODO: We need to detect cache misses for latencies > 1
-            // cache_req->setMemStall(false);
-
             cache_req->done();
         } else {
             DPRINTF(InOrderCachePort,
@@ -199,7 +384,9 @@ FetchUnit::execute(int slot_num)
                     "STALL: [tid:%i]: Fetch miss from %08p\n",
                     tid, cache_req->inst->instAddr());
             cache_req->setCompleted(false);
-            //cache_req->setMemStall(true);
+            // NOTE: For SwitchOnCacheMiss ThreadModel, we *don't* switch on
+            //       fetch miss, but we could ...
+            // cache_req->setMemStall(true);
         }
         break;
 
@@ -213,7 +400,6 @@ FetchUnit::processCacheCompletion(PacketPtr pkt)
 {
     // Cast to correct packet type
     CacheReqPacket* cache_pkt = dynamic_cast<CacheReqPacket*>(pkt);
-
     assert(cache_pkt);
 
     if (cache_pkt->cacheReq->isSquashed()) {
@@ -230,104 +416,108 @@ FetchUnit::processCacheCompletion(PacketPtr pkt)
         delete cache_pkt;
 
         cpu->wakeCPU();
-
         return;
     }
 
+    Addr block_addr = cacheBlockAlign(cache_pkt->cacheReq->
+                                      getInst()->getMemAddr());
+
     DPRINTF(InOrderCachePort,
-            "[tid:%u]: [sn:%i]: Waking from cache access to addr. %08p\n",
+            "[tid:%u]: [sn:%i]: Waking from fetch access to addr:%#x(phys:%#x), size:%i\n",
             cache_pkt->cacheReq->getInst()->readTid(),
             cache_pkt->cacheReq->getInst()->seqNum,
-            cache_pkt->cacheReq->getInst()->getMemAddr());
+            block_addr, cache_pkt->getAddr(), cache_pkt->getSize());
 
     // Cast to correct request type
     CacheRequest *cache_req = dynamic_cast<CacheReqPtr>(
         findRequest(cache_pkt->cacheReq->getInst(), cache_pkt->instIdx));
 
     if (!cache_req) {
-        panic("[tid:%u]: [sn:%i]: Can't find slot for cache access to "
+        panic("[tid:%u]: [sn:%i]: Can't find slot for fetch access to "
               "addr. %08p\n", cache_pkt->cacheReq->getInst()->readTid(),
               cache_pkt->cacheReq->getInst()->seqNum,
-              cache_pkt->cacheReq->getInst()->getMemAddr());
+              block_addr);
     }
 
-    assert(cache_req);
-
-
     // Get resource request info
     unsigned stage_num = cache_req->getStageNum();
     DynInstPtr inst = cache_req->inst;
     ThreadID tid = cache_req->inst->readTid();
+    short asid = cpu->asid[tid];
 
-    if (!cache_req->isSquashed()) {
-        assert(inst->resSched.top()->cmd == CompleteFetch);
+    assert(!cache_req->isSquashed());
+    assert(inst->resSched.top()->cmd == CompleteFetch);
 
-        DPRINTF(InOrderCachePort,
-                "[tid:%u]: [sn:%i]: Processing fetch access\n",
-                tid, inst->seqNum);
-
-        // NOTE: This is only allowing a thread to fetch one line
-        //       at a time. Re-examine when/if prefetching
-        //       gets implemented.
-        // memcpy(fetchData[tid], cache_pkt->getPtr<uint8_t>(),
-        //        cache_pkt->getSize());
-
-        // Get the instruction from the array of the cache line.
-        // @todo: update this
-        ExtMachInst ext_inst;
-        StaticInstPtr staticInst = NULL;
-        TheISA::PCState instPC = inst->pcState();
-        MachInst mach_inst =
-            TheISA::gtoh(*reinterpret_cast<TheISA::MachInst *>
-                         (cache_pkt->getPtr<uint8_t>()));
-
-        predecoder.setTC(cpu->thread[tid]->getTC());
-        predecoder.moreBytes(instPC, inst->instAddr(), mach_inst);
-        ext_inst = predecoder.getExtMachInst(instPC);
-        inst->pcState(instPC);
-
-        inst->setMachInst(ext_inst);
-
-        // Set Up More TraceData info
-        if (inst->traceData) {
-            inst->traceData->setStaticInst(inst->staticInst);
-            inst->traceData->setPC(instPC);
-        }
+    DPRINTF(InOrderCachePort,
+            "[tid:%u]: [sn:%i]: Processing fetch access for block %#x\n",
+            tid, inst->seqNum, block_addr);
 
-        cache_req->setMemAccPending(false);
-        cache_req->setMemAccCompleted();
+    std::list<FetchBlock*>::iterator pend_it = findBlock(pendingFetch, asid,
+                                                         block_addr);
+    assert(pend_it != pendingFetch.end());
 
-        if (cache_req->isMemStall() &&
-            cpu->threadModel == InOrderCPU::SwitchOnCacheMiss) {
-            DPRINTF(InOrderCachePort, "[tid:%u] Waking up from Cache Miss.\n",
-                    tid);
+    // Copy Data to pendingFetch queue...
+    (*pend_it)->block = new uint8_t[cacheBlkSize];
+    memcpy((*pend_it)->block, cache_pkt->getPtr<uint8_t>(), cacheBlkSize);
+    (*pend_it)->valid = true;
 
-            cpu->activateContext(tid);
+    cache_req->setMemAccPending(false);
+    cache_req->setMemAccCompleted();
 
-            DPRINTF(ThreadModel, "Activating [tid:%i] after return from cache"
-                    "miss.\n", tid);
-        }
+    if (cache_req->isMemStall() &&
+        cpu->threadModel == InOrderCPU::SwitchOnCacheMiss) {
+        DPRINTF(InOrderCachePort, "[tid:%u] Waking up from Cache Miss.\n",
+                tid);
 
-        // Wake up the CPU (if it went to sleep and was waiting on this
-        // completion event).
-        cpu->wakeCPU();
+        cpu->activateContext(tid);
+
+        DPRINTF(ThreadModel, "Activating [tid:%i] after return from cache"
+                "miss.\n", tid);
+    }
 
-        DPRINTF(Activity, "[tid:%u] Activating %s due to cache completion\n",
+    // Wake up the CPU (if it went to sleep and was waiting on this
+    // completion event).
+    cpu->wakeCPU();
+
+    DPRINTF(Activity, "[tid:%u] Activating %s due to cache completion\n",
             tid, cpu->pipelineStage[stage_num]->name());
 
-        cpu->switchToActive(stage_num);
-    } else {
-        DPRINTF(InOrderCachePort,
-                "[tid:%u] Miss on block @ %08p completed, but squashed\n",
-                tid, cache_req->inst->instAddr());
-        cache_req->setMemAccCompleted();
-    }
+    cpu->switchToActive(stage_num);
 }
 
 void
-FetchUnit::squash(DynInstPtr inst, int stage_num,
-                  InstSeqNum squash_seq_num, ThreadID tid)
+FetchUnit::squashCacheRequest(CacheReqPtr req_ptr)
 {
-    CacheUnit::squash(inst, stage_num, squash_seq_num, tid);
+    DynInstPtr inst = req_ptr->getInst();
+    ThreadID tid = inst->readTid();
+    Addr block_addr = cacheBlockAlign(inst->getMemAddr());
+    int asid = cpu->asid[tid];
+
+    // Check Fetch Buffer (or pending fetch) for this block and
+    // update pending counts
+    std::list<FetchBlock*>::iterator buff_it = findBlock(fetchBuffer,
+                                                         asid,
+                                                         block_addr);
+    if (buff_it != fetchBuffer.end()) {
+        (*buff_it)->cnt--;
+        DPRINTF(InOrderCachePort, "[sn:%i] Removing Pending Fetch "
+                "for Buffer block %08p (cnt=%i)\n", inst->seqNum,
+                block_addr, (*buff_it)->cnt);
+    } else {
+        std::list<FetchBlock*>::iterator block_it = findBlock(pendingFetch,
+                                                              asid,
+                                                              block_addr);
+        if (block_it != pendingFetch.end()) {
+            (*block_it)->cnt--;
+            if ((*block_it)->cnt == 0) {
+                DPRINTF(InOrderCachePort, "[sn:%i] Removing Pending Fetch "
+                        "for block %08p (cnt=%i)\n", inst->seqNum,
+                        block_addr, (*block_it)->cnt);
+                pendingFetch.erase(block_it);
+            }
+        }
+    }
+
+    CacheUnit::squashCacheRequest(req_ptr);
 }
 
diff --git a/src/cpu/inorder/resources/fetch_unit.hh b/src/cpu/inorder/resources/fetch_unit.hh
index 257a8c50a..035f3f4a1 100644
--- a/src/cpu/inorder/resources/fetch_unit.hh
+++ b/src/cpu/inorder/resources/fetch_unit.hh
@@ -52,19 +52,31 @@
 class FetchUnit : public CacheUnit
 {
   public:
-    typedef ThePipeline::DynInstPtr DynInstPtr;
-
-  public:
     FetchUnit(std::string res_name, int res_id, int res_width,
               int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params);
 
-    /** Actions that this resources can take on an instruction */
+    typedef ThePipeline::DynInstPtr DynInstPtr;
+    typedef TheISA::ExtMachInst ExtMachInst;
+
+    struct FetchBlock {
+        int asid;
+        Addr addr;
+        uint8_t *block;
+        short cnt;
+        bool valid;
+
+        FetchBlock(int _asid, Addr _addr)
+            : asid(_asid), addr(_addr), block(NULL), cnt(1), valid(false)
+        { }
+    };
+
+    /** Actions that this resource can take on an instruction */
     enum Command {
         InitiateFetch,
         CompleteFetch
     };
 
-  public:
+
     ResourceRequest* getRequest(DynInstPtr _inst, int stage_num,
                                 int res_idx, int slot_num,
                                 unsigned cmd);
@@ -74,8 +86,11 @@ class FetchUnit : public CacheUnit
     /** Executes one of the commands from the "Command" enum */
     void execute(int slot_num);
 
-    void squash(DynInstPtr inst, int stage_num,
-                InstSeqNum squash_seq_num, ThreadID tid);
+  private:
+    void squashCacheRequest(CacheReqPtr req_ptr);
+
+    void createMachInst(std::list<FetchBlock*>::iterator fetch_it,
+                        DynInstPtr inst);
 
     /** After memory request is completed, then turn the fetched data
         into an instruction.
@@ -94,14 +109,24 @@ class FetchUnit : public CacheUnit
 
     void removeAddrDependency(DynInstPtr inst);
 
-  public:
-    /** The mem line being fetched. */
-    uint8_t *fetchData[ThePipeline::MaxThreads];
+    std::list<FetchBlock*>::iterator findReplacementBlock();
+    std::list<FetchBlock*>::iterator findBlock(std::list<FetchBlock*>
+                                               &fetch_blocks, int asid,
+                                               Addr block_addr);
+
+    void markBlockUsed(std::list<FetchBlock*>::iterator block_it);
+
+    int instSize;
+
+    int fetchBuffSize;
+
+    TheISA::Predecoder predecoder;
 
+    /** Valid Cache Blocks*/
+    std::list<FetchBlock*> fetchBuffer;
 
-    /** The Addr of the cacheline that has been loaded. */
-    //Addr cacheBlockAddr[ThePipeline::MaxThreads];
-    //unsigned fetchOffset[ThePipeline::MaxThreads];
+    /** Cache lines that are pending */
+    std::list<FetchBlock*> pendingFetch;
 };
 
 #endif //__CPU_FETCH_UNIT_HH__
author	Korey Sewell <ksewell@umich.edu>	2011-02-04 00:08:22 -0500
committer	Korey Sewell <ksewell@umich.edu>	2011-02-04 00:08:22 -0500
commit	68d962f8aff7d2fcc2f8ee77878dd5cab73b69f2 (patch)
tree	c4bb7d88b864e5ee353b743553bbea0efd34cbb8 /src/cpu
parent	56ce8acd412747b728b7ad02537a3afd202ae8e8 (diff)
download	gem5-68d962f8aff7d2fcc2f8ee77878dd5cab73b69f2.tar.xz