diff options
author | Brad Beckmann <Brad.Beckmann@amd.com> | 2010-01-31 22:28:13 -0800 |
---|---|---|
committer | Brad Beckmann <Brad.Beckmann@amd.com> | 2010-01-31 22:28:13 -0800 |
commit | 4e00cc9900ec4f61899ee5be0c5b3827487e91f5 (patch) | |
tree | 372809113645b16ab99adb5c4b81d7f3512780e5 /src/cpu/inorder/resources | |
parent | deb97742c7ada2008ec79aaf1791f7db3c6a2b06 (diff) | |
parent | 04466ab4ca04a4e1e195a6f68423792b2553dadb (diff) | |
download | gem5-4e00cc9900ec4f61899ee5be0c5b3827487e91f5.tar.xz |
merge
Diffstat (limited to 'src/cpu/inorder/resources')
-rw-r--r-- | src/cpu/inorder/resources/cache_unit.cc | 650 | ||||
-rw-r--r-- | src/cpu/inorder/resources/cache_unit.hh | 66 | ||||
-rw-r--r-- | src/cpu/inorder/resources/execution_unit.cc | 17 | ||||
-rw-r--r-- | src/cpu/inorder/resources/execution_unit.hh | 6 | ||||
-rw-r--r-- | src/cpu/inorder/resources/fetch_seq_unit.cc | 37 | ||||
-rw-r--r-- | src/cpu/inorder/resources/fetch_seq_unit.hh | 7 | ||||
-rw-r--r-- | src/cpu/inorder/resources/graduation_unit.cc | 2 | ||||
-rw-r--r-- | src/cpu/inorder/resources/graduation_unit.hh | 2 | ||||
-rw-r--r-- | src/cpu/inorder/resources/inst_buffer.cc | 3 | ||||
-rw-r--r-- | src/cpu/inorder/resources/mult_div_unit.hh | 1 | ||||
-rw-r--r-- | src/cpu/inorder/resources/use_def.cc | 231 | ||||
-rw-r--r-- | src/cpu/inorder/resources/use_def.hh | 12 |
12 files changed, 826 insertions, 208 deletions
diff --git a/src/cpu/inorder/resources/cache_unit.cc b/src/cpu/inorder/resources/cache_unit.cc index eb66e10f8..cb1861ea9 100644 --- a/src/cpu/inorder/resources/cache_unit.cc +++ b/src/cpu/inorder/resources/cache_unit.cc @@ -40,6 +40,7 @@ #include "cpu/inorder/resources/cache_unit.hh" #include "cpu/inorder/pipeline_traits.hh" #include "cpu/inorder/cpu.hh" +#include "cpu/inorder/resource_pool.hh" #include "mem/request.hh" using namespace std; @@ -49,14 +50,14 @@ using namespace ThePipeline; Tick CacheUnit::CachePort::recvAtomic(PacketPtr pkt) { - panic("DefaultFetch doesn't expect recvAtomic callback!"); + panic("CacheUnit::CachePort doesn't expect recvAtomic callback!"); return curTick; } void CacheUnit::CachePort::recvFunctional(PacketPtr pkt) { - panic("DefaultFetch doesn't expect recvFunctional callback!"); + panic("CacheUnit::CachePort doesn't expect recvFunctional callback!"); } void @@ -65,7 +66,7 @@ CacheUnit::CachePort::recvStatusChange(Status status) if (status == RangeChange) return; - panic("DefaultFetch doesn't expect recvStatusChange callback!"); + panic("CacheUnit::CachePort doesn't expect recvStatusChange callback!"); } bool @@ -84,8 +85,7 @@ CacheUnit::CachePort::recvRetry() CacheUnit::CacheUnit(string res_name, int res_id, int res_width, int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params) : Resource(res_name, res_id, res_width, res_latency, _cpu), - retryPkt(NULL), retrySlot(-1), cacheBlocked(false), - predecoder(NULL) + cachePortBlocked(false), predecoder(NULL) { cachePort = new CachePort(this); @@ -131,18 +131,24 @@ CacheUnit::init() int CacheUnit::getSlot(DynInstPtr inst) { + ThreadID tid = inst->readTid(); + if (tlbBlocked[inst->threadNumber]) { return -1; } - if (!inst->validMemAddr()) { - panic("Mem. Addr. must be set before requesting cache access\n"); + // For a Split-Load, the instruction would have processed once already + // causing the address to be unset. + if (!inst->validMemAddr() && !inst->splitInst) { + panic("[tid:%i][sn:%i] Mem. Addr. must be set before requesting cache access\n", + inst->readTid(), inst->seqNum); } Addr req_addr = inst->getMemAddr(); if (resName == "icache_port" || - find(addrList.begin(), addrList.end(), req_addr) == addrList.end()) { + find(addrList[tid].begin(), addrList[tid].end(), req_addr) == + addrList[tid].end()) { int new_slot = Resource::getSlot(inst); @@ -150,37 +156,115 @@ CacheUnit::getSlot(DynInstPtr inst) return -1; inst->memTime = curTick; - addrList.push_back(req_addr); - addrMap[req_addr] = inst->seqNum; - DPRINTF(InOrderCachePort, - "[tid:%i]: [sn:%i]: Address %08p added to dependency list\n", - inst->readTid(), inst->seqNum, req_addr); + setAddrDependency(inst); return new_slot; } else { - DPRINTF(InOrderCachePort, - "Denying request because there is an outstanding" + // Allow same instruction multiple accesses to same address + // should only happen maybe after a squashed inst. needs to replay + if (addrMap[tid][req_addr] == inst->seqNum) { + int new_slot = Resource::getSlot(inst); + + if (new_slot == -1) + return -1; + + return new_slot; + } else { + DPRINTF(InOrderCachePort, + "[tid:%i] Denying request because there is an outstanding" " request to/for addr. %08p. by [sn:%i] @ tick %i\n", - req_addr, addrMap[req_addr], inst->memTime); - return -1; + inst->readTid(), req_addr, addrMap[tid][req_addr], inst->memTime); + return -1; + } } + + return -1; } void -CacheUnit::freeSlot(int slot_num) +CacheUnit::setAddrDependency(DynInstPtr inst) { - vector<Addr>::iterator vect_it = find(addrList.begin(), addrList.end(), - reqMap[slot_num]->inst->getMemAddr()); - assert(vect_it != addrList.end()); + Addr req_addr = inst->getMemAddr(); + ThreadID tid = inst->readTid(); + addrList[tid].push_back(req_addr); + addrMap[tid][req_addr] = inst->seqNum; DPRINTF(InOrderCachePort, - "[tid:%i]: Address %08p removed from dependency list\n", - reqMap[slot_num]->inst->readTid(), (*vect_it)); + "[tid:%i]: [sn:%i]: Address %08p added to dependency list\n", + inst->readTid(), inst->seqNum, req_addr); + DPRINTF(AddrDep, + "[tid:%i]: [sn:%i]: Address %08p added to dependency list\n", + inst->readTid(), inst->seqNum, req_addr); +} + +void +CacheUnit::removeAddrDependency(DynInstPtr inst) +{ + ThreadID tid = inst->readTid(); + + Addr mem_addr = inst->getMemAddr(); + + // Erase from Address List + vector<Addr>::iterator vect_it = find(addrList[tid].begin(), addrList[tid].end(), + mem_addr); + assert(vect_it != addrList[tid].end() || inst->splitInst); + + if (vect_it != addrList[tid].end()) { + DPRINTF(AddrDep, + "[tid:%i]: [sn:%i] Address %08p removed from dependency list\n", + inst->readTid(), inst->seqNum, (*vect_it)); + + addrList[tid].erase(vect_it); + + // Erase From Address Map (Used for Debugging) + addrMap[tid].erase(addrMap[tid].find(mem_addr)); + } + + +} + +ResReqPtr +CacheUnit::findRequest(DynInstPtr inst) +{ + map<int, ResReqPtr>::iterator map_it = reqMap.begin(); + map<int, ResReqPtr>::iterator map_end = reqMap.end(); + + while (map_it != map_end) { + CacheRequest* cache_req = dynamic_cast<CacheRequest*>((*map_it).second); + assert(cache_req); + + if (cache_req && + cache_req->getInst() == inst && + cache_req->instIdx == inst->resSched.top()->idx) { + return cache_req; + } + map_it++; + } + + return NULL; +} + +ResReqPtr +CacheUnit::findSplitRequest(DynInstPtr inst, int idx) +{ + map<int, ResReqPtr>::iterator map_it = reqMap.begin(); + map<int, ResReqPtr>::iterator map_end = reqMap.end(); + + while (map_it != map_end) { + CacheRequest* cache_req = dynamic_cast<CacheRequest*>((*map_it).second); + assert(cache_req); - addrList.erase(vect_it); + if (cache_req && + cache_req->getInst() == inst && + cache_req->instIdx == idx) { + return cache_req; + } + map_it++; + } - Resource::freeSlot(slot_num); + return NULL; } + ResReqPtr CacheUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx, int slot_num, unsigned cmd) @@ -195,6 +279,14 @@ CacheUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx, switch (sched_entry->cmd) { + case InitSecondSplitRead: + pkt_cmd = MemCmd::ReadReq; + + DPRINTF(InOrderCachePort, + "[tid:%i]: Read request from [sn:%i] for addr %08p\n", + inst->readTid(), inst->seqNum, inst->split2ndAddr); + break; + case InitiateReadData: pkt_cmd = MemCmd::ReadReq; @@ -203,6 +295,14 @@ CacheUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx, inst->readTid(), inst->seqNum, inst->getMemAddr()); break; + case InitSecondSplitWrite: + pkt_cmd = MemCmd::WriteReq; + + DPRINTF(InOrderCachePort, + "[tid:%i]: Write request from [sn:%i] for addr %08p\n", + inst->readTid(), inst->seqNum, inst->split2ndAddr); + break; + case InitiateWriteData: pkt_cmd = MemCmd::WriteReq; @@ -226,7 +326,8 @@ CacheUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx, return new CacheRequest(this, inst, stage_num, id, slot_num, sched_entry->cmd, 0, pkt_cmd, - 0/*flags*/, this->cpu->readCpuId()); + 0/*flags*/, this->cpu->readCpuId(), + inst->resSched.top()->idx); } void @@ -237,15 +338,17 @@ CacheUnit::requestAgain(DynInstPtr inst, bool &service_request) // Check to see if this instruction is requesting the same command // or a different one - if (cache_req->cmd != inst->resSched.top()->cmd) { + if (cache_req->cmd != inst->resSched.top()->cmd && + cache_req->instIdx == inst->resSched.top()->idx) { // If different, then update command in the request cache_req->cmd = inst->resSched.top()->cmd; DPRINTF(InOrderCachePort, - "[tid:%i]: [sn:%i]: Updating the command for this instruction\n", - inst->readTid(), inst->seqNum); + "[tid:%i]: [sn:%i]: Updating the command for this " + "instruction\n ", inst->readTid(), inst->seqNum); service_request = true; - } else { + } else if (inst->resSched.top()->idx != CacheUnit::InitSecondSplitRead && + inst->resSched.top()->idx != CacheUnit::InitSecondSplitWrite) { // If same command, just check to see if memory access was completed // but dont try to re-execute DPRINTF(InOrderCachePort, @@ -271,12 +374,25 @@ CacheUnit::doTLBAccess(DynInstPtr inst, CacheReqPtr cache_req, int acc_size, cpu->readCpuId(), inst->readTid()); cache_req->memReq = inst->fetchMemReq; } else { - inst->dataMemReq = new Request(inst->readTid(), aligned_addr, + if (!cache_req->is2ndSplit()) { + inst->dataMemReq = new Request(cpu->asid[tid], aligned_addr, acc_size, flags, inst->readPC(), cpu->readCpuId(), inst->readTid()); cache_req->memReq = inst->dataMemReq; + } else { + assert(inst->splitInst); + + inst->splitMemReq = new Request(cpu->asid[tid], + inst->split2ndAddr, + acc_size, + flags, + inst->readPC(), + cpu->readCpuId(), + tid); + cache_req->memReq = inst->splitMemReq; + } } - + cache_req->fault = _tlb->translateAtomic(cache_req->memReq, @@ -311,14 +427,93 @@ Fault CacheUnit::read(DynInstPtr inst, Addr addr, T &data, unsigned flags) { CacheReqPtr cache_req = dynamic_cast<CacheReqPtr>(findRequest(inst)); - assert(cache_req); + assert(cache_req && "Can't Find Instruction for Read!"); + + // The block size of our peer + unsigned blockSize = this->cachePort->peerBlockSize(); + + //The size of the data we're trying to read. + int dataSize = sizeof(T); + + if (inst->split2ndAccess) { + dataSize = inst->split2ndSize; + cache_req->splitAccess = true; + cache_req->split2ndAccess = true; + + DPRINTF(InOrderCachePort, "[sn:%i] Split Read Access (2 of 2) for (%#x, %#x).\n", inst->seqNum, + inst->getMemAddr(), inst->split2ndAddr); + } + + + //The address of the second part of this access if it needs to be split + //across a cache line boundary. + Addr secondAddr = roundDown(addr + dataSize - 1, blockSize); + + + if (secondAddr > addr && !inst->split2ndAccess) { + DPRINTF(InOrderCachePort, "%i: sn[%i] Split Read Access (1 of 2) for (%#x, %#x).\n", curTick, inst->seqNum, + addr, secondAddr); + + // Save All "Total" Split Information + // ============================== + inst->splitInst = true; + inst->splitMemData = new uint8_t[dataSize]; + inst->splitTotalSize = dataSize; + + if (!inst->splitInstSked) { + // Schedule Split Read/Complete for Instruction + // ============================== + int stage_num = cache_req->getStageNum(); + + int stage_pri = ThePipeline::getNextPriority(inst, stage_num); + + inst->resSched.push(new ScheduleEntry(stage_num, + stage_pri, + cpu->resPool->getResIdx(DCache), + CacheUnit::InitSecondSplitRead, + 1) + ); + + inst->resSched.push(new ScheduleEntry(stage_num + 1, + 1/*stage_pri*/, + cpu->resPool->getResIdx(DCache), + CacheUnit::CompleteSecondSplitRead, + 1) + ); + inst->splitInstSked = true; + } else { + DPRINTF(InOrderCachePort, "[tid:%i] [sn:%i] Retrying Split Read Access (1 of 2) for (%#x, %#x).\n", + inst->readTid(), inst->seqNum, addr, secondAddr); + } - int acc_size = sizeof(T); - doTLBAccess(inst, cache_req, acc_size, flags, TheISA::TLB::Read); + // Split Information for First Access + // ============================== + dataSize = secondAddr - addr; + cache_req->splitAccess = true; + + // Split Information for Second Access + // ============================== + inst->split2ndSize = addr + sizeof(T) - secondAddr; + inst->split2ndAddr = secondAddr; + inst->split2ndDataPtr = inst->splitMemData + dataSize; + inst->split2ndFlags = flags; + } + + doTLBAccess(inst, cache_req, dataSize, flags, TheISA::TLB::Read); if (cache_req->fault == NoFault) { - cache_req->reqData = new uint8_t[acc_size]; - doCacheAccess(inst, NULL); + if (!cache_req->splitAccess) { + cache_req->reqData = new uint8_t[dataSize]; + doCacheAccess(inst, NULL); + } else { + if (!inst->split2ndAccess) { + cache_req->reqData = inst->splitMemData; + } else { + cache_req->reqData = inst->split2ndDataPtr; + } + + doCacheAccess(inst, NULL, cache_req); + } } return cache_req->fault; @@ -330,16 +525,93 @@ CacheUnit::write(DynInstPtr inst, T data, Addr addr, unsigned flags, uint64_t *write_res) { CacheReqPtr cache_req = dynamic_cast<CacheReqPtr>(findRequest(inst)); - assert(cache_req); - - int acc_size = sizeof(T); - doTLBAccess(inst, cache_req, acc_size, flags, TheISA::TLB::Write); + assert(cache_req && "Can't Find Instruction for Write!"); + + // The block size of our peer + unsigned blockSize = this->cachePort->peerBlockSize(); + + //The size of the data we're trying to read. + int dataSize = sizeof(T); + + if (inst->split2ndAccess) { + dataSize = inst->split2ndSize; + cache_req->splitAccess = true; + cache_req->split2ndAccess = true; + + DPRINTF(InOrderCachePort, "[sn:%i] Split Write Access (2 of 2) for (%#x, %#x).\n", inst->seqNum, + inst->getMemAddr(), inst->split2ndAddr); + } + + //The address of the second part of this access if it needs to be split + //across a cache line boundary. + Addr secondAddr = roundDown(addr + dataSize - 1, blockSize); + + if (secondAddr > addr && !inst->split2ndAccess) { + + DPRINTF(InOrderCachePort, "[sn:%i] Split Write Access (1 of 2) for (%#x, %#x).\n", inst->seqNum, + addr, secondAddr); + + // Save All "Total" Split Information + // ============================== + inst->splitInst = true; + inst->splitTotalSize = dataSize; + + if (!inst->splitInstSked) { + // Schedule Split Read/Complete for Instruction + // ============================== + int stage_num = cache_req->getStageNum(); + + int stage_pri = ThePipeline::getNextPriority(inst, stage_num); + + inst->resSched.push(new ScheduleEntry(stage_num, + stage_pri, + cpu->resPool->getResIdx(DCache), + CacheUnit::InitSecondSplitWrite, + 1) + ); + + inst->resSched.push(new ScheduleEntry(stage_num + 1, + 1/*stage_pri*/, + cpu->resPool->getResIdx(DCache), + CacheUnit::CompleteSecondSplitWrite, + 1) + ); + inst->splitInstSked = true; + } else { + DPRINTF(InOrderCachePort, "[tid:%i] sn:%i] Retrying Split Read Access (1 of 2) for (%#x, %#x).\n", + inst->readTid(), inst->seqNum, addr, secondAddr); + } + + + + // Split Information for First Access + // ============================== + dataSize = secondAddr - addr; + cache_req->splitAccess = true; + + // Split Information for Second Access + // ============================== + inst->split2ndSize = addr + sizeof(T) - secondAddr; + inst->split2ndAddr = secondAddr; + inst->split2ndStoreDataPtr = &cache_req->inst->storeData; + inst->split2ndStoreDataPtr += dataSize; + inst->split2ndFlags = flags; + inst->splitInstSked = true; + } + + doTLBAccess(inst, cache_req, dataSize, flags, TheISA::TLB::Write); if (cache_req->fault == NoFault) { - cache_req->reqData = new uint8_t[acc_size]; - doCacheAccess(inst, write_res); + if (!cache_req->splitAccess) { + // Remove this line since storeData is saved in INST? + cache_req->reqData = new uint8_t[dataSize]; + doCacheAccess(inst, write_res); + } else { + doCacheAccess(inst, write_res, cache_req); + } + } - + return cache_req->fault; } @@ -347,8 +619,8 @@ CacheUnit::write(DynInstPtr inst, T data, Addr addr, unsigned flags, void CacheUnit::execute(int slot_num) { - if (cacheBlocked) { - DPRINTF(InOrderCachePort, "Cache Blocked. Cannot Access\n"); + if (cachePortBlocked) { + DPRINTF(InOrderCachePort, "Cache Port Blocked. Cannot Access\n"); return; } @@ -359,6 +631,8 @@ CacheUnit::execute(int slot_num) #if TRACING_ON ThreadID tid = inst->readTid(); int seq_num = inst->seqNum; + std::string acc_type = "write"; + #endif cache_req->fault = NoFault; @@ -390,10 +664,14 @@ CacheUnit::execute(int slot_num) } case InitiateReadData: +#if TRACING_ON + acc_type = "read"; +#endif case InitiateWriteData: + DPRINTF(InOrderCachePort, - "[tid:%u]: Initiating data access to %s for addr. %08p\n", - tid, name(), cache_req->inst->getMemAddr()); + "[tid:%u]: [sn:%i] Initiating data %s access to %s for addr. %08p\n", + tid, inst->seqNum, acc_type, name(), cache_req->inst->getMemAddr()); inst->setCurResSlot(slot_num); @@ -402,9 +680,29 @@ CacheUnit::execute(int slot_num) } else { inst->initiateAcc(); } + + break; + + case InitSecondSplitRead: + DPRINTF(InOrderCachePort, + "[tid:%u]: [sn:%i] Initiating split data read access to %s for addr. %08p\n", + tid, inst->seqNum, name(), cache_req->inst->split2ndAddr); + inst->split2ndAccess = true; + assert(inst->split2ndAddr != 0); + read(inst, inst->split2ndAddr, inst->split2ndData, inst->split2ndFlags); + break; + + case InitSecondSplitWrite: + DPRINTF(InOrderCachePort, + "[tid:%u]: [sn:%i] Initiating split data write access to %s for addr. %08p\n", + tid, inst->seqNum, name(), cache_req->inst->getMemAddr()); + inst->split2ndAccess = true; + assert(inst->split2ndAddr != 0); + write(inst, inst->split2ndAddr, inst->split2ndData, inst->split2ndFlags, NULL); break; + case CompleteFetch: if (cache_req->isMemAccComplete()) { DPRINTF(InOrderCachePort, @@ -415,16 +713,24 @@ CacheUnit::execute(int slot_num) DPRINTF(InOrderCachePort, "[tid:%i]: Instruction [sn:%i] is: %s\n", tid, seq_num, inst->staticInst->disassemble(inst->PC)); + removeAddrDependency(inst); + delete cache_req->dataPkt; + + // Do not stall and switch threads for fetch... for now.. + // TODO: We need to detect cache misses for latencies > 1 + // cache_req->setMemStall(false); + cache_req->done(); } else { DPRINTF(InOrderCachePort, - "[tid:%i]: [sn:%i]: Unable to Complete Fetch Access\n", + "[tid:%i]: [sn:%i]: Unable to Complete Fetch Access\n", tid, inst->seqNum); DPRINTF(InOrderStall, "STALL: [tid:%i]: Fetch miss from %08p\n", tid, cache_req->inst->readPC()); cache_req->setCompleted(false); + //cache_req->setMemStall(true); } break; @@ -437,14 +743,55 @@ CacheUnit::execute(int slot_num) if (cache_req->isMemAccComplete() || inst->isDataPrefetch() || inst->isInstPrefetch()) { + removeAddrDependency(inst); + cache_req->setMemStall(false); cache_req->done(); } else { DPRINTF(InOrderStall, "STALL: [tid:%i]: Data miss from %08p\n", tid, cache_req->inst->getMemAddr()); cache_req->setCompleted(false); + cache_req->setMemStall(true); } break; + case CompleteSecondSplitRead: + DPRINTF(InOrderCachePort, + "[tid:%i]: [sn:%i]: Trying to Complete Split Data Read Access\n", + tid, inst->seqNum); + + if (cache_req->isMemAccComplete() || + inst->isDataPrefetch() || + inst->isInstPrefetch()) { + removeAddrDependency(inst); + cache_req->setMemStall(false); + cache_req->done(); + } else { + DPRINTF(InOrderStall, "STALL: [tid:%i]: Data miss from %08p\n", + tid, cache_req->inst->split2ndAddr); + cache_req->setCompleted(false); + cache_req->setMemStall(true); + } + break; + + case CompleteSecondSplitWrite: + DPRINTF(InOrderCachePort, + "[tid:%i]: [sn:%i]: Trying to Complete Split Data Write Access\n", + tid, inst->seqNum); + + if (cache_req->isMemAccComplete() || + inst->isDataPrefetch() || + inst->isInstPrefetch()) { + removeAddrDependency(inst); + cache_req->setMemStall(false); + cache_req->done(); + } else { + DPRINTF(InOrderStall, "STALL: [tid:%i]: Data miss from %08p\n", + tid, cache_req->inst->split2ndAddr); + cache_req->setCompleted(false); + cache_req->setMemStall(true); + } + break; + default: fatal("Unrecognized command to %s", resName); } @@ -462,8 +809,7 @@ CacheUnit::prefetch(DynInstPtr inst) // Clean-Up cache resource request so // other memory insts. can use them cache_req->setCompleted(); - cacheStatus = cacheAccessComplete; - cacheBlocked = false; + cachePortBlocked = false; cache_req->setMemAccPending(false); cache_req->setMemAccCompleted(); inst->unsetMemAddr(); @@ -482,8 +828,7 @@ CacheUnit::writeHint(DynInstPtr inst) // Clean-Up cache resource request so // other memory insts. can use them cache_req->setCompleted(); - cacheStatus = cacheAccessComplete; - cacheBlocked = false; + cachePortBlocked = false; cache_req->setMemAccPending(false); cache_req->setMemAccCompleted(); inst->unsetMemAddr(); @@ -491,15 +836,21 @@ CacheUnit::writeHint(DynInstPtr inst) // @TODO: Split into doCacheRead() and doCacheWrite() Fault -CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res) +CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res, CacheReqPtr split_req) { Fault fault = NoFault; #if TRACING_ON ThreadID tid = inst->readTid(); #endif - CacheReqPtr cache_req - = dynamic_cast<CacheReqPtr>(reqMap[inst->getCurResSlot()]); + CacheReqPtr cache_req; + + if (split_req == NULL) { + cache_req = dynamic_cast<CacheReqPtr>(reqMap[inst->getCurResSlot()]); + } else{ + cache_req = split_req; + } + assert(cache_req); // Check for LL/SC and if so change command @@ -510,25 +861,28 @@ CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res) if (cache_req->pktCmd == MemCmd::WriteReq) { cache_req->pktCmd = cache_req->memReq->isSwap() ? MemCmd::SwapReq : - (cache_req->memReq->isLLSC() ? MemCmd::StoreCondReq : MemCmd::WriteReq); + (cache_req->memReq->isLLSC() ? MemCmd::StoreCondReq + : MemCmd::WriteReq); } cache_req->dataPkt = new CacheReqPacket(cache_req, cache_req->pktCmd, - Packet::Broadcast); + Packet::Broadcast, cache_req->instIdx); if (cache_req->dataPkt->isRead()) { cache_req->dataPkt->dataStatic(cache_req->reqData); - } else if (cache_req->dataPkt->isWrite()) { - cache_req->dataPkt->dataStatic(&cache_req->inst->storeData); - + } else if (cache_req->dataPkt->isWrite()) { + if (inst->split2ndAccess) { + cache_req->dataPkt->dataStatic(inst->split2ndStoreDataPtr); + } else { + cache_req->dataPkt->dataStatic(&cache_req->inst->storeData); + } + if (cache_req->memReq->isCondSwap()) { assert(write_res); cache_req->memReq->setExtraData(*write_res); } } - cache_req->dataPkt->time = curTick; - bool do_access = true; // flag to suppress cache access Request *memReq = cache_req->dataPkt->req; @@ -546,28 +900,18 @@ CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res) if (do_access) { if (!cachePort->sendTiming(cache_req->dataPkt)) { DPRINTF(InOrderCachePort, - "[tid:%i] [sn:%i] is waiting to retry request\n", - tid, inst->seqNum); - - retrySlot = cache_req->getSlot(); - retryReq = cache_req; - retryPkt = cache_req->dataPkt; - - cacheStatus = cacheWaitRetry; - - //cacheBlocked = true; - - DPRINTF(InOrderStall, "STALL: \n"); - + "[tid:%i] [sn:%i] cannot access cache, because port " + "is blocked. now waiting to retry request\n", tid, + inst->seqNum); cache_req->setCompleted(false); + cachePortBlocked = true; } else { DPRINTF(InOrderCachePort, "[tid:%i] [sn:%i] is now waiting for cache response\n", tid, inst->seqNum); cache_req->setCompleted(); cache_req->setMemAccPending(); - cacheStatus = cacheWaitResponse; - cacheBlocked = false; + cachePortBlocked = false; } } else if (!do_access && memReq->isLLSC()){ // Store-Conditional instructions complete even if they "failed" @@ -594,6 +938,7 @@ CacheUnit::processCacheCompletion(PacketPtr pkt) { // Cast to correct packet type CacheReqPacket* cache_pkt = dynamic_cast<CacheReqPacket*>(pkt); + assert(cache_pkt); if (cache_pkt->cacheReq->isSquashed()) { @@ -601,9 +946,16 @@ CacheUnit::processCacheCompletion(PacketPtr pkt) "Ignoring completion of squashed access, [tid:%i] [sn:%i]\n", cache_pkt->cacheReq->getInst()->readTid(), cache_pkt->cacheReq->getInst()->seqNum); + DPRINTF(RefCount, + "Ignoring completion of squashed access, [tid:%i] [sn:%i]\n", + cache_pkt->cacheReq->getTid(), + cache_pkt->cacheReq->seqNum); cache_pkt->cacheReq->done(); delete cache_pkt; + + cpu->wakeCPU(); + return; } @@ -615,7 +967,16 @@ CacheUnit::processCacheCompletion(PacketPtr pkt) // Cast to correct request type CacheRequest *cache_req = dynamic_cast<CacheReqPtr>( - findRequest(cache_pkt->cacheReq->getInst())); + findSplitRequest(cache_pkt->cacheReq->getInst(), cache_pkt->instIdx)); + + if (!cache_req) { + warn( + "[tid:%u]: [sn:%i]: Can't find slot for cache access to addr. %08p\n", + cache_pkt->cacheReq->getInst()->readTid(), + cache_pkt->cacheReq->getInst()->seqNum, + cache_pkt->cacheReq->getInst()->getMemAddr()); + } + assert(cache_req); @@ -641,8 +1002,9 @@ CacheUnit::processCacheCompletion(PacketPtr pkt) ExtMachInst ext_inst; StaticInstPtr staticInst = NULL; Addr inst_pc = inst->readPC(); - MachInst mach_inst = TheISA::gtoh(*reinterpret_cast<TheISA::MachInst *> - (cache_pkt->getPtr<uint8_t>())); + MachInst mach_inst = + TheISA::gtoh(*reinterpret_cast<TheISA::MachInst *> + (cache_pkt->getPtr<uint8_t>())); predecoder.setTC(cpu->thread[tid]->getTC()); predecoder.moreBytes(inst_pc, inst_pc, mach_inst); @@ -660,9 +1022,33 @@ CacheUnit::processCacheCompletion(PacketPtr pkt) DPRINTF(InOrderCachePort, "[tid:%u]: [sn:%i]: Processing cache access\n", tid, inst->seqNum); - - inst->completeAcc(pkt); - + + if (inst->splitInst) { + inst->splitFinishCnt++; + + if (inst->splitFinishCnt == 2) { + cache_req->memReq->setVirt(0/*inst->tid*/, + inst->getMemAddr(), + inst->splitTotalSize, + 0, + 0); + + Packet split_pkt(cache_req->memReq, cache_req->pktCmd, + Packet::Broadcast); + + + if (inst->isLoad()) { + split_pkt.dataStatic(inst->splitMemData); + } else { + split_pkt.dataStatic(&inst->storeData); + } + + inst->completeAcc(&split_pkt); + } + } else { + inst->completeAcc(pkt); + } + if (inst->isLoad()) { assert(cache_pkt->isRead()); @@ -696,6 +1082,16 @@ CacheUnit::processCacheCompletion(PacketPtr pkt) cache_req->setMemAccPending(false); cache_req->setMemAccCompleted(); + if (cache_req->isMemStall() && + cpu->threadModel == InOrderCPU::SwitchOnCacheMiss) { + DPRINTF(InOrderCachePort, "[tid:%u] Waking up from Cache Miss.\n", tid); + + cpu->activateContext(tid); + + DPRINTF(ThreadModel, "Activating [tid:%i] after return from cache" + "miss.\n", tid); + } + // Wake up the CPU (if it went to sleep and was waiting on this // completion event). cpu->wakeCPU(); @@ -717,22 +1113,14 @@ CacheUnit::processCacheCompletion(PacketPtr pkt) void CacheUnit::recvRetry() { - DPRINTF(InOrderCachePort, "Retrying Request for [tid:%i] [sn:%i]\n", - retryReq->inst->readTid(), retryReq->inst->seqNum); + DPRINTF(InOrderCachePort, "Unblocking Cache Port. \n"); + + assert(cachePortBlocked); - assert(retryPkt != NULL); - assert(cacheBlocked); - assert(cacheStatus == cacheWaitRetry); + // Clear the cache port for use again + cachePortBlocked = false; - if (cachePort->sendTiming(retryPkt)) { - cacheStatus = cacheWaitResponse; - retryPkt = NULL; - cacheBlocked = false; - } else { - DPRINTF(InOrderCachePort, - "Retry Request for [tid:%i] [sn:%i] failed\n", - retryReq->inst->readTid(), retryReq->inst->seqNum); - } + cpu->wakeCPU(); } CacheUnitEvent::CacheUnitEvent() @@ -755,7 +1143,8 @@ CacheUnitEvent::process() tlb_res->tlbBlocked[tid] = false; - tlb_res->cpu->pipelineStage[stage_num]->unsetResStall(tlb_res->reqMap[slotIdx], tid); + tlb_res->cpu->pipelineStage[stage_num]-> + unsetResStall(tlb_res->reqMap[slotIdx], tid); req_ptr->tlbStall = false; @@ -765,6 +1154,26 @@ CacheUnitEvent::process() } void +CacheUnit::squashDueToMemStall(DynInstPtr inst, int stage_num, + InstSeqNum squash_seq_num, ThreadID tid) +{ + // If squashing due to memory stall, then we do NOT want to + // squash the instruction that caused the stall so we + // increment the sequence number here to prevent that. + // + // NOTE: This is only for the SwitchOnCacheMiss Model + // NOTE: If you have multiple outstanding misses from the same + // thread then you need to reevaluate this code + // NOTE: squash should originate from + // pipeline_stage.cc:processInstSchedule + DPRINTF(InOrderCachePort, "Squashing above [sn:%u]\n", + squash_seq_num + 1); + + squash(inst, stage_num, squash_seq_num + 1, tid); +} + + +void CacheUnit::squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, ThreadID tid) { @@ -784,6 +1193,14 @@ CacheUnit::squash(DynInstPtr inst, int stage_num, "[tid:%i] Squashing request from [sn:%i]\n", req_ptr->getInst()->readTid(), req_ptr->getInst()->seqNum); + if (req_ptr->isSquashed()) { + DPRINTF(AddrDep, "Request for [tid:%i] [sn:%i] already squashed, ignoring squash process.\n", + req_ptr->getInst()->readTid(), + req_ptr->getInst()->seqNum); + map_it++; + continue; + } + req_ptr->setSquashed(); req_ptr->getInst()->setSquashed(); @@ -798,7 +1215,8 @@ CacheUnit::squash(DynInstPtr inst, int stage_num, int stall_stage = reqMap[req_slot_num]->getStageNum(); - cpu->pipelineStage[stall_stage]->unsetResStall(reqMap[req_slot_num], tid); + cpu->pipelineStage[stall_stage]-> + unsetResStall(reqMap[req_slot_num], tid); } if (!cache_req->tlbStall && !cache_req->isMemAccPending()) { @@ -807,7 +1225,29 @@ CacheUnit::squash(DynInstPtr inst, int stage_num, // Mark slot for removal from resource slot_remove_list.push_back(req_ptr->getSlot()); + + DPRINTF(InOrderCachePort, + "[tid:%i] Squashing request from [sn:%i]\n", + req_ptr->getInst()->readTid(), req_ptr->getInst()->seqNum); + } else { + DPRINTF(InOrderCachePort, + "[tid:%i] Request from [sn:%i] squashed, but still pending completion.\n", + req_ptr->getInst()->readTid(), req_ptr->getInst()->seqNum); + DPRINTF(RefCount, + "[tid:%i] Request from [sn:%i] squashed (split:%i), but still pending completion.\n", + req_ptr->getInst()->readTid(), req_ptr->getInst()->seqNum, + req_ptr->getInst()->splitInst); } + + if (req_ptr->getInst()->validMemAddr()) { + DPRINTF(AddrDep, "Squash of [tid:%i] [sn:%i], attempting to remove addr. %08p dependencies.\n", + req_ptr->getInst()->readTid(), + req_ptr->getInst()->seqNum, + req_ptr->getInst()->getMemAddr()); + + removeAddrDependency(req_ptr->getInst()); + } + } map_it++; @@ -927,14 +1367,16 @@ CacheUnit::write(DynInstPtr inst, uint8_t data, Addr addr, template<> Fault -CacheUnit::write(DynInstPtr inst, double data, Addr addr, unsigned flags, uint64_t *res) +CacheUnit::write(DynInstPtr inst, double data, Addr addr, unsigned flags, + uint64_t *res) { return write(inst, *(uint64_t*)&data, addr, flags, res); } template<> Fault -CacheUnit::write(DynInstPtr inst, float data, Addr addr, unsigned flags, uint64_t *res) +CacheUnit::write(DynInstPtr inst, float data, Addr addr, unsigned flags, + uint64_t *res) { return write(inst, *(uint32_t*)&data, addr, flags, res); } @@ -942,7 +1384,9 @@ CacheUnit::write(DynInstPtr inst, float data, Addr addr, unsigned flags, uint64_ template<> Fault -CacheUnit::write(DynInstPtr inst, int32_t data, Addr addr, unsigned flags, uint64_t *res) +CacheUnit::write(DynInstPtr inst, int32_t data, Addr addr, unsigned flags, + uint64_t *res) { return write(inst, (uint32_t)data, addr, flags, res); } + diff --git a/src/cpu/inorder/resources/cache_unit.hh b/src/cpu/inorder/resources/cache_unit.hh index c467e9771..9004f3b93 100644 --- a/src/cpu/inorder/resources/cache_unit.hh +++ b/src/cpu/inorder/resources/cache_unit.hh @@ -62,7 +62,6 @@ class CacheUnit : public Resource public: CacheUnit(std::string res_name, int res_id, int res_width, int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params); - virtual ~CacheUnit() {} enum Command { InitiateFetch, @@ -73,7 +72,11 @@ class CacheUnit : public Resource CompleteWriteData, Fetch, ReadData, - WriteData + WriteData, + InitSecondSplitRead, + InitSecondSplitWrite, + CompleteSecondSplitRead, + CompleteSecondSplitWrite }; public: @@ -119,24 +122,19 @@ class CacheUnit : public Resource virtual void recvRetry(); }; - enum CachePortStatus { - cacheWaitResponse, - cacheWaitRetry, - cacheAccessComplete - }; - void init(); virtual ResourceRequest* getRequest(DynInstPtr _inst, int stage_num, int res_idx, int slot_num, unsigned cmd); + ResReqPtr findRequest(DynInstPtr inst); + ResReqPtr findSplitRequest(DynInstPtr inst, int idx); + void requestAgain(DynInstPtr inst, bool &try_request); int getSlot(DynInstPtr inst); - void freeSlot(int slot_num); - /** Execute the function of this resource. The Default is action * is to do nothing. More specific models will derive from this * class and define their own execute function. @@ -146,6 +144,9 @@ class CacheUnit : public Resource void squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, ThreadID tid); + void squashDueToMemStall(DynInstPtr inst, int stage_num, + InstSeqNum squash_seq_num, ThreadID tid); + /** Processes cache completion event. */ void processCacheCompletion(PacketPtr pkt); @@ -159,7 +160,7 @@ class CacheUnit : public Resource /** Returns a specific port. */ Port *getPort(const std::string &if_name, int idx); - + template <class T> Fault read(DynInstPtr inst, Addr addr, T &data, unsigned flags); @@ -173,7 +174,7 @@ class CacheUnit : public Resource /** Read/Write on behalf of an instruction. * curResSlot needs to be a valid value in instruction. */ - Fault doCacheAccess(DynInstPtr inst, uint64_t *write_result=NULL); + Fault doCacheAccess(DynInstPtr inst, uint64_t *write_result=NULL, CacheReqPtr split_req=NULL); void prefetch(DynInstPtr inst); @@ -181,23 +182,18 @@ class CacheUnit : public Resource uint64_t getMemData(Packet *packet); + void setAddrDependency(DynInstPtr inst); + void removeAddrDependency(DynInstPtr inst); + protected: /** Cache interface. */ CachePort *cachePort; - CachePortStatus cacheStatus; - - CacheReqPtr retryReq; + bool cachePortBlocked; - PacketPtr retryPkt; - - int retrySlot; - - bool cacheBlocked; - - std::vector<Addr> addrList; + std::vector<Addr> addrList[ThePipeline::MaxThreads]; - std::map<Addr, InstSeqNum> addrMap; + std::map<Addr, InstSeqNum> addrMap[ThePipeline::MaxThreads]; public: int cacheBlkSize; @@ -249,17 +245,18 @@ class CacheRequest : public ResourceRequest public: CacheRequest(CacheUnit *cres, DynInstPtr inst, int stage_num, int res_idx, int slot_num, unsigned cmd, int req_size, - MemCmd::Command pkt_cmd, unsigned flags, int cpu_id) + MemCmd::Command pkt_cmd, unsigned flags, int cpu_id, int idx) : ResourceRequest(cres, inst, stage_num, res_idx, slot_num, cmd), pktCmd(pkt_cmd), memReq(NULL), reqData(NULL), dataPkt(NULL), retryPkt(NULL), memAccComplete(false), memAccPending(false), - tlbStall(false) + tlbStall(false), splitAccess(false), splitAccessNum(-1), + split2ndAccess(false), instIdx(idx) { } virtual ~CacheRequest() { - if (reqData) { + if (reqData && !splitAccess) { delete [] reqData; } } @@ -273,6 +270,11 @@ class CacheRequest : public ResourceRequest memAccComplete = completed; } + bool is2ndSplit() + { + return split2ndAccess; + } + bool isMemAccComplete() { return memAccComplete; } void setMemAccPending(bool pending = true) { memAccPending = pending; } @@ -288,19 +290,27 @@ class CacheRequest : public ResourceRequest bool memAccComplete; bool memAccPending; bool tlbStall; + + bool splitAccess; + int splitAccessNum; + bool split2ndAccess; + int instIdx; + }; class CacheReqPacket : public Packet { public: CacheReqPacket(CacheRequest *_req, - Command _cmd, short _dest) - : Packet(_req->memReq, _cmd, _dest), cacheReq(_req) + Command _cmd, short _dest, int _idx = 0) + : Packet(_req->memReq, _cmd, _dest), cacheReq(_req), instIdx(_idx) { } CacheRequest *cacheReq; + int instIdx; + }; #endif //__CPU_CACHE_UNIT_HH__ diff --git a/src/cpu/inorder/resources/execution_unit.cc b/src/cpu/inorder/resources/execution_unit.cc index 6c44e2456..429291231 100644 --- a/src/cpu/inorder/resources/execution_unit.cc +++ b/src/cpu/inorder/resources/execution_unit.cc @@ -54,6 +54,17 @@ ExecutionUnit::regStats() .name(name() + ".predictedNotTakenIncorrect") .desc("Number of Branches Incorrectly Predicted As Not Taken)."); + lastExecuteCycle = curTick; + + cyclesExecuted + .name(name() + ".cyclesExecuted") + .desc("Number of Cycles Execution Unit was used."); + + utilization + .name(name() + ".utilization") + .desc("Utilization of Execution Unit (cycles / totalCycles)."); + utilization = cyclesExecuted / cpu->numCycles; + Resource::regStats(); } @@ -75,6 +86,12 @@ ExecutionUnit::execute(int slot_num) { case ExecuteInst: { + if (curTick != lastExecuteCycle) { + lastExecuteCycle = curTick; + cyclesExecuted++; + } + + if (inst->isMemRef()) { panic("%s not configured to handle memory ops.\n", resName); } else if (inst->isControl()) { diff --git a/src/cpu/inorder/resources/execution_unit.hh b/src/cpu/inorder/resources/execution_unit.hh index 46691bbf2..b9cf1d428 100644 --- a/src/cpu/inorder/resources/execution_unit.hh +++ b/src/cpu/inorder/resources/execution_unit.hh @@ -52,7 +52,6 @@ class ExecutionUnit : public Resource { public: ExecutionUnit(std::string res_name, int res_id, int res_width, int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params); - virtual ~ExecutionUnit() {} public: virtual void regStats(); @@ -71,6 +70,11 @@ class ExecutionUnit : public Resource { ///////////////////////////////////////////////////////////////// Stats::Scalar predictedTakenIncorrect; Stats::Scalar predictedNotTakenIncorrect; + + Stats::Scalar cyclesExecuted; + Tick lastExecuteCycle; + + Stats::Formula utilization; }; diff --git a/src/cpu/inorder/resources/fetch_seq_unit.cc b/src/cpu/inorder/resources/fetch_seq_unit.cc index 1d0b92075..03663881c 100644 --- a/src/cpu/inorder/resources/fetch_seq_unit.cc +++ b/src/cpu/inorder/resources/fetch_seq_unit.cc @@ -54,6 +54,11 @@ FetchSeqUnit::FetchSeqUnit(std::string res_name, int res_id, int res_width, } } +FetchSeqUnit::~FetchSeqUnit() +{ + delete [] resourceEvent; +} + void FetchSeqUnit::init() { @@ -336,3 +341,35 @@ FetchSeqUnit::deactivateThread(ThreadID tid) if (thread_it != cpu->fetchPriorityList.end()) cpu->fetchPriorityList.erase(thread_it); } + +void +FetchSeqUnit::suspendThread(ThreadID tid) +{ + deactivateThread(tid); +} + +void +FetchSeqUnit::updateAfterContextSwitch(DynInstPtr inst, ThreadID tid) +{ + pcValid[tid] = true; + + if (cpu->thread[tid]->lastGradIsBranch) { + /** This function assumes that the instruction causing the context + * switch was right after the branch. Thus, if it's not, then + * we are updating incorrectly here + */ + assert(cpu->thread[tid]->lastBranchNextPC == inst->readPC()); + + PC[tid] = cpu->thread[tid]->lastBranchNextNPC; + nextPC[tid] = PC[tid] + instSize; + nextNPC[tid] = nextPC[tid] + instSize; + } else { + PC[tid] = inst->readNextPC(); + nextPC[tid] = inst->readNextNPC(); + nextNPC[tid] = inst->readNextNPC() + instSize; + } + + DPRINTF(InOrderFetchSeq, "[tid:%i]: Updating PCs due to Context Switch." + "Assigning PC:%08p NPC:%08p NNPC:%08p.\n", tid, PC[tid], + nextPC[tid], nextNPC[tid]); +} diff --git a/src/cpu/inorder/resources/fetch_seq_unit.hh b/src/cpu/inorder/resources/fetch_seq_unit.hh index a4495564b..289e150aa 100644 --- a/src/cpu/inorder/resources/fetch_seq_unit.hh +++ b/src/cpu/inorder/resources/fetch_seq_unit.hh @@ -54,12 +54,15 @@ class FetchSeqUnit : public Resource { public: FetchSeqUnit(std::string res_name, int res_id, int res_width, int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params); - virtual ~FetchSeqUnit() {} - + virtual ~FetchSeqUnit(); + virtual void init(); virtual void activateThread(ThreadID tid); virtual void deactivateThread(ThreadID tid); + virtual void suspendThread(ThreadID tid); virtual void execute(int slot_num); + void updateAfterContextSwitch(DynInstPtr inst, ThreadID tid); + /** Override default Resource squash sequence. This actually, * looks in the global communication buffer to get squash diff --git a/src/cpu/inorder/resources/graduation_unit.cc b/src/cpu/inorder/resources/graduation_unit.cc index 2d7cd5c8c..2dad9889a 100644 --- a/src/cpu/inorder/resources/graduation_unit.cc +++ b/src/cpu/inorder/resources/graduation_unit.cc @@ -79,8 +79,6 @@ GraduationUnit::execute(int slot_num) "[tid:%i] Graduating instruction [sn:%i].\n", tid, inst->seqNum); - DPRINTF(RefCount, "Refcount = %i.\n", 0/*inst->curCount()*/); - // Release Non-Speculative "Block" on instructions that could not execute // because there was a non-speculative inst. active. // @TODO: Fix this functionality. Probably too conservative. diff --git a/src/cpu/inorder/resources/graduation_unit.hh b/src/cpu/inorder/resources/graduation_unit.hh index ad222b119..7f0db98d0 100644 --- a/src/cpu/inorder/resources/graduation_unit.hh +++ b/src/cpu/inorder/resources/graduation_unit.hh @@ -63,8 +63,6 @@ class GraduationUnit : public Resource { bool *nonSpecInstActive[ThePipeline::MaxThreads]; InstSeqNum *nonSpecSeqNum[ThePipeline::MaxThreads]; - - /** @todo: Add Resource Stats Here */ }; #endif //__CPU_INORDER_GRAD_UNIT_HH__ diff --git a/src/cpu/inorder/resources/inst_buffer.cc b/src/cpu/inorder/resources/inst_buffer.cc index bb308b0ea..17b308db0 100644 --- a/src/cpu/inorder/resources/inst_buffer.cc +++ b/src/cpu/inorder/resources/inst_buffer.cc @@ -52,7 +52,8 @@ InstBuffer::regStats() { instsBypassed .name(name() + ".instsBypassed") - .desc("Number of Instructions Bypassed."); + .desc("Number of Instructions Bypassed.") + .prereq(instsBypassed); Resource::regStats(); } diff --git a/src/cpu/inorder/resources/mult_div_unit.hh b/src/cpu/inorder/resources/mult_div_unit.hh index d3dd0260d..19688b09f 100644 --- a/src/cpu/inorder/resources/mult_div_unit.hh +++ b/src/cpu/inorder/resources/mult_div_unit.hh @@ -57,7 +57,6 @@ class MultDivUnit : public Resource { public: MultDivUnit(std::string res_name, int res_id, int res_width, int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params); - virtual ~MultDivUnit() {} public: /** Override default Resource getSlot(). Will only getSlot if diff --git a/src/cpu/inorder/resources/use_def.cc b/src/cpu/inorder/resources/use_def.cc index 36392d054..5fd6a4724 100644 --- a/src/cpu/inorder/resources/use_def.cc +++ b/src/cpu/inorder/resources/use_def.cc @@ -59,6 +59,17 @@ UseDefUnit::UseDefUnit(string res_name, int res_id, int res_width, } +void +UseDefUnit::regStats() +{ + uniqueRegsPerSwitch + .name(name() + ".uniqueRegsPerSwitch") + .desc("Number of Unique Registers Needed Per Context Switch") + .prereq(uniqueRegsPerSwitch); + + Resource::regStats(); +} + ResReqPtr UseDefUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx, int slot_num, unsigned cmd) @@ -75,7 +86,8 @@ UseDefUnit::findRequest(DynInstPtr inst) map<int, ResReqPtr>::iterator map_end = reqMap.end(); while (map_it != map_end) { - UseDefRequest* ud_req = dynamic_cast<UseDefRequest*>((*map_it).second); + UseDefRequest* ud_req = + dynamic_cast<UseDefRequest*>((*map_it).second); assert(ud_req); if (ud_req && @@ -107,9 +119,9 @@ UseDefUnit::execute(int slot_idx) // in the pipeline then stall instructions here if (*nonSpecInstActive[tid] == true && seq_num > *nonSpecSeqNum[tid]) { - DPRINTF(InOrderUseDef, "[tid:%i]: [sn:%i] cannot execute because there is " - "non-speculative instruction [sn:%i] has not graduated.\n", - tid, seq_num, *nonSpecSeqNum[tid]); + DPRINTF(InOrderUseDef, "[tid:%i]: [sn:%i] cannot execute because" + "there is non-speculative instruction [sn:%i] has not " + "graduated.\n", tid, seq_num, *nonSpecSeqNum[tid]); return; } else if (inst->isNonSpeculative()) { *nonSpecInstActive[tid] = true; @@ -121,91 +133,134 @@ UseDefUnit::execute(int slot_idx) case ReadSrcReg: { int reg_idx = inst->_srcRegIdx[ud_idx]; - - DPRINTF(InOrderUseDef, "[tid:%i]: Attempting to read source register idx %i (reg #%i).\n", + + DPRINTF(InOrderUseDef, "[tid:%i]: Attempting to read source " + "register idx %i (reg #%i).\n", tid, ud_idx, reg_idx); - // Ask register dependency map if it is OK to read from Arch. Reg. File + // Ask register dependency map if it is OK to read from Arch. + // Reg. File if (regDepMap[tid]->canRead(reg_idx, inst)) { + + uniqueRegMap[reg_idx] = true; + if (inst->seqNum <= outReadSeqNum[tid]) { if (reg_idx < FP_Base_DepTag) { - DPRINTF(InOrderUseDef, "[tid:%i]: Reading Int Reg %i from Register File:%i.\n", - tid, reg_idx, cpu->readIntReg(reg_idx,inst->readTid())); + DPRINTF(InOrderUseDef, "[tid:%i]: Reading Int Reg %i" + "from Register File:%i.\n", + tid, + reg_idx, + cpu->readIntReg(reg_idx,inst->readTid())); inst->setIntSrc(ud_idx, - cpu->readIntReg(reg_idx,inst->readTid())); + cpu->readIntReg(reg_idx, + inst->readTid())); } else if (reg_idx < Ctrl_Base_DepTag) { reg_idx -= FP_Base_DepTag; - DPRINTF(InOrderUseDef, "[tid:%i]: Reading Float Reg %i from Register File:%x (%08f).\n", + DPRINTF(InOrderUseDef, "[tid:%i]: Reading Float Reg %i" + "from Register File:%x (%08f).\n", tid, reg_idx, - cpu->readFloatRegBits(reg_idx, inst->readTid()), - cpu->readFloatReg(reg_idx, inst->readTid())); + cpu->readFloatRegBits(reg_idx, + inst->readTid()), + cpu->readFloatReg(reg_idx, + inst->readTid())); inst->setFloatSrc(ud_idx, - cpu->readFloatReg(reg_idx, inst->readTid())); + cpu->readFloatReg(reg_idx, + inst->readTid())); } else { reg_idx -= Ctrl_Base_DepTag; - DPRINTF(InOrderUseDef, "[tid:%i]: Reading Misc Reg %i from Register File:%i.\n", - tid, reg_idx, cpu->readMiscReg(reg_idx, inst->readTid())); + DPRINTF(InOrderUseDef, "[tid:%i]: Reading Misc Reg %i " + "from Register File:%i.\n", + tid, + reg_idx, + cpu->readMiscReg(reg_idx, + inst->readTid())); inst->setIntSrc(ud_idx, - cpu->readMiscReg(reg_idx, inst->readTid())); + cpu->readMiscReg(reg_idx, + inst->readTid())); } outReadSeqNum[tid] = maxSeqNum; ud_req->done(); } else { - DPRINTF(InOrderUseDef, "[tid:%i]: Unable to read because of [sn:%i] hasnt read it's" - " registers yet.\n", tid, outReadSeqNum[tid]); - DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting for [sn:%i] to write\n", + DPRINTF(InOrderUseDef, "[tid:%i]: Unable to read because " + "of [sn:%i] hasnt read it's registers yet.\n", + tid, outReadSeqNum[tid]); + DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting for " + "[sn:%i] to write\n", tid, outReadSeqNum[tid]); + ud_req->done(false); } } else { // Look for forwarding opportunities - DynInstPtr forward_inst = regDepMap[tid]->canForward(reg_idx, ud_idx, inst); + DynInstPtr forward_inst = regDepMap[tid]->canForward(reg_idx, + ud_idx, + inst); if (forward_inst) { if (inst->seqNum <= outReadSeqNum[tid]) { - int dest_reg_idx = forward_inst->getDestIdxNum(reg_idx); + int dest_reg_idx = + forward_inst->getDestIdxNum(reg_idx); if (reg_idx < FP_Base_DepTag) { - DPRINTF(InOrderUseDef, "[tid:%i]: Forwarding dest. reg value 0x%x from " + DPRINTF(InOrderUseDef, "[tid:%i]: Forwarding dest." + " reg value 0x%x from " "[sn:%i] to [sn:%i] source #%i.\n", - tid, forward_inst->readIntResult(dest_reg_idx) , - forward_inst->seqNum, inst->seqNum, ud_idx); - inst->setIntSrc(ud_idx, forward_inst->readIntResult(dest_reg_idx)); + tid, + forward_inst->readIntResult(dest_reg_idx), + forward_inst->seqNum, + inst->seqNum, ud_idx); + inst->setIntSrc(ud_idx, + forward_inst-> + readIntResult(dest_reg_idx)); } else if (reg_idx < Ctrl_Base_DepTag) { - DPRINTF(InOrderUseDef, "[tid:%i]: Forwarding dest. reg value 0x%x from " + DPRINTF(InOrderUseDef, "[tid:%i]: Forwarding dest." + " reg value 0x%x from " "[sn:%i] to [sn:%i] source #%i.\n", - tid, forward_inst->readFloatResult(dest_reg_idx) , + tid, + forward_inst->readFloatResult(dest_reg_idx), forward_inst->seqNum, inst->seqNum, ud_idx); inst->setFloatSrc(ud_idx, - forward_inst->readFloatResult(dest_reg_idx)); + forward_inst-> + readFloatResult(dest_reg_idx)); } else { - DPRINTF(InOrderUseDef, "[tid:%i]: Forwarding dest. reg value 0x%x from " + DPRINTF(InOrderUseDef, "[tid:%i]: Forwarding dest." + " reg value 0x%x from " "[sn:%i] to [sn:%i] source #%i.\n", - tid, forward_inst->readIntResult(dest_reg_idx) , - forward_inst->seqNum, inst->seqNum, ud_idx); - inst->setIntSrc(ud_idx, forward_inst->readIntResult(dest_reg_idx)); + tid, + forward_inst->readIntResult(dest_reg_idx), + forward_inst->seqNum, + inst->seqNum, ud_idx); + inst->setIntSrc(ud_idx, + forward_inst-> + readIntResult(dest_reg_idx)); } outReadSeqNum[tid] = maxSeqNum; ud_req->done(); } else { - DPRINTF(InOrderUseDef, "[tid:%i]: Unable to read because of [sn:%i] hasnt read it's" + DPRINTF(InOrderUseDef, "[tid:%i]: Unable to read " + "because of [sn:%i] hasnt read it's" " registers yet.\n", tid, outReadSeqNum[tid]); - DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting for [sn:%i] to forward\n", + DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting for " + "[sn:%i] to forward\n", tid, outReadSeqNum[tid]); + ud_req->done(false); } } else { - DPRINTF(InOrderUseDef, "[tid:%i]: Source register idx: %i is not ready to read.\n", + DPRINTF(InOrderUseDef, "[tid:%i]: Source register idx: %i" + "is not ready to read.\n", tid, reg_idx); - DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting to read register (idx=%i)\n", + DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting to read " + "register (idx=%i)\n", tid, reg_idx); outReadSeqNum[tid] = inst->seqNum; + ud_req->done(false); } } } @@ -216,12 +271,14 @@ UseDefUnit::execute(int slot_idx) int reg_idx = inst->_destRegIdx[ud_idx]; if (regDepMap[tid]->canWrite(reg_idx, inst)) { - DPRINTF(InOrderUseDef, "[tid:%i]: Flattening register idx %i & Attempting to write to Register File.\n", + DPRINTF(InOrderUseDef, "[tid:%i]: Flattening register idx %i &" + "Attempting to write to Register File.\n", tid, reg_idx); - + uniqueRegMap[reg_idx] = true; if (inst->seqNum <= outReadSeqNum[tid]) { if (reg_idx < FP_Base_DepTag) { - DPRINTF(InOrderUseDef, "[tid:%i]: Writing Int. Result 0x%x to register idx %i.\n", + DPRINTF(InOrderUseDef, "[tid:%i]: Writing Int. Result " + "0x%x to register idx %i.\n", tid, inst->readIntResult(ud_idx), reg_idx); // Remove Dependencies @@ -236,33 +293,54 @@ UseDefUnit::execute(int slot_idx) reg_idx -= FP_Base_DepTag; - if (inst->resultType(ud_idx) == InOrderDynInst::Integer) { - DPRINTF(InOrderUseDef, "[tid:%i]: Writing FP-Bits Result 0x%x (bits:0x%x) to register idx %i.\n", - tid, inst->readFloatResult(ud_idx), inst->readIntResult(ud_idx), reg_idx); - - cpu->setFloatRegBits(reg_idx, // Check for FloatRegBits Here + if (inst->resultType(ud_idx) == + InOrderDynInst::Integer) { + DPRINTF(InOrderUseDef, "[tid:%i]: Writing FP-Bits " + "Result 0x%x (bits:0x%x) to register " + "idx %i.\n", + tid, + inst->readFloatResult(ud_idx), + inst->readIntResult(ud_idx), + reg_idx); + + // Check for FloatRegBits Here + cpu->setFloatRegBits(reg_idx, inst->readIntResult(ud_idx), inst->readTid()); - } else if (inst->resultType(ud_idx) == InOrderDynInst::Float) { - DPRINTF(InOrderUseDef, "[tid:%i]: Writing Float Result 0x%x (bits:0x%x) to register idx %i.\n", - tid, inst->readFloatResult(ud_idx), inst->readIntResult(ud_idx), reg_idx); + } else if (inst->resultType(ud_idx) == + InOrderDynInst::Float) { + DPRINTF(InOrderUseDef, "[tid:%i]: Writing Float " + "Result 0x%x (bits:0x%x) to register " + "idx %i.\n", + tid, inst->readFloatResult(ud_idx), + inst->readIntResult(ud_idx), + reg_idx); cpu->setFloatReg(reg_idx, inst->readFloatResult(ud_idx), inst->readTid()); - } else if (inst->resultType(ud_idx) == InOrderDynInst::Double) { - DPRINTF(InOrderUseDef, "[tid:%i]: Writing Double Result 0x%x (bits:0x%x) to register idx %i.\n", - tid, inst->readFloatResult(ud_idx), inst->readIntResult(ud_idx), reg_idx); - - cpu->setFloatReg(reg_idx, // Check for FloatRegBits Here + } else if (inst->resultType(ud_idx) == + InOrderDynInst::Double) { + DPRINTF(InOrderUseDef, "[tid:%i]: Writing Double " + "Result 0x%x (bits:0x%x) to register " + "idx %i.\n", + tid, + inst->readFloatResult(ud_idx), + inst->readIntResult(ud_idx), + reg_idx); + + // Check for FloatRegBits Here + cpu->setFloatReg(reg_idx, inst->readFloatResult(ud_idx), inst->readTid()); } else { - panic("Result Type Not Set For [sn:%i] %s.\n", inst->seqNum, inst->instName()); + panic("Result Type Not Set For [sn:%i] %s.\n", + inst->seqNum, inst->instName()); } } else { - DPRINTF(InOrderUseDef, "[tid:%i]: Writing Misc. 0x%x to register idx %i.\n", + DPRINTF(InOrderUseDef, "[tid:%i]: Writing Misc. 0x%x " + "to register idx %i.\n", tid, inst->readIntResult(ud_idx), reg_idx); // Remove Dependencies @@ -279,17 +357,23 @@ UseDefUnit::execute(int slot_idx) ud_req->done(); } else { - DPRINTF(InOrderUseDef, "[tid:%i]: Unable to write because of [sn:%i] hasnt read it's" + DPRINTF(InOrderUseDef, "[tid:%i]: Unable to write because " + "of [sn:%i] hasnt read it's" " registers yet.\n", tid, outReadSeqNum); - DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting for [sn:%i] to read\n", + DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting for " + "[sn:%i] to read\n", tid, outReadSeqNum); + ud_req->done(false); } } else { - DPRINTF(InOrderUseDef, "[tid:%i]: Dest. register idx: %i is not ready to write.\n", + DPRINTF(InOrderUseDef, "[tid:%i]: Dest. register idx: %i is " + "not ready to write.\n", tid, reg_idx); - DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting to write register (idx=%i)\n", + DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting to write " + "register (idx=%i)\n", tid, reg_idx); outWriteSeqNum[tid] = inst->seqNum; + ud_req->done(false); } } break; @@ -323,12 +407,16 @@ UseDefUnit::squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, req_ptr->getInst()->readTid(), req_ptr->getInst()->seqNum); - regDepMap[tid]->remove(req_ptr->getInst()); - int req_slot_num = req_ptr->getSlot(); - if (latency > 0) + if (latency > 0) { + assert(0); + unscheduleEvent(req_slot_num); + } + + // Mark request for later removal + cpu->reqRemoveList.push(req_ptr); // Mark slot for removal from resource slot_remove_list.push_back(req_ptr->getSlot()); @@ -343,18 +431,29 @@ UseDefUnit::squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, } if (outReadSeqNum[tid] >= squash_seq_num) { - DPRINTF(InOrderUseDef, "[tid:%i]: Outstanding Read Seq Num Reset.\n", tid); + DPRINTF(InOrderUseDef, "[tid:%i]: Outstanding Read Seq Num Reset.\n", + tid); outReadSeqNum[tid] = maxSeqNum; } else if (outReadSeqNum[tid] != maxSeqNum) { - DPRINTF(InOrderUseDef, "[tid:%i]: No need to reset Outstanding Read Seq Num %i\n", + DPRINTF(InOrderUseDef, "[tid:%i]: No need to reset Outstanding Read " + "Seq Num %i\n", tid, outReadSeqNum[tid]); } if (outWriteSeqNum[tid] >= squash_seq_num) { - DPRINTF(InOrderUseDef, "[tid:%i]: Outstanding Write Seq Num Reset.\n", tid); + DPRINTF(InOrderUseDef, "[tid:%i]: Outstanding Write Seq Num Reset.\n", + tid); outWriteSeqNum[tid] = maxSeqNum; } else if (outWriteSeqNum[tid] != maxSeqNum) { - DPRINTF(InOrderUseDef, "[tid:%i]: No need to reset Outstanding Write Seq Num %i\n", + DPRINTF(InOrderUseDef, "[tid:%i]: No need to reset Outstanding Write " + "Seq Num %i\n", tid, outWriteSeqNum[tid]); } } + +void +UseDefUnit::updateAfterContextSwitch(DynInstPtr inst, ThreadID tid) +{ + uniqueRegsPerSwitch = uniqueRegMap.size(); + uniqueRegMap.clear(); +} diff --git a/src/cpu/inorder/resources/use_def.hh b/src/cpu/inorder/resources/use_def.hh index 6c76d8ab5..41d758dd7 100644 --- a/src/cpu/inorder/resources/use_def.hh +++ b/src/cpu/inorder/resources/use_def.hh @@ -68,8 +68,12 @@ class UseDefUnit : public Resource { virtual void squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, ThreadID tid); + void updateAfterContextSwitch(DynInstPtr inst, ThreadID tid); + const InstSeqNum maxSeqNum; + void regStats(); + protected: RegDepMap *regDepMap[ThePipeline::MaxThreads]; @@ -84,14 +88,18 @@ class UseDefUnit : public Resource { InstSeqNum floatRegSize[ThePipeline::MaxThreads]; + Stats::Average uniqueRegsPerSwitch; + std::map<unsigned, bool> uniqueRegMap; + public: class UseDefRequest : public ResourceRequest { public: typedef ThePipeline::DynInstPtr DynInstPtr; public: - UseDefRequest(UseDefUnit *res, DynInstPtr inst, int stage_num, int res_idx, - int slot_num, unsigned cmd, int use_def_idx) + UseDefRequest(UseDefUnit *res, DynInstPtr inst, int stage_num, + int res_idx, int slot_num, unsigned cmd, + int use_def_idx) : ResourceRequest(res, inst, stage_num, res_idx, slot_num, cmd), useDefIdx(use_def_idx) { } |