From 792d5b9e5ee40e58b922ae32e5a6ee9aa9586cbc Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Fri, 18 May 2007 22:35:04 -0700 Subject: First set of changes for reorganized cache coherence support. Compiles but doesn't work... committing just so I can merge (stupid bk!). src/mem/bridge.cc: Get rid of SNOOP_COMMIT. src/mem/bus.cc: src/mem/packet.hh: Get rid of SNOOP_COMMIT & two-pass snoop. First bits of EXPRESS_SNOOP support. src/mem/cache/base_cache.cc: src/mem/cache/base_cache.hh: src/mem/cache/cache.hh: src/mem/cache/cache_impl.hh: src/mem/cache/miss/blocking_buffer.cc: src/mem/cache/miss/miss_queue.cc: src/mem/cache/prefetch/base_prefetcher.cc: Big reorg of ports and port-related functions & events. src/mem/cache/cache.cc: src/mem/cache/cache_builder.cc: src/mem/cache/coherence/SConscript: Get rid of UniCoherence object. --HG-- extra : convert_revision : 7672434fa3115c9b1c94686f497e57e90413b7c3 --- src/mem/bridge.cc | 6 - src/mem/bus.cc | 43 ++-- src/mem/cache/base_cache.cc | 315 +++++++++-------------------- src/mem/cache/base_cache.hh | 321 +++++------------------------- src/mem/cache/cache.cc | 6 - src/mem/cache/cache.hh | 52 +++-- src/mem/cache/cache_builder.cc | 10 +- src/mem/cache/cache_impl.hh | 278 ++++++++++++++++++++------ src/mem/cache/coherence/SConscript | 1 - src/mem/cache/coherence/uni_coherence.cc | 135 ------------- src/mem/cache/coherence/uni_coherence.hh | 146 -------------- src/mem/cache/miss/blocking_buffer.cc | 14 +- src/mem/cache/miss/miss_queue.cc | 16 +- src/mem/cache/prefetch/base_prefetcher.cc | 8 +- src/mem/packet.hh | 2 +- 15 files changed, 435 insertions(+), 918 deletions(-) delete mode 100644 src/mem/cache/coherence/uni_coherence.cc delete mode 100644 src/mem/cache/coherence/uni_coherence.hh diff --git a/src/mem/bridge.cc b/src/mem/bridge.cc index f525ccb48..5460c88dd 100644 --- a/src/mem/bridge.cc +++ b/src/mem/bridge.cc @@ -112,10 +112,6 @@ Bridge::BridgePort::reqQueueFull() bool Bridge::BridgePort::recvTiming(PacketPtr pkt) { - if (!(pkt->flags & SNOOP_COMMIT)) - return true; - - DPRINTF(BusBridge, "recvTiming: src %d dest %d addr 0x%x\n", pkt->getSrc(), pkt->getDest(), pkt->getAddr()); @@ -255,8 +251,6 @@ Bridge::BridgePort::trySend() PacketPtr pkt = buf->pkt; - pkt->flags &= ~SNOOP_COMMIT; //CLear it if it was set - if (pkt->cmd == MemCmd::WriteInvalidateReq && fixPartialWrite && pkt->result != Packet::Nacked && pkt->getOffset(pbs) && pkt->getSize() != pbs) { diff --git a/src/mem/bus.cc b/src/mem/bus.cc index 95d4e2873..895123f8b 100644 --- a/src/mem/bus.cc +++ b/src/mem/bus.cc @@ -182,8 +182,10 @@ Bus::recvTiming(PacketPtr pkt) // If the bus is busy, or other devices are in line ahead of the current // one, put this device on the retry list. - if (tickNextIdle > curTick || - (retryList.size() && (!inRetry || pktPort != retryList.front()))) { + if (!(pkt->flags & EXPRESS_SNOOP) && + tickNextIdle > curTick || + (retryList.size() && (!inRetry || pktPort != retryList.front()))) + { addToRetryList(pktPort); DPRINTF(Bus, "recvTiming: Bus is busy, returning false\n"); return false; @@ -195,31 +197,18 @@ Bus::recvTiming(PacketPtr pkt) // access has been handled twice. if (dest == Packet::Broadcast) { port = findPort(pkt->getAddr(), pkt->getSrc()); - pkt->flags &= ~SNOOP_COMMIT; - if (timingSnoop(pkt, port ? port : interfaces[pkt->getSrc()])) { - bool success; - - pkt->flags |= SNOOP_COMMIT; - success = timingSnoop(pkt, port ? port : interfaces[pkt->getSrc()]); - assert(success); - - if (pkt->flags & SATISFIED) { - //Cache-Cache transfer occuring - if (inRetry) { - retryList.front()->onRetryList(false); - retryList.pop_front(); - inRetry = false; - } - occupyBus(pkt); - DPRINTF(Bus, "recvTiming: Packet sucessfully sent\n"); - return true; + timingSnoop(pkt, port ? port : interfaces[pkt->getSrc()]); + + if (pkt->flags & SATISFIED) { + //Cache-Cache transfer occuring + if (inRetry) { + retryList.front()->onRetryList(false); + retryList.pop_front(); + inRetry = false; } - } else { - //Snoop didn't succeed - DPRINTF(Bus, "Adding1 a retry to RETRY list %d\n", - pktPort->getId()); - addToRetryList(pktPort); - return false; + occupyBus(pkt); + DPRINTF(Bus, "recvTiming: Packet sucessfully sent\n"); + return true; } } else { assert(dest >= 0 && dest < maxId); @@ -426,7 +415,6 @@ Bus::recvAtomic(PacketPtr pkt) DPRINTF(Bus, "recvAtomic: packet src %d dest %d addr 0x%x cmd %s\n", pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString()); assert(pkt->getDest() == Packet::Broadcast); - pkt->flags |= SNOOP_COMMIT; // Assume one bus cycle in order to get through. This may have // some clock skew issues yet again... @@ -451,7 +439,6 @@ Bus::recvFunctional(PacketPtr pkt) DPRINTF(Bus, "recvFunctional: packet src %d dest %d addr 0x%x cmd %s\n", pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString()); assert(pkt->getDest() == Packet::Broadcast); - pkt->flags |= SNOOP_COMMIT; Port* port = findPort(pkt->getAddr(), pkt->getSrc()); functionalSnoop(pkt, port ? port : interfaces[pkt->getSrc()]); diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc index 3ed4b84d1..b699271f7 100644 --- a/src/mem/cache/base_cache.cc +++ b/src/mem/cache/base_cache.cc @@ -40,29 +40,38 @@ using namespace std; -BaseCache::CachePort::CachePort(const std::string &_name, BaseCache *_cache, - bool _isCpuSide) - : Port(_name, _cache), cache(_cache), isCpuSide(_isCpuSide) +BaseCache::CachePort::CachePort(const std::string &_name, BaseCache *_cache) + : Port(_name, _cache), cache(_cache), otherPort(NULL) { blocked = false; waitingOnRetry = false; - //Start ports at null if more than one is created we should panic - //cpuSidePort = NULL; - //memSidePort = NULL; } +BaseCache::BaseCache(const std::string &name, Params ¶ms) + : MemObject(name), + blocked(0), blockedSnoop(0), + blkSize(params.blkSize), + missCount(params.maxMisses), drainEvent(NULL) +{ +} + + + void BaseCache::CachePort::recvStatusChange(Port::Status status) { - cache->recvStatusChange(status, isCpuSide); + if (status == Port::RangeChange) { + otherPort->sendStatusChange(Port::RangeChange); + } } void BaseCache::CachePort::getDeviceAddressRanges(AddrRangeList &resp, AddrRangeList &snoop) { - cache->getAddressRanges(resp, snoop, isCpuSide); + AddrRangeList dummy; + otherPort->getPeerAddressRanges(resp, dummy); } int @@ -115,92 +124,99 @@ BaseCache::CachePort::checkAndSendFunctional(PacketPtr pkt) sendFunctional(pkt); } + void -BaseCache::CachePort::recvRetry() +BaseCache::CachePort::respond(PacketPtr pkt, Tick time) { - PacketPtr pkt; - assert(waitingOnRetry); - if (!drainList.empty()) { - DPRINTF(CachePort, "%s attempting to send a retry for response (%i waiting)\n" - , name(), drainList.size()); - //We have some responses to drain first - pkt = drainList.front(); - drainList.pop_front(); - if (sendTiming(pkt)) { - DPRINTF(CachePort, "%s sucessful in sending a retry for" - "response (%i still waiting)\n", name(), drainList.size()); - if (!drainList.empty() || - !isCpuSide && cache->doMasterRequest() || - isCpuSide && cache->doSlaveRequest()) { - - DPRINTF(CachePort, "%s has more responses/requests\n", name()); - new BaseCache::RequestEvent(this, curTick + 1); - } - waitingOnRetry = false; - } - else { - drainList.push_front(pkt); + assert(time >= curTick); + if (pkt->needsResponse()) { + if (transmitList.empty()) { + assert(!responseEvent->scheduled()); + responseEvent->schedule(time); + transmitList.push_back(std::pair(time,pkt)); + return; } - // Check if we're done draining once this list is empty - if (drainList.empty()) - cache->checkDrain(); - } - else if (!isCpuSide) - { - DPRINTF(CachePort, "%s attempting to send a retry for MSHR\n", name()); - if (!cache->doMasterRequest()) { - //This can happen if I am the owner of a block and see an upgrade - //while the block was in my WB Buffers. I just remove the - //wb and de-assert the masterRequest - waitingOnRetry = false; + + // something is on the list and this belongs at the end + if (time >= transmitList.back().first) { + transmitList.push_back(std::pair(time,pkt)); return; } - pkt = cache->getPacket(); - MSHR* mshr = (MSHR*) pkt->senderState; - //Copy the packet, it may be modified/destroyed elsewhere - PacketPtr copyPkt = new Packet(*pkt); - copyPkt->dataStatic(pkt->getPtr()); - mshr->pkt = copyPkt; - - bool success = sendTiming(pkt); - DPRINTF(Cache, "Address %x was %s in sending the timing request\n", - pkt->getAddr(), success ? "succesful" : "unsuccesful"); - - waitingOnRetry = !success; - if (waitingOnRetry) { - DPRINTF(CachePort, "%s now waiting on a retry\n", name()); + // Something is on the list and this belongs somewhere else + std::list >::iterator i = + transmitList.begin(); + std::list >::iterator end = + transmitList.end(); + bool done = false; + + while (i != end && !done) { + if (time < i->first) { + if (i == transmitList.begin()) { + //Inserting at begining, reschedule + responseEvent->reschedule(time); + } + transmitList.insert(i,std::pair(time,pkt)); + done = true; + } + i++; + } + } + else { + assert(0); + // this code was on the cpuSidePort only... do we still need it? + if (pkt->cmd != MemCmd::UpgradeReq) + { + delete pkt->req; + delete pkt; } + } +} - cache->sendResult(pkt, mshr, success); +bool +BaseCache::CachePort::drainResponse() +{ + DPRINTF(CachePort, + "%s attempting to send a retry for response (%i waiting)\n", + name(), drainList.size()); + //We have some responses to drain first + PacketPtr pkt = drainList.front(); + if (sendTiming(pkt)) { + drainList.pop_front(); + DPRINTF(CachePort, "%s sucessful in sending a retry for" + "response (%i still waiting)\n", name(), drainList.size()); + if (!drainList.empty() || isBusRequested()) { - if (success && cache->doMasterRequest()) - { - DPRINTF(CachePort, "%s has more requests\n", name()); - //Still more to issue, rerequest in 1 cycle - new BaseCache::RequestEvent(this, curTick + 1); + DPRINTF(CachePort, "%s has more responses/requests\n", name()); + return false; } + } else { + waitingOnRetry = true; + DPRINTF(CachePort, "%s now waiting on a retry\n", name()); } - else - { - assert(cache->doSlaveRequest()); - //pkt = cache->getCoherencePacket(); - //We save the packet, no reordering on CSHRS - pkt = cache->getCoherencePacket(); - MSHR* cshr = (MSHR*)pkt->senderState; - bool success = sendTiming(pkt); - cache->sendCoherenceResult(pkt, cshr, success); - waitingOnRetry = !success; - if (success && cache->doSlaveRequest()) - { - DPRINTF(CachePort, "%s has more requests\n", name()); - //Still more to issue, rerequest in 1 cycle - new BaseCache::RequestEvent(this, curTick + 1); + return true; +} + + +bool +BaseCache::CachePort::recvRetryCommon() +{ + assert(waitingOnRetry); + waitingOnRetry = false; + if (!drainList.empty()) { + if (!drainResponse()) { + // more responses to drain... re-request bus + scheduleRequestEvent(curTick + 1); } + // Check if we're done draining once this list is empty + if (drainList.empty()) { + cache->checkDrain(); + } + return true; } - if (waitingOnRetry) DPRINTF(CachePort, "%s STILL Waiting on retry\n", name()); - else DPRINTF(CachePort, "%s no longer waiting on retry\n", name()); - return; + return false; } + + void BaseCache::CachePort::setBlocked() { @@ -225,143 +241,6 @@ BaseCache::CachePort::clearBlocked() } } -BaseCache::RequestEvent::RequestEvent(CachePort *_cachePort, Tick when) - : Event(&mainEventQueue, CPU_Tick_Pri), cachePort(_cachePort) -{ - this->setFlags(AutoDelete); - schedule(when); -} - -void -BaseCache::RequestEvent::process() -{ - if (cachePort->waitingOnRetry) return; - //We have some responses to drain first - if (!cachePort->drainList.empty()) { - DPRINTF(CachePort, "%s trying to drain a response\n", cachePort->name()); - if (cachePort->sendTiming(cachePort->drainList.front())) { - DPRINTF(CachePort, "%s drains a response succesfully\n", cachePort->name()); - cachePort->drainList.pop_front(); - if (!cachePort->drainList.empty() || - !cachePort->isCpuSide && cachePort->cache->doMasterRequest() || - cachePort->isCpuSide && cachePort->cache->doSlaveRequest()) { - - DPRINTF(CachePort, "%s still has outstanding bus reqs\n", cachePort->name()); - this->schedule(curTick + 1); - } - } - else { - cachePort->waitingOnRetry = true; - DPRINTF(CachePort, "%s now waiting on a retry\n", cachePort->name()); - } - } - else if (!cachePort->isCpuSide) - { //MSHR - DPRINTF(CachePort, "%s trying to send a MSHR request\n", cachePort->name()); - if (!cachePort->cache->doMasterRequest()) { - //This can happen if I am the owner of a block and see an upgrade - //while the block was in my WB Buffers. I just remove the - //wb and de-assert the masterRequest - return; - } - - PacketPtr pkt = cachePort->cache->getPacket(); - MSHR* mshr = (MSHR*) pkt->senderState; - //Copy the packet, it may be modified/destroyed elsewhere - PacketPtr copyPkt = new Packet(*pkt); - copyPkt->dataStatic(pkt->getPtr()); - mshr->pkt = copyPkt; - - bool success = cachePort->sendTiming(pkt); - DPRINTF(Cache, "Address %x was %s in sending the timing request\n", - pkt->getAddr(), success ? "succesful" : "unsuccesful"); - - cachePort->waitingOnRetry = !success; - if (cachePort->waitingOnRetry) { - DPRINTF(CachePort, "%s now waiting on a retry\n", cachePort->name()); - } - - cachePort->cache->sendResult(pkt, mshr, success); - if (success && cachePort->cache->doMasterRequest()) - { - DPRINTF(CachePort, "%s still more MSHR requests to send\n", - cachePort->name()); - //Still more to issue, rerequest in 1 cycle - this->schedule(curTick+1); - } - } - else - { - //CSHR - assert(cachePort->cache->doSlaveRequest()); - PacketPtr pkt = cachePort->cache->getCoherencePacket(); - MSHR* cshr = (MSHR*) pkt->senderState; - bool success = cachePort->sendTiming(pkt); - cachePort->cache->sendCoherenceResult(pkt, cshr, success); - cachePort->waitingOnRetry = !success; - if (cachePort->waitingOnRetry) - DPRINTF(CachePort, "%s now waiting on a retry\n", cachePort->name()); - if (success && cachePort->cache->doSlaveRequest()) - { - DPRINTF(CachePort, "%s still more CSHR requests to send\n", - cachePort->name()); - //Still more to issue, rerequest in 1 cycle - this->schedule(curTick+1); - } - } -} - -const char * -BaseCache::RequestEvent::description() -{ - return "Cache request event"; -} - -BaseCache::ResponseEvent::ResponseEvent(CachePort *_cachePort) - : Event(&mainEventQueue, CPU_Tick_Pri), cachePort(_cachePort) -{ -} - -void -BaseCache::ResponseEvent::process() -{ - assert(cachePort->transmitList.size()); - assert(cachePort->transmitList.front().first <= curTick); - PacketPtr pkt = cachePort->transmitList.front().second; - cachePort->transmitList.pop_front(); - if (!cachePort->transmitList.empty()) { - Tick time = cachePort->transmitList.front().first; - schedule(time <= curTick ? curTick+1 : time); - } - - if (pkt->flags & NACKED_LINE) - pkt->result = Packet::Nacked; - else - pkt->result = Packet::Success; - pkt->makeTimingResponse(); - DPRINTF(CachePort, "%s attempting to send a response\n", cachePort->name()); - if (!cachePort->drainList.empty() || cachePort->waitingOnRetry) { - //Already have a list, just append - cachePort->drainList.push_back(pkt); - DPRINTF(CachePort, "%s appending response onto drain list\n", cachePort->name()); - } - else if (!cachePort->sendTiming(pkt)) { - //It failed, save it to list of drain events - DPRINTF(CachePort, "%s now waiting for a retry\n", cachePort->name()); - cachePort->drainList.push_back(pkt); - cachePort->waitingOnRetry = true; - } - - // Check if we're done draining once this list is empty - if (cachePort->drainList.empty() && cachePort->transmitList.empty()) - cachePort->cache->checkDrain(); -} - -const char * -BaseCache::ResponseEvent::description() -{ - return "Cache response event"; -} void BaseCache::init() diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh index e45e36fa0..2d63945d9 100644 --- a/src/mem/cache/base_cache.hh +++ b/src/mem/cache/base_cache.hh @@ -26,6 +26,8 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Erik Hallnor + * Steve Reinhardt + * Ron Dreslinski */ /** @@ -83,7 +85,10 @@ class BaseCache : public MemObject BaseCache *cache; protected: - CachePort(const std::string &_name, BaseCache *_cache, bool _isCpuSide); + Event *responseEvent; + + CachePort(const std::string &_name, BaseCache *_cache); + virtual void recvStatusChange(Status status); virtual void getDeviceAddressRanges(AddrRangeList &resp, @@ -91,9 +96,11 @@ class BaseCache : public MemObject virtual int deviceBlockSize(); - virtual void recvRetry(); + bool recvRetryCommon(); public: + void setOtherPort(CachePort *_otherPort) { otherPort = _otherPort; } + void setBlocked(); void clearBlocked(); @@ -104,65 +111,52 @@ class BaseCache : public MemObject bool canDrain() { return drainList.empty() && transmitList.empty(); } + bool drainResponse(); + + CachePort *otherPort; + bool blocked; bool mustSendRetry; - bool isCpuSide; - bool waitingOnRetry; + /** + * Bit vector for the outstanding requests for the master interface. + */ + uint8_t requestCauses; + std::list drainList; std::list > transmitList; - }; - struct RequestEvent : public Event - { - CachePort *cachePort; + bool isBusRequested() { return requestCauses != 0; } - RequestEvent(CachePort *_cachePort, Tick when); - void process(); - const char *description(); - }; + // These need to be virtual since the Event objects depend on + // cache template parameters. + virtual void scheduleRequestEvent(Tick t) = 0; - struct ResponseEvent : public Event - { - CachePort *cachePort; + void requestBus(RequestCause cause, Tick time) + { + if (!isBusRequested() && !waitingOnRetry) { + scheduleRequestEvent(time); + } + requestCauses |= (1 << cause); + } + + void deassertBusRequest(RequestCause cause) + { + requestCauses &= ~(1 << cause); + } - ResponseEvent(CachePort *_cachePort); - void process(); - const char *description(); + void respond(PacketPtr pkt, Tick time); }; public: //Made public so coherence can get at it. CachePort *cpuSidePort; CachePort *memSidePort; - ResponseEvent *sendEvent; - ResponseEvent *memSendEvent; - private: - void recvStatusChange(Port::Status status, bool isCpuSide) - { - if (status == Port::RangeChange){ - if (!isCpuSide) { - cpuSidePort->sendStatusChange(Port::RangeChange); - } - else { - memSidePort->sendStatusChange(Port::RangeChange); - } - } - } - - virtual PacketPtr getPacket() = 0; - - virtual PacketPtr getCoherencePacket() = 0; - - virtual void sendResult(PacketPtr &pkt, MSHR* mshr, bool success) = 0; - - virtual void sendCoherenceResult(PacketPtr &pkt, MSHR* mshr, bool success) = 0; - /** * Bit vector of the blocking reasons for the access path. * @sa #BlockedCause @@ -175,16 +169,6 @@ class BaseCache : public MemObject */ uint8_t blockedSnoop; - /** - * Bit vector for the outstanding requests for the master interface. - */ - uint8_t masterRequests; - - /** - * Bit vector for the outstanding requests for the slave interface. - */ - uint8_t slaveRequests; - protected: /** Stores time the cache blocked for statistics. */ @@ -309,20 +293,10 @@ class BaseCache : public MemObject * of this cache. * @param params The parameter object for this BaseCache. */ - BaseCache(const std::string &name, Params ¶ms) - : MemObject(name), blocked(0), blockedSnoop(0), masterRequests(0), - slaveRequests(0), blkSize(params.blkSize), - missCount(params.maxMisses), drainEvent(NULL) - { - //Start ports at null if more than one is created we should panic - cpuSidePort = NULL; - memSidePort = NULL; - } + BaseCache(const std::string &name, Params ¶ms); ~BaseCache() { - delete sendEvent; - delete memSendEvent; } virtual void init(); @@ -422,12 +396,12 @@ class BaseCache : public MemObject } /** - * True if the master bus should be requested. + * True if the memory-side bus should be requested. * @return True if there are outstanding requests for the master bus. */ - bool doMasterRequest() + bool isMemSideBusRequested() { - return masterRequests != 0; + return memSidePort->isBusRequested(); } /** @@ -435,59 +409,18 @@ class BaseCache : public MemObject * @param cause The reason for the request. * @param time The time to make the request. */ - void setMasterRequest(RequestCause cause, Tick time) + void requestMemSideBus(RequestCause cause, Tick time) { - if (!doMasterRequest() && !memSidePort->waitingOnRetry) - { - new RequestEvent(memSidePort, time); - } - uint8_t flag = 1<requestBus(cause, time); } /** * Clear the master bus request for the given cause. * @param cause The request reason to clear. */ - void clearMasterRequest(RequestCause cause) + void deassertMemSideBusRequest(RequestCause cause) { - uint8_t flag = 1<waitingOnRetry) - { - new RequestEvent(cpuSidePort, time); - } - uint8_t flag = 1<deassertBusRequest(cause); checkDrain(); } @@ -498,111 +431,7 @@ class BaseCache : public MemObject */ void respond(PacketPtr pkt, Tick time) { - assert(time >= curTick); - if (pkt->needsResponse()) { -/* CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt); - reqCpu->schedule(time); -*/ - if (cpuSidePort->transmitList.empty()) { - assert(!sendEvent->scheduled()); - sendEvent->schedule(time); - cpuSidePort->transmitList.push_back(std::pair - (time,pkt)); - return; - } - - // something is on the list and this belongs at the end - if (time >= cpuSidePort->transmitList.back().first) { - cpuSidePort->transmitList.push_back(std::pair - (time,pkt)); - return; - } - // Something is on the list and this belongs somewhere else - std::list >::iterator i = - cpuSidePort->transmitList.begin(); - std::list >::iterator end = - cpuSidePort->transmitList.end(); - bool done = false; - - while (i != end && !done) { - if (time < i->first) { - if (i == cpuSidePort->transmitList.begin()) { - //Inserting at begining, reschedule - sendEvent->reschedule(time); - } - cpuSidePort->transmitList.insert(i,std::pair - (time,pkt)); - done = true; - } - i++; - } - } - else { - if (pkt->cmd != MemCmd::UpgradeReq) - { - delete pkt->req; - delete pkt; - } - } - } - - /** - * Send a reponse to the slave interface and calculate miss latency. - * @param pkt The request to respond to. - * @param time The time the response is ready. - */ - void respondToMiss(PacketPtr pkt, Tick time) - { - assert(time >= curTick); - if (!pkt->req->isUncacheable()) { - missLatency[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] += - time - pkt->time; - } - if (pkt->needsResponse()) { -/* CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt); - reqCpu->schedule(time); -*/ - if (cpuSidePort->transmitList.empty()) { - assert(!sendEvent->scheduled()); - sendEvent->schedule(time); - cpuSidePort->transmitList.push_back(std::pair - (time,pkt)); - return; - } - - // something is on the list and this belongs at the end - if (time >= cpuSidePort->transmitList.back().first) { - cpuSidePort->transmitList.push_back(std::pair - (time,pkt)); - return; - } - // Something is on the list and this belongs somewhere else - std::list >::iterator i = - cpuSidePort->transmitList.begin(); - std::list >::iterator end = - cpuSidePort->transmitList.end(); - bool done = false; - - while (i != end && !done) { - if (time < i->first) { - if (i == cpuSidePort->transmitList.begin()) { - //Inserting at begining, reschedule - sendEvent->reschedule(time); - } - cpuSidePort->transmitList.insert(i,std::pair - (time,pkt)); - done = true; - } - i++; - } - } - else { - if (pkt->cmd != MemCmd::UpgradeReq) - { - delete pkt->req; - delete pkt; - } - } + cpuSidePort->respond(pkt, time); } /** @@ -611,65 +440,7 @@ class BaseCache : public MemObject */ void respondToSnoop(PacketPtr pkt, Tick time) { - assert(time >= curTick); - assert (pkt->needsResponse()); -/* CacheEvent *reqMem = new CacheEvent(memSidePort, pkt); - reqMem->schedule(time); -*/ - if (memSidePort->transmitList.empty()) { - assert(!memSendEvent->scheduled()); - memSendEvent->schedule(time); - memSidePort->transmitList.push_back(std::pair - (time,pkt)); - return; - } - - // something is on the list and this belongs at the end - if (time >= memSidePort->transmitList.back().first) { - memSidePort->transmitList.push_back(std::pair - (time,pkt)); - return; - } - // Something is on the list and this belongs somewhere else - std::list >::iterator i = - memSidePort->transmitList.begin(); - std::list >::iterator end = - memSidePort->transmitList.end(); - bool done = false; - - while (i != end && !done) { - if (time < i->first) { - if (i == memSidePort->transmitList.begin()) { - //Inserting at begining, reschedule - memSendEvent->reschedule(time); - } - memSidePort->transmitList.insert(i,std::pair(time,pkt)); - done = true; - } - i++; - } - } - - /** - * Notification from master interface that a address range changed. Nothing - * to do for a cache. - */ - void rangeChange() {} - - void getAddressRanges(AddrRangeList &resp, AddrRangeList &snoop, bool isCpuSide) - { - if (isCpuSide) - { - AddrRangeList dummy; - memSidePort->getPeerAddressRanges(resp, dummy); - } - else - { - //This is where snoops get updated - AddrRangeList dummy; - cpuSidePort->getPeerAddressRanges(dummy, snoop); - return; - } + memSidePort->respond(pkt, time); } virtual unsigned int drain(Event *de); @@ -686,7 +457,7 @@ class BaseCache : public MemObject bool canDrain() { - if (doMasterRequest() || doSlaveRequest()) { + if (isMemSideBusRequested()) { return false; } else if (memSidePort && !memSidePort->canDrain()) { return false; diff --git a/src/mem/cache/cache.cc b/src/mem/cache/cache.cc index cb4e7f62e..2b4e7b9c8 100644 --- a/src/mem/cache/cache.cc +++ b/src/mem/cache/cache.cc @@ -61,7 +61,6 @@ #include "mem/cache/miss/miss_queue.hh" #include "mem/cache/miss/blocking_buffer.hh" -#include "mem/cache/coherence/uni_coherence.hh" #include "mem/cache/coherence/simple_coherence.hh" #include "mem/cache/cache_impl.hh" @@ -72,27 +71,22 @@ #if defined(USE_CACHE_FALRU) template class Cache; -template class Cache; #endif #if defined(USE_CACHE_IIC) template class Cache; -template class Cache; #endif #if defined(USE_CACHE_LRU) template class Cache; -template class Cache; #endif #if defined(USE_CACHE_SPLIT) template class Cache; -template class Cache; #endif #if defined(USE_CACHE_SPLIT_LIFO) template class Cache; -template class Cache; #endif #endif //DOXYGEN_SHOULD_SKIP_THIS diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh index 3e45c85d2..75fb50f4e 100644 --- a/src/mem/cache/cache.hh +++ b/src/mem/cache/cache.hh @@ -28,6 +28,7 @@ * Authors: Erik Hallnor * Dave Greene * Steve Reinhardt + * Ron Dreslinski */ /** @@ -46,6 +47,8 @@ #include "mem/cache/cache_blk.hh" #include "mem/cache/miss/miss_buffer.hh" +#include "sim/eventq.hh" + //Forward decleration class MSHR; class BasePrefetcher; @@ -83,11 +86,26 @@ class Cache : public BaseCache return static_cast *>(cache); } + void processRequestEvent(); + void processResponseEvent(); + virtual bool recvTiming(PacketPtr pkt); + virtual void recvRetry(); + virtual Tick recvAtomic(PacketPtr pkt); virtual void recvFunctional(PacketPtr pkt); + + typedef EventWrapper + ResponseEvent; + + typedef EventWrapper + RequestEvent; + + virtual void scheduleRequestEvent(Tick t) { + new RequestEvent(this, t); + } }; class MemSidePort : public CachePort @@ -103,11 +121,26 @@ class Cache : public BaseCache return static_cast *>(cache); } + void processRequestEvent(); + void processResponseEvent(); + virtual bool recvTiming(PacketPtr pkt); + virtual void recvRetry(); + virtual Tick recvAtomic(PacketPtr pkt); virtual void recvFunctional(PacketPtr pkt); + + typedef EventWrapper + ResponseEvent; + + typedef EventWrapper + RequestEvent; + + virtual void scheduleRequestEvent(Tick t) { + new RequestEvent(this, t); + } }; /** Tag and data Storage */ @@ -339,8 +372,6 @@ class Cache : public BaseCache virtual Port *getPort(const std::string &if_name, int idx = -1); virtual void deletePortRefs(Port *p); - virtual void recvStatusChange(Port::Status status, bool isCpuSide); - void regStats(); /** @@ -354,21 +385,14 @@ class Cache : public BaseCache * Selects a request to send on the bus. * @return The memory request to service. */ - virtual PacketPtr getPacket(); + PacketPtr getPacket(); /** * Was the request was sent successfully? * @param pkt The request. * @param success True if the request was sent successfully. */ - virtual void sendResult(PacketPtr &pkt, MSHR* mshr, bool success); - - /** - * Was the CSHR request was sent successfully? - * @param pkt The request. - * @param success True if the request was sent successfully. - */ - virtual void sendCoherenceResult(PacketPtr &pkt, MSHR* cshr, bool success); + void sendResult(PacketPtr &pkt, MSHR* mshr, bool success); /** * Handles a response (cache line fill/write ack) from the bus. @@ -376,12 +400,6 @@ class Cache : public BaseCache */ void handleResponse(PacketPtr &pkt); - /** - * Selects a coherence message to forward to lower levels of the hierarchy. - * @return The coherence message to forward. - */ - virtual PacketPtr getCoherencePacket(); - /** * Snoops bus transactions to maintain coherence. * @param pkt The current bus transaction. diff --git a/src/mem/cache/cache_builder.cc b/src/mem/cache/cache_builder.cc index e887f711e..bc1a8a775 100644 --- a/src/mem/cache/cache_builder.cc +++ b/src/mem/cache/cache_builder.cc @@ -75,7 +75,6 @@ #include "mem/cache/miss/blocking_buffer.hh" // Coherence Templates -#include "mem/cache/coherence/uni_coherence.hh" #include "mem/cache/coherence/simple_coherence.hh" //Prefetcher Headers @@ -302,13 +301,8 @@ END_INIT_SIM_OBJECT_PARAMS(BaseCache) } while (0) #define BUILD_COHERENCE(b) do { \ - if (protocol == NULL) { \ - UniCoherence *coh = new UniCoherence(); \ - BUILD_CACHES(UniCoherence); \ - } else { \ - SimpleCoherence *coh = new SimpleCoherence(protocol); \ - BUILD_CACHES(SimpleCoherence); \ - } \ + SimpleCoherence *coh = new SimpleCoherence(protocol); \ + BUILD_CACHES(SimpleCoherence); \ } while (0) #if defined(USE_TAGGED) diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index 9b094c1e3..6b9eac865 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -28,6 +28,8 @@ * Authors: Erik Hallnor * Dave Greene * Nathan Binkert + * Steve Reinhardt + * Ron Dreslinski */ /** @@ -57,18 +59,8 @@ bool SIGNAL_NACK_HACK; template -void -Cache:: -recvStatusChange(Port::Status status, bool isCpuSide) -{ - -} - - -template -Cache:: -Cache(const std::string &_name, - Cache::Params ¶ms) +Cache::Cache(const std::string &_name, + Cache::Params ¶ms) : BaseCache(_name, params.baseParams), prefetchAccess(params.prefetchAccess), tags(params.tags), missQueue(params.missQueue), @@ -84,6 +76,11 @@ Cache(const std::string &_name, adaptiveCompression(params.adaptiveCompression), writebackCompressed(params.writebackCompressed) { + cpuSidePort = new CpuSidePort(_name + "-cpu_side_port", this); + memSidePort = new MemSidePort(_name + "-mem_side_port", this); + cpuSidePort->setOtherPort(memSidePort); + memSidePort->setOtherPort(cpuSidePort); + tags->setCache(this); missQueue->setCache(this); missQueue->setPrefetcher(prefetcher); @@ -406,7 +403,11 @@ Cache::handleFill(BlkType *blk, MSHR * mshr, // mshr->pkt = pkt; break; } - respondToMiss(target, completion_time); + if (!target->req->isUncacheable()) { + missLatency[target->cmdToIndex()][0/*pkt->req->getThreadNum()*/] += + completion_time - target->time; + } + respond(target, completion_time); mshr->popTarget(); } @@ -688,7 +689,7 @@ Cache::getPacket() } } - assert(!doMasterRequest() || missQueue->havePending()); + assert(!isMemSideBusRequested() || missQueue->havePending()); assert(!pkt || pkt->time <= curTick); SIGNAL_NACK_HACK = false; return pkt; @@ -727,7 +728,6 @@ Cache::sendResult(PacketPtr &pkt, MSHR* mshr, pkt->flags &= ~NACKED_LINE; SIGNAL_NACK_HACK = false; pkt->flags &= ~SATISFIED; - pkt->flags &= ~SNOOP_COMMIT; //Rmove copy from mshr delete mshr->pkt; @@ -783,22 +783,6 @@ Cache::handleResponse(PacketPtr &pkt) } } -template -PacketPtr -Cache::getCoherencePacket() -{ - return coherence->getPacket(); -} - -template -void -Cache::sendCoherenceResult(PacketPtr &pkt, - MSHR *cshr, - bool success) -{ - coherence->sendResult(pkt, cshr, success); -} - template void @@ -1146,27 +1130,15 @@ template Port * Cache::getPort(const std::string &if_name, int idx) { - if (if_name == "" || if_name == "cpu_side") - { - if (cpuSidePort == NULL) { - cpuSidePort = new CpuSidePort(name() + "-cpu_side_port", this); - sendEvent = new ResponseEvent(cpuSidePort); - } + if (if_name == "" || if_name == "cpu_side") { return cpuSidePort; - } - else if (if_name == "functional") - { - return new CpuSidePort(name() + "-cpu_side_funcport", this); - } - else if (if_name == "mem_side") - { - if (memSidePort != NULL) - panic("Already have a mem side for this cache\n"); - memSidePort = new MemSidePort(name() + "-mem_side_port", this); - memSendEvent = new ResponseEvent(memSidePort); + } else if (if_name == "mem_side") { return memSidePort; + } else if (if_name == "functional") { + return new CpuSidePort(name() + "-cpu_side_funcport", this); + } else { + panic("Port name %s unrecognized\n", if_name); } - else panic("Port name %s unrecognized\n", if_name); } template @@ -1213,6 +1185,68 @@ Cache::CpuSidePort::recvTiming(PacketPtr pkt) return true; } + +template +void +Cache::CpuSidePort::recvRetry() +{ + recvRetryCommon(); +} + + +template +void +Cache::CpuSidePort::processRequestEvent() +{ + if (waitingOnRetry) + return; + //We have some responses to drain first + if (!drainList.empty()) { + if (!drainResponse()) { + // more responses to drain... re-request bus + scheduleRequestEvent(curTick + 1); + } + } +} + + +template +void +Cache::CpuSidePort::processResponseEvent() +{ + assert(transmitList.size()); + assert(transmitList.front().first <= curTick); + PacketPtr pkt = transmitList.front().second; + transmitList.pop_front(); + if (!transmitList.empty()) { + Tick time = transmitList.front().first; + responseEvent->schedule(time <= curTick ? curTick+1 : time); + } + + if (pkt->flags & NACKED_LINE) + pkt->result = Packet::Nacked; + else + pkt->result = Packet::Success; + pkt->makeTimingResponse(); + DPRINTF(CachePort, "%s attempting to send a response\n", name()); + if (!drainList.empty() || waitingOnRetry) { + //Already have a list, just append + drainList.push_back(pkt); + DPRINTF(CachePort, "%s appending response onto drain list\n", name()); + } + else if (!sendTiming(pkt)) { + //It failed, save it to list of drain events + DPRINTF(CachePort, "%s now waiting for a retry\n", name()); + drainList.push_back(pkt); + waitingOnRetry = true; + } + + // Check if we're done draining once this list is empty + if (drainList.empty() && transmitList.empty()) + myCache()->checkDrain(); +} + + template Tick Cache::CpuSidePort::recvAtomic(PacketPtr pkt) @@ -1249,23 +1283,149 @@ Cache::MemSidePort::recvTiming(PacketPtr pkt) if (pkt->result == Packet::Nacked) panic("Need to implement cache resending nacked packets!\n"); - if (pkt->isRequest() && blocked) - { + if (pkt->isRequest() && blocked) { DPRINTF(Cache,"Scheduling a retry while blocked\n"); mustSendRetry = true; return false; } - if (pkt->isResponse()) + if (pkt->isResponse()) { myCache()->handleResponse(pkt); - else { - //Check if we should do the snoop - if (pkt->flags & SNOOP_COMMIT) - myCache()->snoop(pkt); + } else { + myCache()->snoop(pkt); } return true; } +template +void +Cache::MemSidePort::recvRetry() +{ + if (recvRetryCommon()) { + return; + } + + DPRINTF(CachePort, "%s attempting to send a retry for MSHR\n", name()); + if (!cache->isMemSideBusRequested()) { + //This can happen if I am the owner of a block and see an upgrade + //while the block was in my WB Buffers. I just remove the + //wb and de-assert the masterRequest + waitingOnRetry = false; + return; + } + PacketPtr pkt = myCache()->getPacket(); + MSHR* mshr = (MSHR*) pkt->senderState; + //Copy the packet, it may be modified/destroyed elsewhere + PacketPtr copyPkt = new Packet(*pkt); + copyPkt->dataStatic(pkt->getPtr()); + mshr->pkt = copyPkt; + + bool success = sendTiming(pkt); + DPRINTF(Cache, "Address %x was %s in sending the timing request\n", + pkt->getAddr(), success ? "succesful" : "unsuccesful"); + + waitingOnRetry = !success; + if (waitingOnRetry) { + DPRINTF(CachePort, "%s now waiting on a retry\n", name()); + } + + myCache()->sendResult(pkt, mshr, success); + + if (success && cache->isMemSideBusRequested()) + { + DPRINTF(CachePort, "%s has more requests\n", name()); + //Still more to issue, rerequest in 1 cycle + new RequestEvent(this, curTick + 1); + } +} + + +template +void +Cache::MemSidePort::processRequestEvent() +{ + if (waitingOnRetry) + return; + //We have some responses to drain first + if (!drainList.empty()) { + if (!drainResponse()) { + // more responses to drain... re-request bus + scheduleRequestEvent(curTick + 1); + } + return; + } + + DPRINTF(CachePort, "%s trying to send a MSHR request\n", name()); + if (!isBusRequested()) { + //This can happen if I am the owner of a block and see an upgrade + //while the block was in my WB Buffers. I just remove the + //wb and de-assert the masterRequest + return; + } + + PacketPtr pkt = myCache()->getPacket(); + MSHR* mshr = (MSHR*) pkt->senderState; + //Copy the packet, it may be modified/destroyed elsewhere + PacketPtr copyPkt = new Packet(*pkt); + copyPkt->dataStatic(pkt->getPtr()); + mshr->pkt = copyPkt; + + bool success = sendTiming(pkt); + DPRINTF(Cache, "Address %x was %s in sending the timing request\n", + pkt->getAddr(), success ? "succesful" : "unsuccesful"); + + waitingOnRetry = !success; + if (waitingOnRetry) { + DPRINTF(CachePort, "%s now waiting on a retry\n", name()); + } + + myCache()->sendResult(pkt, mshr, success); + if (success && isBusRequested()) + { + DPRINTF(CachePort, "%s still more MSHR requests to send\n", name()); + //Still more to issue, rerequest in 1 cycle + scheduleRequestEvent(curTick+1); + } +} + + +template +void +Cache::MemSidePort::processResponseEvent() +{ + assert(transmitList.size()); + assert(transmitList.front().first <= curTick); + PacketPtr pkt = transmitList.front().second; + transmitList.pop_front(); + if (!transmitList.empty()) { + Tick time = transmitList.front().first; + responseEvent->schedule(time <= curTick ? curTick+1 : time); + } + + if (pkt->flags & NACKED_LINE) + pkt->result = Packet::Nacked; + else + pkt->result = Packet::Success; + pkt->makeTimingResponse(); + DPRINTF(CachePort, "%s attempting to send a response\n", name()); + if (!drainList.empty() || waitingOnRetry) { + //Already have a list, just append + drainList.push_back(pkt); + DPRINTF(CachePort, "%s appending response onto drain list\n", name()); + } + else if (!sendTiming(pkt)) { + //It failed, save it to list of drain events + DPRINTF(CachePort, "%s now waiting for a retry\n", name()); + drainList.push_back(pkt); + waitingOnRetry = true; + } + + // Check if we're done draining once this list is empty + if (drainList.empty() && transmitList.empty()) + myCache()->checkDrain(); +} + + template Tick Cache::MemSidePort::recvAtomic(PacketPtr pkt) @@ -1292,15 +1452,17 @@ template Cache:: CpuSidePort::CpuSidePort(const std::string &_name, Cache *_cache) - : BaseCache::CachePort(_name, _cache, true) + : BaseCache::CachePort(_name, _cache) { + responseEvent = new ResponseEvent(this); } template Cache:: MemSidePort::MemSidePort(const std::string &_name, Cache *_cache) - : BaseCache::CachePort(_name, _cache, false) + : BaseCache::CachePort(_name, _cache) { + responseEvent = new ResponseEvent(this); } diff --git a/src/mem/cache/coherence/SConscript b/src/mem/cache/coherence/SConscript index 03a2d85d7..7b94f73e1 100644 --- a/src/mem/cache/coherence/SConscript +++ b/src/mem/cache/coherence/SConscript @@ -31,5 +31,4 @@ Import('*') Source('coherence_protocol.cc') -Source('uni_coherence.cc') diff --git a/src/mem/cache/coherence/uni_coherence.cc b/src/mem/cache/coherence/uni_coherence.cc deleted file mode 100644 index 6061c89c3..000000000 --- a/src/mem/cache/coherence/uni_coherence.cc +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright (c) 2003-2005 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Authors: Erik Hallnor - */ - -#include "mem/cache/coherence/uni_coherence.hh" -#include "mem/cache/base_cache.hh" - -#include "base/trace.hh" - -using namespace std; - -UniCoherence::UniCoherence() - : cshrs(50) -{ -} - -PacketPtr -UniCoherence::getPacket() -{ - PacketPtr pkt = cshrs.getReq(); - return pkt; -} - -void -UniCoherence::sendResult(PacketPtr &pkt, MSHR* cshr, bool success) -{ - if (success) - { - bool unblock = cshrs.isFull(); -// cshrs.markInService(cshr); - delete pkt->req; - cshrs.deallocate(cshr); - if (!cshrs.havePending()) { - cache->clearSlaveRequest(Request_Coherence); - } - if (unblock) { - //since CSHRs are always used as buffers, should always get rid of one - assert(!cshrs.isFull()); - cache->clearBlocked(Blocked_Coherence); - } - } -} - - -/** - * @todo add support for returning slave requests, not doing them here. - */ -bool -UniCoherence::handleBusRequest(PacketPtr &pkt, CacheBlk *blk, MSHR *mshr, - CacheBlk::State &new_state) -{ - new_state = 0; - if (pkt->isInvalidate()) { - DPRINTF(Cache, "snoop inval on blk %x (blk ptr %x)\n", - pkt->getAddr(), blk); - } - else if (blk) { - new_state = blk->status; - if (pkt->isRead()) { - DPRINTF(Cache, "Uni-coherence snoops a read that hit in itself" - ". Should satisfy the packet\n"); - return true; //Satisfy Reads if we can - } - } - return false; -} - -bool -UniCoherence::propogateInvalidate(PacketPtr pkt, bool isTiming) -{ - if (pkt->isInvalidate()) { -/* Temp Fix for now, forward all invalidates up as functional accesses */ - if (isTiming) { - // Forward to other caches - Request* req = new Request(pkt->req->getPaddr(), pkt->getSize(), 0); - PacketPtr tmp = new Packet(req, MemCmd::InvalidateReq, -1); - cshrs.allocate(tmp); - cache->setSlaveRequest(Request_Coherence, curTick); - if (cshrs.isFull()) - cache->setBlockedForSnoop(Blocked_Coherence); - } - else { - PacketPtr tmp = new Packet(pkt->req, MemCmd::InvalidateReq, -1); - cache->cpuSidePort->sendAtomic(tmp); - delete tmp; - } -/**/ -/* PacketPtr tmp = new Packet(pkt->req, MemCmd::InvalidateReq, -1); - cache->cpuSidePort->sendFunctional(tmp); - delete tmp; -*/ - } - if (pkt->isRead()) { - /*For now we will see if someone above us has the data by - doing a functional access on reads. Fix this later */ - PacketPtr tmp = new Packet(pkt->req, MemCmd::ReadReq, -1); - tmp->allocate(); - cache->cpuSidePort->sendFunctional(tmp); - bool hit = (tmp->result == Packet::Success); - if (hit) { - memcpy(pkt->getPtr(), tmp->getPtr(), - pkt->getSize()); - DPRINTF(Cache, "Uni-coherence snoops a read that hit in L1\n"); - } - delete tmp; - return hit; - } - return false; -} diff --git a/src/mem/cache/coherence/uni_coherence.hh b/src/mem/cache/coherence/uni_coherence.hh deleted file mode 100644 index 9efb4e192..000000000 --- a/src/mem/cache/coherence/uni_coherence.hh +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Copyright (c) 2003-2005 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Authors: Erik Hallnor - */ - -#ifndef __UNI_COHERENCE_HH__ -#define __UNI_COHERENCE_HH__ - -#include "base/trace.hh" -#include "base/misc.hh" -#include "mem/cache/cache_blk.hh" -#include "mem/cache/miss/mshr_queue.hh" -#include "mem/packet.hh" - -class BaseCache; - -class UniCoherence -{ - protected: - /** Buffers to hold forwarded invalidates. */ - MSHRQueue cshrs; - /** Pointer to the parent cache. */ - BaseCache *cache; - - public: - /** - * Construct and initialize this coherence policy. - */ - UniCoherence(); - - /** - * Set the pointer to the parent cache. - * @param _cache The parent cache. - */ - void setCache(BaseCache *_cache) - { - cache = _cache; - } - - /** - * Register statistics. - * @param name The name to prepend to stat descriptions. - */ - void regStats(const std::string &name) - { - } - - /** - * Return Read. - * @param cmd The request's command. - * @param state The current state of the cache block. - * @return The proper bus command, as determined by the protocol. - * @todo Make changes so writebacks don't get here. - */ - MemCmd getBusCmd(MemCmd cmd, CacheBlk::State state) - { - if (cmd == MemCmd::HardPFReq && state) - warn("Trying to issue a prefetch to a block we already have\n"); - if (cmd == MemCmd::Writeback) - return MemCmd::Writeback; - return MemCmd::ReadReq; - } - - /** - * Just return readable and writeable. - * @param pkt The bus response. - * @param current The current block state. - * @return The new state. - */ - CacheBlk::State getNewState(PacketPtr &pkt, CacheBlk::State current) - { - if (pkt->senderState) //Blocking Buffers don't get mshrs - { - if (((MSHR *)(pkt->senderState))->originalCmd == MemCmd::HardPFReq) { - DPRINTF(HWPrefetch, "Marking a hardware prefetch as such in the state\n"); - return BlkHWPrefetched | BlkValid | BlkWritable; - } - else { - return BlkValid | BlkWritable; - } - } - //@todo What about prefetching with blocking buffers - else - return BlkValid | BlkWritable; - } - - /** - * Return outstanding invalidate to forward. - * @return The next invalidate to forward to lower levels of cache. - */ - PacketPtr getPacket(); - - /** - * Was the CSHR request was sent successfully? - * @param pkt The request. - * @param success True if the request was sent successfully. - */ - void sendResult(PacketPtr &pkt, MSHR* cshr, bool success); - - /** - * Handle snooped bus requests. - * @param pkt The snooped bus request. - * @param blk The cache block corresponding to the request, if any. - * @param mshr The MSHR corresponding to the request, if any. - * @param new_state The new coherence state of the block. - * @return True if the request should be satisfied locally. - */ - bool handleBusRequest(PacketPtr &pkt, CacheBlk *blk, MSHR *mshr, - CacheBlk::State &new_state); - - /** - * Return true if this coherence policy can handle fast cache writes. - */ - bool allowFastWrites() { return true; } - - bool hasProtocol() { return false; } - - bool propogateInvalidate(PacketPtr pkt, bool isTiming); -}; - -#endif //__UNI_COHERENCE_HH__ diff --git a/src/mem/cache/miss/blocking_buffer.cc b/src/mem/cache/miss/blocking_buffer.cc index e8ff26880..281328c2e 100644 --- a/src/mem/cache/miss/blocking_buffer.cc +++ b/src/mem/cache/miss/blocking_buffer.cc @@ -64,7 +64,7 @@ BlockingBuffer::handleMiss(PacketPtr &pkt, int blk_size, Tick time) std::memcpy(wb.pkt->getPtr(), pkt->getPtr(), blk_size); cache->setBlocked(Blocked_NoWBBuffers); - cache->setMasterRequest(Request_WB, time); + cache->requestMemSideBus(Request_WB, time); return; } @@ -77,7 +77,7 @@ BlockingBuffer::handleMiss(PacketPtr &pkt, int blk_size, Tick time) miss.pkt->flags |= CACHE_LINE_FILL; } cache->setBlocked(Blocked_NoMSHRs); - cache->setMasterRequest(Request_MSHR, time); + cache->requestMemSideBus(Request_MSHR, time); } PacketPtr @@ -111,7 +111,7 @@ BlockingBuffer::markInService(PacketPtr &pkt, MSHR* mshr) // Forwarding a write/ writeback, don't need to change // the command assert(mshr == &wb); - cache->clearMasterRequest(Request_WB); + cache->deassertMemSideBusRequest(Request_WB); if (!pkt->needsResponse()) { assert(wb.getNumTargets() == 0); wb.deallocate(); @@ -121,7 +121,7 @@ BlockingBuffer::markInService(PacketPtr &pkt, MSHR* mshr) } } else { assert(mshr == &miss); - cache->clearMasterRequest(Request_MSHR); + cache->deassertMemSideBusRequest(Request_MSHR); if (!pkt->needsResponse()) { assert(miss.getNumTargets() == 0); miss.deallocate(); @@ -178,7 +178,7 @@ BlockingBuffer::squash(int threadNum) if (!miss.inService) { miss.deallocate(); cache->clearBlocked(Blocked_NoMSHRs); - cache->clearMasterRequest(Request_MSHR); + cache->deassertMemSideBusRequest(Request_MSHR); } } } @@ -203,7 +203,7 @@ BlockingBuffer::doWriteback(Addr addr, writebacks[0/*pkt->req->getThreadNum()*/]++; wb.allocateAsBuffer(pkt); - cache->setMasterRequest(Request_WB, curTick); + cache->requestMemSideBus(Request_WB, curTick); cache->setBlocked(Blocked_NoWBBuffers); } @@ -221,7 +221,7 @@ BlockingBuffer::doWriteback(PacketPtr &pkt) std::memcpy(wb.pkt->getPtr(), pkt->getPtr(), pkt->getSize()); cache->setBlocked(Blocked_NoWBBuffers); - cache->setMasterRequest(Request_WB, curTick); + cache->requestMemSideBus(Request_WB, curTick); } diff --git a/src/mem/cache/miss/miss_queue.cc b/src/mem/cache/miss/miss_queue.cc index 24ca9cfa2..67036ed02 100644 --- a/src/mem/cache/miss/miss_queue.cc +++ b/src/mem/cache/miss/miss_queue.cc @@ -348,7 +348,7 @@ MissQueue::allocateMiss(PacketPtr &pkt, int size, Tick time) } if (pkt->cmd != MemCmd::HardPFReq) { //If we need to request the bus (not on HW prefetch), do so - cache->setMasterRequest(Request_MSHR, time); + cache->requestMemSideBus(Request_MSHR, time); } return mshr; } @@ -376,7 +376,7 @@ MissQueue::allocateWrite(PacketPtr &pkt, int size, Tick time) cache->setBlocked(Blocked_NoWBBuffers); } - cache->setMasterRequest(Request_WB, time); + cache->requestMemSideBus(Request_WB, time); return mshr; } @@ -450,7 +450,7 @@ MissQueue::fetchBlock(Addr addr, int blk_size, Tick time, if (mq.isFull()) { cache->setBlocked(Blocked_NoMSHRs); } - cache->setMasterRequest(Request_MSHR, time); + cache->requestMemSideBus(Request_MSHR, time); return mshr; } @@ -534,7 +534,7 @@ MissQueue::markInService(PacketPtr &pkt, MSHR* mshr) unblock = wb.isFull(); wb.markInService(mshr); if (!wb.havePending()){ - cache->clearMasterRequest(Request_WB); + cache->deassertMemSideBusRequest(Request_WB); } if (unblock) { // Do we really unblock? @@ -545,7 +545,7 @@ MissQueue::markInService(PacketPtr &pkt, MSHR* mshr) unblock = mq.isFull(); mq.markInService(mshr); if (!mq.havePending()){ - cache->clearMasterRequest(Request_MSHR); + cache->deassertMemSideBusRequest(Request_MSHR); } if (mshr->originalCmd == MemCmd::HardPFReq) { DPRINTF(HWPrefetch, "%s:Marking a HW_PF in service\n", @@ -553,7 +553,7 @@ MissQueue::markInService(PacketPtr &pkt, MSHR* mshr) //Also clear pending if need be if (!prefetcher->havePending()) { - cache->clearMasterRequest(Request_PF); + cache->deassertMemSideBusRequest(Request_PF); } } if (unblock) { @@ -602,7 +602,7 @@ MissQueue::handleResponse(PacketPtr &pkt, Tick time) mshr->pkt->req = mshr->getTarget()->req; mq.markPending(mshr, cmd); mshr->order = order++; - cache->setMasterRequest(Request_MSHR, time); + cache->requestMemSideBus(Request_MSHR, time); } else { unblock = mq.isFull(); @@ -683,7 +683,7 @@ MissQueue::squash(int threadNum) } mq.squash(threadNum); if (!mq.havePending()) { - cache->clearMasterRequest(Request_MSHR); + cache->deassertMemSideBusRequest(Request_MSHR); } if (unblock && !mq.isFull()) { cache->clearBlocked(cause); diff --git a/src/mem/cache/prefetch/base_prefetcher.cc b/src/mem/cache/prefetch/base_prefetcher.cc index 44daf75e1..966f7d005 100644 --- a/src/mem/cache/prefetch/base_prefetcher.cc +++ b/src/mem/cache/prefetch/base_prefetcher.cc @@ -141,7 +141,7 @@ BasePrefetcher::getPacket() keepTrying = cache->inCache(pkt->getAddr()); } if (pf.empty()) { - cache->clearMasterRequest(Request_PF); + cache->deassertMemSideBusRequest(Request_PF); if (keepTrying) return NULL; //None left, all were in cache } } while (keepTrying); @@ -165,7 +165,7 @@ BasePrefetcher::handleMiss(PacketPtr &pkt, Tick time) pfRemovedMSHR++; pf.erase(iter); if (pf.empty()) - cache->clearMasterRequest(Request_PF); + cache->deassertMemSideBusRequest(Request_PF); } //Remove anything in queue with delay older than time @@ -182,7 +182,7 @@ BasePrefetcher::handleMiss(PacketPtr &pkt, Tick time) iter--; } if (pf.empty()) - cache->clearMasterRequest(Request_PF); + cache->deassertMemSideBusRequest(Request_PF); } @@ -244,7 +244,7 @@ BasePrefetcher::handleMiss(PacketPtr &pkt, Tick time) prefetch->flags |= CACHE_LINE_FILL; //Make sure to request the bus, with proper delay - cache->setMasterRequest(Request_PF, prefetch->time); + cache->requestMemSideBus(Request_PF, prefetch->time); //Increment through the list addr++; diff --git a/src/mem/packet.hh b/src/mem/packet.hh index dc23e9f6d..577f99116 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -61,8 +61,8 @@ typedef std::list PacketList; #define CACHE_LINE_FILL (1 << 3) #define COMPRESSED (1 << 4) #define NO_ALLOCATE (1 << 5) -#define SNOOP_COMMIT (1 << 6) +#define EXPRESS_SNOOP (1 << 7) class MemCmd { -- cgit v1.2.3 From 9048c695a0ecde709a074259bad9ad1cda57a303 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Tue, 22 May 2007 06:29:48 -0700 Subject: Another pass of minor changes in preparation for new protocol. src/mem/cache/cache_impl.hh: src/mem/cache/coherence/simple_coherence.hh: Get rid of old invalidate propagation logic in preparation for new multilevel snoop protocol. src/mem/cache/coherence/coherence_protocol.cc: L2 cache now has protocol, so protocol must handle ReadExReq coming in from the CPU side. src/mem/cache/miss/mshr_queue.cc: Assertion is failing, so let's take it out for now. src/mem/packet.cc: src/mem/packet.hh: Add WritebackAck command. Reorganize enum to put responses next to corresponding requests. Get rid of unused WriteReqNoAck. --HG-- extra : convert_revision : 24c519846d161978123f9aa029ae358a41546c73 --- src/mem/cache/cache_impl.hh | 17 ++--------------- src/mem/cache/coherence/coherence_protocol.cc | 3 +++ src/mem/cache/coherence/simple_coherence.hh | 6 ------ src/mem/cache/miss/mshr_queue.cc | 1 - src/mem/packet.cc | 11 ++++++----- src/mem/packet.hh | 4 ++-- 6 files changed, 13 insertions(+), 29 deletions(-) diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index 6b9eac865..56352c110 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -794,14 +794,7 @@ Cache::snoop(PacketPtr &pkt) return; } - //Send a timing (true) invalidate up if the protocol calls for it - if (coherence->propogateInvalidate(pkt, true)) { - //Temp hack, we had a functional read hit in the L1, mark as success - pkt->flags |= SATISFIED; - pkt->result = Packet::Success; - respondToSnoop(pkt, curTick + hitLatency); - return; - } + ///// PROPAGATE SNOOP UPWARD HERE Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1)); BlkType *blk = tags->findBlock(pkt->getAddr()); @@ -1097,13 +1090,7 @@ template Tick Cache::snoopProbe(PacketPtr &pkt) { - //Send a atomic (false) invalidate up if the protocol calls for it - if (coherence->propogateInvalidate(pkt, false)) { - //Temp hack, we had a functional read hit in the L1, mark as success - pkt->flags |= SATISFIED; - pkt->result = Packet::Success; - return hitLatency; - } + ///// PROPAGATE SNOOP UPWARD HERE Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1)); BlkType *blk = tags->findBlock(pkt->getAddr()); diff --git a/src/mem/cache/coherence/coherence_protocol.cc b/src/mem/cache/coherence/coherence_protocol.cc index 33a8a4e63..bc8de0d26 100644 --- a/src/mem/cache/coherence/coherence_protocol.cc +++ b/src/mem/cache/coherence/coherence_protocol.cc @@ -295,11 +295,14 @@ CoherenceProtocol::CoherenceProtocol(const string &name, tt[Invalid][MC::ReadReq].onRequest(MC::ReadReq); // we only support write allocate right now tt[Invalid][MC::WriteReq].onRequest(MC::ReadExReq); + tt[Invalid][MC::ReadExReq].onRequest(MC::ReadExReq); tt[Invalid][MC::SwapReq].onRequest(MC::ReadExReq); tt[Shared][MC::WriteReq].onRequest(writeToSharedCmd); + tt[Shared][MC::ReadExReq].onRequest(MC::ReadExReq); tt[Shared][MC::SwapReq].onRequest(writeToSharedCmd); if (hasOwned) { tt[Owned][MC::WriteReq].onRequest(writeToSharedCmd); + tt[Owned][MC::ReadExReq].onRequest(MC::ReadExReq); tt[Owned][MC::SwapReq].onRequest(writeToSharedCmd); } diff --git a/src/mem/cache/coherence/simple_coherence.hh b/src/mem/cache/coherence/simple_coherence.hh index 1c89c703a..095260ca4 100644 --- a/src/mem/cache/coherence/simple_coherence.hh +++ b/src/mem/cache/coherence/simple_coherence.hh @@ -161,12 +161,6 @@ class SimpleCoherence bool allowFastWrites() { return false; } bool hasProtocol() { return true; } - - bool propogateInvalidate(PacketPtr pkt, bool isTiming) - { - //For now we do nothing, asssumes simple coherence is top level of cache - return false; - } }; #endif //__SIMPLE_COHERENCE_HH__ diff --git a/src/mem/cache/miss/mshr_queue.cc b/src/mem/cache/miss/mshr_queue.cc index add11dfe7..e9aa89bf8 100644 --- a/src/mem/cache/miss/mshr_queue.cc +++ b/src/mem/cache/miss/mshr_queue.cc @@ -119,7 +119,6 @@ MSHRQueue::allocate(PacketPtr &pkt, int size) if (!pkt->needsResponse()) { mshr->allocateAsBuffer(pkt); } else { - assert(size !=0); mshr->allocate(pkt->cmd, aligned_addr, size, pkt); allocatedTargets += 1; } diff --git a/src/mem/packet.cc b/src/mem/packet.cc index 2463a19ba..8c69def37 100644 --- a/src/mem/packet.cc +++ b/src/mem/packet.cc @@ -56,17 +56,18 @@ MemCmd::commandInfo[] = { 0, InvalidCmd, "InvalidCmd" }, /* ReadReq */ { SET3(IsRead, IsRequest, NeedsResponse), ReadResp, "ReadReq" }, + /* ReadResp */ + { SET3(IsRead, IsResponse, HasData), InvalidCmd, "ReadResp" }, /* WriteReq */ { SET4(IsWrite, IsRequest, NeedsResponse, HasData), WriteResp, "WriteReq" }, - /* WriteReqNoAck */ - { SET3(IsWrite, IsRequest, HasData), InvalidCmd, "WriteReqNoAck" }, - /* ReadResp */ - { SET3(IsRead, IsResponse, HasData), InvalidCmd, "ReadResp" }, /* WriteResp */ { SET2(IsWrite, IsResponse), InvalidCmd, "WriteResp" }, /* Writeback */ - { SET3(IsWrite, IsRequest, HasData), InvalidCmd, "Writeback" }, + { SET4(IsWrite, IsRequest, HasData, NeedsResponse), + WritebackAck, "Writeback" }, + /* WritebackAck */ + { SET2(IsWrite, IsResponse), InvalidCmd, "WritebackAck" }, /* SoftPFReq */ { SET4(IsRead, IsRequest, IsSWPrefetch, NeedsResponse), SoftPFResp, "SoftPFReq" }, diff --git a/src/mem/packet.hh b/src/mem/packet.hh index 577f99116..413ffa26b 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -73,11 +73,11 @@ class MemCmd { InvalidCmd, ReadReq, - WriteReq, - WriteReqNoAck, ReadResp, + WriteReq, WriteResp, Writeback, + WritebackAck, SoftPFReq, HardPFReq, SoftPFResp, -- cgit v1.2.3 From 41dde5f6fdf195b8d51d70119737c5e3f7391f78 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Tue, 22 May 2007 06:32:24 -0700 Subject: memtest.hh: Fix description string. Minor whitespace cleanup. src/cpu/memtest/memtest.hh: Fix description string. Minor whitespace cleanup. --HG-- extra : convert_revision : 0c7213d088da46de9713ca6beabc30523ccb1c8c --- src/cpu/memtest/memtest.hh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cpu/memtest/memtest.hh b/src/cpu/memtest/memtest.hh index 264309fd7..84e16b98a 100644 --- a/src/cpu/memtest/memtest.hh +++ b/src/cpu/memtest/memtest.hh @@ -85,13 +85,13 @@ class MemTest : public MemObject TickEvent(MemTest *c) : Event(&mainEventQueue, CPU_Tick_Pri), cpu(c) {} void process() {cpu->tick();} - virtual const char *description() { return "tick event"; } + virtual const char *description() { return "MemTest tick"; } }; TickEvent tickEvent; + class CpuPort : public Port { - MemTest *memtest; public: -- cgit v1.2.3 From da46364b1878339841e9cda5a62ee104409b6535 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Tue, 22 May 2007 07:30:55 -0700 Subject: Fix getDeviceAddressRanges() to get snooping right. --HG-- extra : convert_revision : 2aeab25ef955ab9db7b968786faff227239fbbe4 --- src/mem/cache/base_cache.cc | 8 -------- src/mem/cache/base_cache.hh | 3 --- src/mem/cache/cache.hh | 6 ++++++ src/mem/cache/cache_impl.hh | 24 ++++++++++++++++++++++++ 4 files changed, 30 insertions(+), 11 deletions(-) diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc index d75d35ebb..a47c19e60 100644 --- a/src/mem/cache/base_cache.cc +++ b/src/mem/cache/base_cache.cc @@ -57,7 +57,6 @@ BaseCache::BaseCache(const std::string &name, Params ¶ms) } - void BaseCache::CachePort::recvStatusChange(Port::Status status) { @@ -66,13 +65,6 @@ BaseCache::CachePort::recvStatusChange(Port::Status status) } } -void -BaseCache::CachePort::getDeviceAddressRanges(AddrRangeList &resp, bool &snoop) -{ - AddrRangeList dummy; - otherPort->getPeerAddressRanges(resp, dummy); -} - int BaseCache::CachePort::deviceBlockSize() { diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh index f7107a86a..a27ac1788 100644 --- a/src/mem/cache/base_cache.hh +++ b/src/mem/cache/base_cache.hh @@ -91,9 +91,6 @@ class BaseCache : public MemObject virtual void recvStatusChange(Status status); - virtual void getDeviceAddressRanges(AddrRangeList &resp, - bool &snoop); - virtual int deviceBlockSize(); bool recvRetryCommon(); diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh index 75fb50f4e..e14b2efe8 100644 --- a/src/mem/cache/cache.hh +++ b/src/mem/cache/cache.hh @@ -89,6 +89,9 @@ class Cache : public BaseCache void processRequestEvent(); void processResponseEvent(); + virtual void getDeviceAddressRanges(AddrRangeList &resp, + bool &snoop); + virtual bool recvTiming(PacketPtr pkt); virtual void recvRetry(); @@ -124,6 +127,9 @@ class Cache : public BaseCache void processRequestEvent(); void processResponseEvent(); + virtual void getDeviceAddressRanges(AddrRangeList &resp, + bool &snoop); + virtual bool recvTiming(PacketPtr pkt); virtual void recvRetry(); diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index 56352c110..a7f96603e 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -1139,6 +1139,18 @@ Cache::deletePortRefs(Port *p) } +template +void +Cache::CpuSidePort:: +getDeviceAddressRanges(AddrRangeList &resp, bool &snoop) +{ + // CPU side port doesn't snoop; it's a target only. + bool dummy; + otherPort->getPeerAddressRanges(resp, dummy); + snoop = false; +} + + template bool Cache::CpuSidePort::recvTiming(PacketPtr pkt) @@ -1260,6 +1272,18 @@ Cache::CpuSidePort::recvFunctional(PacketPtr pkt) } +template +void +Cache::MemSidePort:: +getDeviceAddressRanges(AddrRangeList &resp, bool &snoop) +{ + // Memory-side port always snoops. + bool dummy; + otherPort->getPeerAddressRanges(resp, dummy); + snoop = true; +} + + template bool Cache::MemSidePort::recvTiming(PacketPtr pkt) -- cgit v1.2.3 From 35cf19d441ed15d054d00674ec67ab5bc769f6d7 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Sun, 17 Jun 2007 17:27:53 -0700 Subject: More major reorg of cache. Seems to work for atomic mode now, timing mode still broken. configs/example/memtest.py: Revamp options. src/cpu/memtest/memtest.cc: No need for memory initialization. No need to make atomic response... memory system should do that now. src/cpu/memtest/memtest.hh: MemTest really doesn't want to snoop. src/mem/bridge.cc: checkFunctional() cleanup. src/mem/bus.cc: src/mem/bus.hh: src/mem/cache/base_cache.cc: src/mem/cache/base_cache.hh: src/mem/cache/cache.cc: src/mem/cache/cache.hh: src/mem/cache/cache_blk.hh: src/mem/cache/cache_builder.cc: src/mem/cache/cache_impl.hh: src/mem/cache/coherence/coherence_protocol.cc: src/mem/cache/coherence/coherence_protocol.hh: src/mem/cache/coherence/simple_coherence.hh: src/mem/cache/miss/SConscript: src/mem/cache/miss/mshr.cc: src/mem/cache/miss/mshr.hh: src/mem/cache/miss/mshr_queue.cc: src/mem/cache/miss/mshr_queue.hh: src/mem/cache/prefetch/base_prefetcher.cc: src/mem/cache/tags/fa_lru.cc: src/mem/cache/tags/fa_lru.hh: src/mem/cache/tags/iic.cc: src/mem/cache/tags/iic.hh: src/mem/cache/tags/lru.cc: src/mem/cache/tags/lru.hh: src/mem/cache/tags/split.cc: src/mem/cache/tags/split.hh: src/mem/cache/tags/split_lifo.cc: src/mem/cache/tags/split_lifo.hh: src/mem/cache/tags/split_lru.cc: src/mem/cache/tags/split_lru.hh: src/mem/packet.cc: src/mem/packet.hh: src/mem/physical.cc: src/mem/physical.hh: src/mem/tport.cc: More major reorg. Seems to work for atomic mode now, timing mode still broken. --HG-- extra : convert_revision : 7e70dfc4a752393b911880ff028271433855ae87 --- configs/example/memtest.py | 92 +- src/cpu/memtest/memtest.cc | 28 +- src/cpu/memtest/memtest.hh | 2 +- src/mem/bridge.cc | 11 +- src/mem/bus.cc | 87 +- src/mem/bus.hh | 3 - src/mem/cache/base_cache.cc | 413 ++++-- src/mem/cache/base_cache.hh | 230 +-- src/mem/cache/cache.cc | 3 - src/mem/cache/cache.hh | 271 ++-- src/mem/cache/cache_blk.hh | 25 +- src/mem/cache/cache_builder.cc | 27 +- src/mem/cache/cache_impl.hh | 1976 +++++++++++-------------- src/mem/cache/coherence/coherence_protocol.cc | 40 +- src/mem/cache/coherence/coherence_protocol.hh | 4 +- src/mem/cache/coherence/simple_coherence.hh | 15 +- src/mem/cache/miss/SConscript | 3 - src/mem/cache/miss/blocking_buffer.cc | 245 --- src/mem/cache/miss/blocking_buffer.hh | 209 --- src/mem/cache/miss/miss_buffer.cc | 62 - src/mem/cache/miss/miss_buffer.hh | 223 --- src/mem/cache/miss/miss_queue.cc | 752 ---------- src/mem/cache/miss/miss_queue.hh | 327 ---- src/mem/cache/miss/mshr.cc | 78 +- src/mem/cache/miss/mshr.hh | 77 +- src/mem/cache/miss/mshr_queue.cc | 90 +- src/mem/cache/miss/mshr_queue.hh | 137 +- src/mem/cache/prefetch/base_prefetcher.cc | 1 - src/mem/cache/tags/fa_lru.cc | 5 +- src/mem/cache/tags/fa_lru.hh | 7 +- src/mem/cache/tags/iic.cc | 32 +- src/mem/cache/tags/iic.hh | 27 +- src/mem/cache/tags/lru.cc | 5 +- src/mem/cache/tags/lru.hh | 15 +- src/mem/cache/tags/split.cc | 25 +- src/mem/cache/tags/split.hh | 7 +- src/mem/cache/tags/split_lifo.cc | 7 +- src/mem/cache/tags/split_lifo.hh | 15 +- src/mem/cache/tags/split_lru.cc | 5 +- src/mem/cache/tags/split_lru.hh | 15 +- src/mem/packet.cc | 99 +- src/mem/packet.hh | 194 ++- src/mem/physical.cc | 208 ++- src/mem/physical.hh | 12 +- src/mem/tport.cc | 13 +- 45 files changed, 2041 insertions(+), 4081 deletions(-) delete mode 100644 src/mem/cache/miss/blocking_buffer.cc delete mode 100644 src/mem/cache/miss/blocking_buffer.hh delete mode 100644 src/mem/cache/miss/miss_buffer.cc delete mode 100644 src/mem/cache/miss/miss_buffer.hh delete mode 100644 src/mem/cache/miss/miss_queue.cc delete mode 100644 src/mem/cache/miss/miss_queue.hh diff --git a/configs/example/memtest.py b/configs/example/memtest.py index 9fd943aaa..9027a9866 100644 --- a/configs/example/memtest.py +++ b/configs/example/memtest.py @@ -33,14 +33,32 @@ m5.AddToPath('../common') parser = optparse.OptionParser() -parser.add_option("--caches", action="store_true") -parser.add_option("-t", "--timing", action="store_true") -parser.add_option("-m", "--maxtick", type="int") -parser.add_option("-l", "--maxloads", default = "1000000000000", type="int") -parser.add_option("-n", "--numtesters", default = "8", type="int") -parser.add_option("-p", "--protocol", - default="moesi", - help="The coherence protocol to use for the L1'a (i.e. MOESI, MOSI)") +parser.add_option("-c", "--cache-levels", type="int", default=2, + metavar="LEVELS", + help="Number of cache levels [default: %default]") +parser.add_option("-a", "--atomic", action="store_true", + help="Use atomic (non-timing) mode") +parser.add_option("-b", "--blocking", action="store_true", + help="Use blocking caches") +parser.add_option("-l", "--maxloads", default="1G", metavar="N", + help="Stop after N loads [default: %default]") +parser.add_option("-m", "--maxtick", type="int", default=m5.MaxTick, + metavar="T", + help="Stop after T ticks") +parser.add_option("-n", "--numtesters", type="int", default=8, + metavar="N", + help="Number of tester pseudo-CPUs [default: %default]") +parser.add_option("-p", "--protocol", default="moesi", + help="Coherence protocol [default: %default]") + +parser.add_option("-f", "--functional", type="int", default=0, + metavar="PCT", + help="Target percentage of functional accesses " + "[default: %default]") +parser.add_option("-u", "--uncacheable", type="int", default=0, + metavar="PCT", + help="Target percentage of uncacheable accesses " + "[default: %default]") (options, args) = parser.parse_args() @@ -48,14 +66,29 @@ if args: print "Error: script doesn't take any positional arguments" sys.exit(1) +# Should generalize this someday... would be cool to have a loop that +# just iterates, adding a level of caching each time. +#if options.cache_levels != 2 and options.cache_levels != 0: +# print "Error: number of cache levels must be 0 or 2" +# sys.exit(1) + +if options.blocking: + num_l1_mshrs = 1 + num_l2_mshrs = 1 +else: + num_l1_mshrs = 12 + num_l2_mshrs = 92 + +block_size = 64 + # -------------------- # Base L1 Cache # ==================== class L1(BaseCache): latency = '1ns' - block_size = 64 - mshrs = 12 + block_size = block_size + mshrs = num_l1_mshrs tgts_per_mshr = 8 protocol = CoherenceProtocol(protocol=options.protocol) @@ -64,29 +97,31 @@ class L1(BaseCache): # ---------------------- class L2(BaseCache): - block_size = 64 + block_size = block_size latency = '10ns' - mshrs = 92 + mshrs = num_l2_mshrs tgts_per_mshr = 16 write_buffers = 8 + protocol = CoherenceProtocol(protocol=options.protocol) -#MAX CORES IS 8 with the false sharing method -if options.numtesters > 8: - print "Error: NUmber of testers limited to 8 because of false sharing" - sys,exit(1) +if options.numtesters > block_size: + print "Error: Number of testers limited to %s because of false sharing" \ + % (block_size) + sys.exit(1) -cpus = [ MemTest(atomic=not options.timing, max_loads=options.maxloads, - percent_functional=50, percent_uncacheable=10, +cpus = [ MemTest(atomic=options.atomic, max_loads=options.maxloads, + percent_functional=options.functional, + percent_uncacheable=options.uncacheable, progress_interval=1000) for i in xrange(options.numtesters) ] # system simulated system = System(cpu = cpus, funcmem = PhysicalMemory(), - physmem = PhysicalMemory(latency = "50ps"), + physmem = PhysicalMemory(latency = "100ns"), membus = Bus(clock="500MHz", width=16)) # l2cache & bus -if options.caches: +if options.cache_levels == 2: system.toL2Bus = Bus(clock="500MHz", width=16) system.l2c = L2(size='64kB', assoc=8) system.l2c.cpu_side = system.toL2Bus.port @@ -96,10 +131,14 @@ if options.caches: # add L1 caches for cpu in cpus: - if options.caches: + if options.cache_levels == 2: cpu.l1c = L1(size = '32kB', assoc = 4) cpu.test = cpu.l1c.cpu_side cpu.l1c.mem_side = system.toL2Bus.port + elif options.cache_levels == 1: + cpu.l1c = L1(size = '32kB', assoc = 4) + cpu.test = cpu.l1c.cpu_side + cpu.l1c.mem_side = system.membus.port else: cpu.test = system.membus.port system.funcmem.port = cpu.functional @@ -113,10 +152,10 @@ system.physmem.port = system.membus.port # ----------------------- root = Root( system = system ) -if options.timing: - root.system.mem_mode = 'timing' -else: +if options.atomic: root.system.mem_mode = 'atomic' +else: + root.system.mem_mode = 'timing' # Not much point in this being higher than the L1 latency m5.ticks.setGlobalFrequency('1ns') @@ -125,9 +164,6 @@ m5.ticks.setGlobalFrequency('1ns') m5.instantiate(root) # simulate until program terminates -if options.maxtick: - exit_event = m5.simulate(options.maxtick) -else: - exit_event = m5.simulate(10000000000000) +exit_event = m5.simulate(options.maxtick) print 'Exiting @ tick', m5.curTick(), 'because', exit_event.getCause() diff --git a/src/cpu/memtest/memtest.cc b/src/cpu/memtest/memtest.cc index 607cf1066..5d89f1b82 100644 --- a/src/cpu/memtest/memtest.cc +++ b/src/cpu/memtest/memtest.cc @@ -102,7 +102,6 @@ void MemTest::sendPkt(PacketPtr pkt) { if (atomic) { cachePort.sendAtomic(pkt); - pkt->makeAtomicResponse(); completeRequest(pkt); } else if (!cachePort.sendTiming(pkt)) { @@ -165,8 +164,6 @@ MemTest::MemTest(const string &name, tickEvent.schedule(0); id = TESTER_ALLOCATOR++; - if (TESTER_ALLOCATOR > 8) - panic("False sharing memtester only allows up to 8 testers"); accessRetry = false; } @@ -190,14 +187,8 @@ MemTest::init() blockAddrMask = blockSize - 1; traceBlockAddr = blockAddr(traceBlockAddr); - // set up intial memory contents here - - cachePort.memsetBlob(baseAddr1, 1, size); - funcPort.memsetBlob(baseAddr1, 1, size); - cachePort.memsetBlob(baseAddr2, 2, size); - funcPort.memsetBlob(baseAddr2, 2, size); - cachePort.memsetBlob(uncacheAddr, 3, size); - funcPort.memsetBlob(uncacheAddr, 3, size); + // initial memory contents for both physical memory and functional + // memory should be 0; no need to initialize them. } static void @@ -230,15 +221,10 @@ MemTest::completeRequest(PacketPtr pkt) case MemCmd::ReadResp: if (memcmp(pkt_data, data, pkt->getSize()) != 0) { - cerr << name() << ": on read of 0x" << hex << req->getPaddr() - << " (0x" << hex << blockAddr(req->getPaddr()) << ")" - << "@ cycle " << dec << curTick - << ", cache returns 0x"; - printData(cerr, pkt_data, pkt->getSize()); - cerr << ", expected 0x"; - printData(cerr, data, pkt->getSize()); - cerr << endl; - fatal(""); + panic("%s: read of %x (blk %x) @ cycle %d " + "returns %x, expected %x\n", name(), + req->getPaddr(), blockAddr(req->getPaddr()), curTick, + *pkt_data, *data); } numReads++; @@ -267,7 +253,7 @@ MemTest::completeRequest(PacketPtr pkt) break; */ default: - panic("invalid command"); + panic("invalid command %s (%d)", pkt->cmdString(), pkt->cmd.toInt()); } if (blockAddr(req->getPaddr()) == traceBlockAddr) { diff --git a/src/cpu/memtest/memtest.hh b/src/cpu/memtest/memtest.hh index a6b08d61c..565fafb77 100644 --- a/src/cpu/memtest/memtest.hh +++ b/src/cpu/memtest/memtest.hh @@ -116,7 +116,7 @@ class MemTest : public MemObject virtual void getDeviceAddressRanges(AddrRangeList &resp, bool &snoop) - { resp.clear(); snoop = true; } + { resp.clear(); snoop = false; } }; CpuPort cachePort; diff --git a/src/mem/bridge.cc b/src/mem/bridge.cc index eebf91a85..fb4574844 100644 --- a/src/mem/bridge.cc +++ b/src/mem/bridge.cc @@ -339,17 +339,14 @@ void Bridge::BridgePort::recvFunctional(PacketPtr pkt) { std::list::iterator i; - bool pktContinue = true; for (i = sendQueue.begin(); i != sendQueue.end(); ++i) { - if (pkt->intersect((*i)->pkt)) { - pktContinue &= fixPacket(pkt, (*i)->pkt); - } + if (pkt->checkFunctional((*i)->pkt)) + return; } - if (pktContinue) { - otherPort->sendFunctional(pkt); - } + // fall through if pkt still not satisfied + otherPort->sendFunctional(pkt); } /** Function called by the port when the bus is receiving a status change.*/ diff --git a/src/mem/bus.cc b/src/mem/bus.cc index ec33bd4c5..bde90c83f 100644 --- a/src/mem/bus.cc +++ b/src/mem/bus.cc @@ -33,7 +33,7 @@ * Definition of a bus object. */ - +#include #include #include "base/misc.hh" @@ -182,8 +182,7 @@ Bus::recvTiming(PacketPtr pkt) // If the bus is busy, or other devices are in line ahead of the current // one, put this device on the retry list. - if (!(pkt->flags & EXPRESS_SNOOP) && - tickNextIdle > curTick || + if (tickNextIdle > curTick || (retryList.size() && (!inRetry || pktPort != retryList.front()))) { addToRetryList(pktPort); @@ -199,7 +198,7 @@ Bus::recvTiming(PacketPtr pkt) port = findPort(pkt->getAddr(), pkt->getSrc()); timingSnoop(pkt, port ? port : interfaces[pkt->getSrc()]); - if (pkt->flags & SATISFIED) { + if (pkt->memInhibitAsserted()) { //Cache-Cache transfer occuring if (inRetry) { retryList.front()->onRetryList(false); @@ -321,27 +320,6 @@ Bus::findPort(Addr addr, int id) return interfaces[dest_id]; } -Tick -Bus::atomicSnoop(PacketPtr pkt, Port *responder) -{ - Tick response_time = 0; - - for (SnoopIter s_iter = snoopPorts.begin(); - s_iter != snoopPorts.end(); - s_iter++) { - BusPort *p = *s_iter; - if (p != responder && p->getId() != pkt->getSrc()) { - Tick response = p->sendAtomic(pkt); - if (response) { - assert(!response_time); //Multiple responders - response_time = response; - } - } - } - - return response_time; -} - void Bus::functionalSnoop(PacketPtr pkt, Port *responder) { @@ -390,19 +368,56 @@ Bus::recvAtomic(PacketPtr pkt) pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString()); assert(pkt->getDest() == Packet::Broadcast); - // Assume one bus cycle in order to get through. This may have - // some clock skew issues yet again... - pkt->finishTime = curTick + clock; + // Variables for recording original command and snoop response (if + // any)... if a snooper respondes, we will need to restore + // original command so that additional snoops can take place + // properly + MemCmd orig_cmd = pkt->cmd; + Packet::Result response_result = Packet::Unknown; + MemCmd response_cmd = MemCmd::InvalidCmd; - Port *port = findPort(pkt->getAddr(), pkt->getSrc()); - Tick snoopTime = atomicSnoop(pkt, port ? port : interfaces[pkt->getSrc()]); + Port *target_port = findPort(pkt->getAddr(), pkt->getSrc()); - if (snoopTime) - return snoopTime; //Snoop satisfies it - else if (port) - return port->sendAtomic(pkt); - else - return 0; + SnoopIter s_end = snoopPorts.end(); + for (SnoopIter s_iter = snoopPorts.begin(); s_iter != s_end; s_iter++) { + BusPort *p = *s_iter; + // same port should not have both target addresses and snooping + assert(p != target_port); + if (p->getId() != pkt->getSrc()) { + p->sendAtomic(pkt); + if (pkt->result != Packet::Unknown) { + // response from snoop agent + assert(pkt->cmd != orig_cmd); + assert(pkt->memInhibitAsserted()); + assert(pkt->isResponse()); + // should only happen once + assert(response_result == Packet::Unknown); + assert(response_cmd == MemCmd::InvalidCmd); + // save response state + response_result = pkt->result; + response_cmd = pkt->cmd; + // restore original packet state for remaining snoopers + pkt->cmd = orig_cmd; + pkt->result = Packet::Unknown; + } + } + } + + Tick response_time = target_port->sendAtomic(pkt); + + // if we got a response from a snooper, restore it here + if (response_result != Packet::Unknown) { + assert(response_cmd != MemCmd::InvalidCmd); + // no one else should have responded + assert(pkt->result == Packet::Unknown); + assert(pkt->cmd == orig_cmd); + pkt->cmd = response_cmd; + pkt->result = response_result; + } + + // why do we have this packet field and the return value both??? + pkt->finishTime = std::max(response_time, curTick + clock); + return pkt->finishTime; } /** Function called by the port when the bus is receiving a Functional diff --git a/src/mem/bus.hh b/src/mem/bus.hh index 5dd98c07e..33619bf45 100644 --- a/src/mem/bus.hh +++ b/src/mem/bus.hh @@ -182,9 +182,6 @@ class Bus : public MemObject */ Port *findPort(Addr addr, int id); - /** Snoop all relevant ports atomicly. */ - Tick atomicSnoop(PacketPtr pkt, Port* responder); - /** Snoop all relevant ports functionally. */ void functionalSnoop(PacketPtr pkt, Port *responder); diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc index a47c19e60..c7006550b 100644 --- a/src/mem/cache/base_cache.cc +++ b/src/mem/cache/base_cache.cc @@ -41,18 +41,23 @@ using namespace std; BaseCache::CachePort::CachePort(const std::string &_name, BaseCache *_cache) - : Port(_name, _cache), cache(_cache), otherPort(NULL) + : SimpleTimingPort(_name, _cache), cache(_cache), otherPort(NULL), + blocked(false), waitingOnRetry(false), mustSendRetry(false), + requestCauses(0) { - blocked = false; - waitingOnRetry = false; } BaseCache::BaseCache(const std::string &name, Params ¶ms) : MemObject(name), - blocked(0), blockedSnoop(0), + mshrQueue(params.numMSHRs, 4), + writeBuffer(params.numWriteBuffers, params.numMSHRs+1000), blkSize(params.blkSize), - missCount(params.maxMisses), drainEvent(NULL) + numTarget(params.numTargets), + blocked(0), + noTargetMSHR(NULL), + missCount(params.maxMisses), + drainEvent(NULL) { } @@ -71,139 +76,21 @@ BaseCache::CachePort::deviceBlockSize() return cache->getBlockSize(); } -bool -BaseCache::CachePort::checkFunctional(PacketPtr pkt) -{ - //Check storage here first - list::iterator i = drainList.begin(); - list::iterator iend = drainList.end(); - bool notDone = true; - while (i != iend && notDone) { - PacketPtr target = *i; - // If the target contains data, and it overlaps the - // probed request, need to update data - if (target->intersect(pkt)) { - DPRINTF(Cache, "Functional %s access to blk_addr %x intersects a drain\n", - pkt->cmdString(), pkt->getAddr() & ~(cache->getBlockSize() - 1)); - notDone = fixPacket(pkt, target); - } - i++; - } - //Also check the response not yet ready to be on the list - std::list >::iterator j = transmitList.begin(); - std::list >::iterator jend = transmitList.end(); - - while (j != jend && notDone) { - PacketPtr target = j->second; - // If the target contains data, and it overlaps the - // probed request, need to update data - if (target->intersect(pkt)) { - DPRINTF(Cache, "Functional %s access to blk_addr %x intersects a response\n", - pkt->cmdString(), pkt->getAddr() & ~(cache->getBlockSize() - 1)); - notDone = fixDelayedResponsePacket(pkt, target); - } - j++; - } - return notDone; -} void BaseCache::CachePort::checkAndSendFunctional(PacketPtr pkt) { - bool notDone = checkFunctional(pkt); - if (notDone) + checkFunctional(pkt); + if (pkt->result != Packet::Success) sendFunctional(pkt); } -void -BaseCache::CachePort::respond(PacketPtr pkt, Tick time) -{ - assert(time >= curTick); - if (pkt->needsResponse()) { - if (transmitList.empty()) { - assert(!responseEvent->scheduled()); - responseEvent->schedule(time); - transmitList.push_back(std::pair(time,pkt)); - return; - } - - // something is on the list and this belongs at the end - if (time >= transmitList.back().first) { - transmitList.push_back(std::pair(time,pkt)); - return; - } - // Something is on the list and this belongs somewhere else - std::list >::iterator i = - transmitList.begin(); - std::list >::iterator end = - transmitList.end(); - bool done = false; - - while (i != end && !done) { - if (time < i->first) { - if (i == transmitList.begin()) { - //Inserting at begining, reschedule - responseEvent->reschedule(time); - } - transmitList.insert(i,std::pair(time,pkt)); - done = true; - } - i++; - } - } - else { - assert(0); - // this code was on the cpuSidePort only... do we still need it? - if (pkt->cmd != MemCmd::UpgradeReq) - { - delete pkt->req; - delete pkt; - } - } -} - -bool -BaseCache::CachePort::drainResponse() -{ - DPRINTF(CachePort, - "%s attempting to send a retry for response (%i waiting)\n", - name(), drainList.size()); - //We have some responses to drain first - PacketPtr pkt = drainList.front(); - if (sendTiming(pkt)) { - drainList.pop_front(); - DPRINTF(CachePort, "%s sucessful in sending a retry for" - "response (%i still waiting)\n", name(), drainList.size()); - if (!drainList.empty() || isBusRequested()) { - - DPRINTF(CachePort, "%s has more responses/requests\n", name()); - return false; - } - } else { - waitingOnRetry = true; - DPRINTF(CachePort, "%s now waiting on a retry\n", name()); - } - return true; -} - - bool BaseCache::CachePort::recvRetryCommon() { assert(waitingOnRetry); waitingOnRetry = false; - if (!drainList.empty()) { - if (!drainResponse()) { - // more responses to drain... re-request bus - scheduleRequestEvent(curTick + 1); - } - // Check if we're done draining once this list is empty - if (drainList.empty()) { - cache->checkDrain(); - } - return true; - } return false; } @@ -451,17 +338,289 @@ BaseCache::regStats() .desc("number of cache copies performed") ; + writebacks + .init(maxThreadsPerCPU) + .name(name() + ".writebacks") + .desc("number of writebacks") + .flags(total) + ; + + // MSHR statistics + // MSHR hit statistics + for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { + MemCmd cmd(access_idx); + const string &cstr = cmd.toString(); + + mshr_hits[access_idx] + .init(maxThreadsPerCPU) + .name(name() + "." + cstr + "_mshr_hits") + .desc("number of " + cstr + " MSHR hits") + .flags(total | nozero | nonan) + ; + } + + demandMshrHits + .name(name() + ".demand_mshr_hits") + .desc("number of demand (read+write) MSHR hits") + .flags(total) + ; + demandMshrHits = mshr_hits[MemCmd::ReadReq] + mshr_hits[MemCmd::WriteReq]; + + overallMshrHits + .name(name() + ".overall_mshr_hits") + .desc("number of overall MSHR hits") + .flags(total) + ; + overallMshrHits = demandMshrHits + mshr_hits[MemCmd::SoftPFReq] + + mshr_hits[MemCmd::HardPFReq]; + + // MSHR miss statistics + for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { + MemCmd cmd(access_idx); + const string &cstr = cmd.toString(); + + mshr_misses[access_idx] + .init(maxThreadsPerCPU) + .name(name() + "." + cstr + "_mshr_misses") + .desc("number of " + cstr + " MSHR misses") + .flags(total | nozero | nonan) + ; + } + + demandMshrMisses + .name(name() + ".demand_mshr_misses") + .desc("number of demand (read+write) MSHR misses") + .flags(total) + ; + demandMshrMisses = mshr_misses[MemCmd::ReadReq] + mshr_misses[MemCmd::WriteReq]; + + overallMshrMisses + .name(name() + ".overall_mshr_misses") + .desc("number of overall MSHR misses") + .flags(total) + ; + overallMshrMisses = demandMshrMisses + mshr_misses[MemCmd::SoftPFReq] + + mshr_misses[MemCmd::HardPFReq]; + + // MSHR miss latency statistics + for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { + MemCmd cmd(access_idx); + const string &cstr = cmd.toString(); + + mshr_miss_latency[access_idx] + .init(maxThreadsPerCPU) + .name(name() + "." + cstr + "_mshr_miss_latency") + .desc("number of " + cstr + " MSHR miss cycles") + .flags(total | nozero | nonan) + ; + } + + demandMshrMissLatency + .name(name() + ".demand_mshr_miss_latency") + .desc("number of demand (read+write) MSHR miss cycles") + .flags(total) + ; + demandMshrMissLatency = mshr_miss_latency[MemCmd::ReadReq] + + mshr_miss_latency[MemCmd::WriteReq]; + + overallMshrMissLatency + .name(name() + ".overall_mshr_miss_latency") + .desc("number of overall MSHR miss cycles") + .flags(total) + ; + overallMshrMissLatency = demandMshrMissLatency + + mshr_miss_latency[MemCmd::SoftPFReq] + mshr_miss_latency[MemCmd::HardPFReq]; + + // MSHR uncacheable statistics + for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { + MemCmd cmd(access_idx); + const string &cstr = cmd.toString(); + + mshr_uncacheable[access_idx] + .init(maxThreadsPerCPU) + .name(name() + "." + cstr + "_mshr_uncacheable") + .desc("number of " + cstr + " MSHR uncacheable") + .flags(total | nozero | nonan) + ; + } + + overallMshrUncacheable + .name(name() + ".overall_mshr_uncacheable_misses") + .desc("number of overall MSHR uncacheable misses") + .flags(total) + ; + overallMshrUncacheable = mshr_uncacheable[MemCmd::ReadReq] + + mshr_uncacheable[MemCmd::WriteReq] + mshr_uncacheable[MemCmd::SoftPFReq] + + mshr_uncacheable[MemCmd::HardPFReq]; + + // MSHR miss latency statistics + for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { + MemCmd cmd(access_idx); + const string &cstr = cmd.toString(); + + mshr_uncacheable_lat[access_idx] + .init(maxThreadsPerCPU) + .name(name() + "." + cstr + "_mshr_uncacheable_latency") + .desc("number of " + cstr + " MSHR uncacheable cycles") + .flags(total | nozero | nonan) + ; + } + + overallMshrUncacheableLatency + .name(name() + ".overall_mshr_uncacheable_latency") + .desc("number of overall MSHR uncacheable cycles") + .flags(total) + ; + overallMshrUncacheableLatency = mshr_uncacheable_lat[MemCmd::ReadReq] + + mshr_uncacheable_lat[MemCmd::WriteReq] + + mshr_uncacheable_lat[MemCmd::SoftPFReq] + + mshr_uncacheable_lat[MemCmd::HardPFReq]; + +#if 0 + // MSHR access formulas + for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { + MemCmd cmd(access_idx); + const string &cstr = cmd.toString(); + + mshrAccesses[access_idx] + .name(name() + "." + cstr + "_mshr_accesses") + .desc("number of " + cstr + " mshr accesses(hits+misses)") + .flags(total | nozero | nonan) + ; + mshrAccesses[access_idx] = + mshr_hits[access_idx] + mshr_misses[access_idx] + + mshr_uncacheable[access_idx]; + } + + demandMshrAccesses + .name(name() + ".demand_mshr_accesses") + .desc("number of demand (read+write) mshr accesses") + .flags(total | nozero | nonan) + ; + demandMshrAccesses = demandMshrHits + demandMshrMisses; + + overallMshrAccesses + .name(name() + ".overall_mshr_accesses") + .desc("number of overall (read+write) mshr accesses") + .flags(total | nozero | nonan) + ; + overallMshrAccesses = overallMshrHits + overallMshrMisses + + overallMshrUncacheable; +#endif + + // MSHR miss rate formulas + for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { + MemCmd cmd(access_idx); + const string &cstr = cmd.toString(); + + mshrMissRate[access_idx] + .name(name() + "." + cstr + "_mshr_miss_rate") + .desc("mshr miss rate for " + cstr + " accesses") + .flags(total | nozero | nonan) + ; + + mshrMissRate[access_idx] = + mshr_misses[access_idx] / accesses[access_idx]; + } + + demandMshrMissRate + .name(name() + ".demand_mshr_miss_rate") + .desc("mshr miss rate for demand accesses") + .flags(total) + ; + demandMshrMissRate = demandMshrMisses / demandAccesses; + + overallMshrMissRate + .name(name() + ".overall_mshr_miss_rate") + .desc("mshr miss rate for overall accesses") + .flags(total) + ; + overallMshrMissRate = overallMshrMisses / overallAccesses; + + // mshrMiss latency formulas + for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { + MemCmd cmd(access_idx); + const string &cstr = cmd.toString(); + + avgMshrMissLatency[access_idx] + .name(name() + "." + cstr + "_avg_mshr_miss_latency") + .desc("average " + cstr + " mshr miss latency") + .flags(total | nozero | nonan) + ; + + avgMshrMissLatency[access_idx] = + mshr_miss_latency[access_idx] / mshr_misses[access_idx]; + } + + demandAvgMshrMissLatency + .name(name() + ".demand_avg_mshr_miss_latency") + .desc("average overall mshr miss latency") + .flags(total) + ; + demandAvgMshrMissLatency = demandMshrMissLatency / demandMshrMisses; + + overallAvgMshrMissLatency + .name(name() + ".overall_avg_mshr_miss_latency") + .desc("average overall mshr miss latency") + .flags(total) + ; + overallAvgMshrMissLatency = overallMshrMissLatency / overallMshrMisses; + + // mshrUncacheable latency formulas + for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { + MemCmd cmd(access_idx); + const string &cstr = cmd.toString(); + + avgMshrUncacheableLatency[access_idx] + .name(name() + "." + cstr + "_avg_mshr_uncacheable_latency") + .desc("average " + cstr + " mshr uncacheable latency") + .flags(total | nozero | nonan) + ; + + avgMshrUncacheableLatency[access_idx] = + mshr_uncacheable_lat[access_idx] / mshr_uncacheable[access_idx]; + } + + overallAvgMshrUncacheableLatency + .name(name() + ".overall_avg_mshr_uncacheable_latency") + .desc("average overall mshr uncacheable latency") + .flags(total) + ; + overallAvgMshrUncacheableLatency = overallMshrUncacheableLatency / overallMshrUncacheable; + + mshr_cap_events + .init(maxThreadsPerCPU) + .name(name() + ".mshr_cap_events") + .desc("number of times MSHR cap was activated") + .flags(total) + ; + + //software prefetching stats + soft_prefetch_mshr_full + .init(maxThreadsPerCPU) + .name(name() + ".soft_prefetch_mshr_full") + .desc("number of mshr full events for SW prefetching instrutions") + .flags(total) + ; + + mshr_no_allocate_misses + .name(name() +".no_allocate_misses") + .desc("Number of misses that were no-allocate") + ; + } unsigned int BaseCache::drain(Event *de) { + int count = memSidePort->drain(de) + cpuSidePort->drain(de); + // Set status - if (!canDrain()) { + if (count != 0) { drainEvent = de; changeState(SimObject::Draining); - return 1; + return count; } changeState(SimObject::Drained); diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh index a27ac1788..5969b4b3f 100644 --- a/src/mem/cache/base_cache.hh +++ b/src/mem/cache/base_cache.hh @@ -46,11 +46,13 @@ #include "base/misc.hh" #include "base/statistics.hh" #include "base/trace.hh" +#include "mem/cache/miss/mshr_queue.hh" #include "mem/mem_object.hh" #include "mem/packet.hh" -#include "mem/port.hh" +#include "mem/tport.hh" #include "mem/request.hh" #include "sim/eventq.hh" +#include "sim/sim_exit.hh" /** * Reasons for Caches to be Blocked. @@ -79,7 +81,7 @@ class MSHR; */ class BaseCache : public MemObject { - class CachePort : public Port + class CachePort : public SimpleTimingPort { public: BaseCache *cache; @@ -102,77 +104,76 @@ class BaseCache : public MemObject void clearBlocked(); - bool checkFunctional(PacketPtr pkt); - void checkAndSendFunctional(PacketPtr pkt); - bool canDrain() { return drainList.empty() && transmitList.empty(); } - - bool drainResponse(); - CachePort *otherPort; bool blocked; - bool mustSendRetry; - bool waitingOnRetry; + bool mustSendRetry; + /** * Bit vector for the outstanding requests for the master interface. */ uint8_t requestCauses; - std::list drainList; - - std::list > transmitList; - bool isBusRequested() { return requestCauses != 0; } - // These need to be virtual since the Event objects depend on - // cache template parameters. - virtual void scheduleRequestEvent(Tick t) = 0; - void requestBus(RequestCause cause, Tick time) { + DPRINTF(Cache, "Asserting bus request for cause %d\n", cause); if (!isBusRequested() && !waitingOnRetry) { - scheduleRequestEvent(time); + assert(!sendEvent->scheduled()); + sendEvent->schedule(time); } requestCauses |= (1 << cause); } void deassertBusRequest(RequestCause cause) { + DPRINTF(Cache, "Deasserting bus request for cause %d\n", cause); requestCauses &= ~(1 << cause); } - void respond(PacketPtr pkt, Tick time); + void respond(PacketPtr pkt, Tick time) { + schedSendTiming(pkt, time); + } }; public: //Made public so coherence can get at it. CachePort *cpuSidePort; CachePort *memSidePort; - private: + protected: + + /** Miss status registers */ + MSHRQueue mshrQueue; + + /** Write/writeback buffer */ + MSHRQueue writeBuffer; + + /** Block size of this cache */ + const int blkSize; + + /** The number of targets for each MSHR. */ + const int numTarget; + + /** Increasing order number assigned to each incoming request. */ + uint64_t order; + /** * Bit vector of the blocking reasons for the access path. * @sa #BlockedCause */ uint8_t blocked; - /** - * Bit vector for the blocking reasons for the snoop path. - * @sa #BlockedCause - */ - uint8_t blockedSnoop; - - protected: - /** Stores time the cache blocked for statistics. */ Tick blockedCycle; - /** Block size of this cache */ - const int blkSize; + /** Pointer to the MSHR that has no targets. */ + MSHR *noTargetMSHR; /** The number of misses to trigger an exit event. */ Counter missCount; @@ -246,6 +247,73 @@ class BaseCache : public MemObject /** The number of cache copies performed. */ Stats::Scalar<> cacheCopies; + /** Number of blocks written back per thread. */ + Stats::Vector<> writebacks; + + /** Number of misses that hit in the MSHRs per command and thread. */ + Stats::Vector<> mshr_hits[MemCmd::NUM_MEM_CMDS]; + /** Demand misses that hit in the MSHRs. */ + Stats::Formula demandMshrHits; + /** Total number of misses that hit in the MSHRs. */ + Stats::Formula overallMshrHits; + + /** Number of misses that miss in the MSHRs, per command and thread. */ + Stats::Vector<> mshr_misses[MemCmd::NUM_MEM_CMDS]; + /** Demand misses that miss in the MSHRs. */ + Stats::Formula demandMshrMisses; + /** Total number of misses that miss in the MSHRs. */ + Stats::Formula overallMshrMisses; + + /** Number of misses that miss in the MSHRs, per command and thread. */ + Stats::Vector<> mshr_uncacheable[MemCmd::NUM_MEM_CMDS]; + /** Total number of misses that miss in the MSHRs. */ + Stats::Formula overallMshrUncacheable; + + /** Total cycle latency of each MSHR miss, per command and thread. */ + Stats::Vector<> mshr_miss_latency[MemCmd::NUM_MEM_CMDS]; + /** Total cycle latency of demand MSHR misses. */ + Stats::Formula demandMshrMissLatency; + /** Total cycle latency of overall MSHR misses. */ + Stats::Formula overallMshrMissLatency; + + /** Total cycle latency of each MSHR miss, per command and thread. */ + Stats::Vector<> mshr_uncacheable_lat[MemCmd::NUM_MEM_CMDS]; + /** Total cycle latency of overall MSHR misses. */ + Stats::Formula overallMshrUncacheableLatency; + + /** The total number of MSHR accesses per command and thread. */ + Stats::Formula mshrAccesses[MemCmd::NUM_MEM_CMDS]; + /** The total number of demand MSHR accesses. */ + Stats::Formula demandMshrAccesses; + /** The total number of MSHR accesses. */ + Stats::Formula overallMshrAccesses; + + /** The miss rate in the MSHRs pre command and thread. */ + Stats::Formula mshrMissRate[MemCmd::NUM_MEM_CMDS]; + /** The demand miss rate in the MSHRs. */ + Stats::Formula demandMshrMissRate; + /** The overall miss rate in the MSHRs. */ + Stats::Formula overallMshrMissRate; + + /** The average latency of an MSHR miss, per command and thread. */ + Stats::Formula avgMshrMissLatency[MemCmd::NUM_MEM_CMDS]; + /** The average latency of a demand MSHR miss. */ + Stats::Formula demandAvgMshrMissLatency; + /** The average overall latency of an MSHR miss. */ + Stats::Formula overallAvgMshrMissLatency; + + /** The average latency of an MSHR miss, per command and thread. */ + Stats::Formula avgMshrUncacheableLatency[MemCmd::NUM_MEM_CMDS]; + /** The average overall latency of an MSHR miss. */ + Stats::Formula overallAvgMshrUncacheableLatency; + + /** The number of times a thread hit its MSHR cap. */ + Stats::Vector<> mshr_cap_events; + /** The number of times software prefetches caused the MSHR to block. */ + Stats::Vector<> soft_prefetch_mshr_full; + + Stats::Scalar<> mshr_no_allocate_misses; + /** * @} */ @@ -260,12 +328,13 @@ class BaseCache : public MemObject class Params { public: - /** List of address ranges of this cache. */ - std::vector > addrRange; /** The hit latency for this cache. */ int hitLatency; /** The block size of this cache. */ int blkSize; + int numMSHRs; + int numTargets; + int numWriteBuffers; /** * The maximum number of misses this cache should handle before * ending the simulation. @@ -275,10 +344,12 @@ class BaseCache : public MemObject /** * Construct an instance of this parameter class. */ - Params(std::vector > addr_range, - int hit_latency, int _blkSize, Counter max_misses) - : addrRange(addr_range), hitLatency(hit_latency), blkSize(_blkSize), - maxMisses(max_misses) + Params(int _hitLatency, int _blkSize, + int _numMSHRs, int _numTargets, int _numWriteBuffers, + Counter _maxMisses) + : hitLatency(_hitLatency), blkSize(_blkSize), + numMSHRs(_numMSHRs), numTargets(_numTargets), + numWriteBuffers(_numWriteBuffers), maxMisses(_maxMisses) { } }; @@ -307,6 +378,10 @@ class BaseCache : public MemObject return blkSize; } + + Addr blockAlign(Addr addr) const { return (addr & ~(blkSize - 1)); } + + /** * Returns true if the cache is blocked for accesses. */ @@ -315,14 +390,6 @@ class BaseCache : public MemObject return blocked != 0; } - /** - * Returns true if the cache is blocked for snoops. - */ - bool isBlockedForSnoop() - { - return blockedSnoop != 0; - } - /** * Marks the access path of the cache as blocked for the given cause. This * also sets the blocked flag in the slave interface. @@ -345,23 +412,6 @@ class BaseCache : public MemObject } } - /** - * Marks the snoop path of the cache as blocked for the given cause. This - * also sets the blocked flag in the master interface. - * @param cause The reason to block the snoop path. - */ - void setBlockedForSnoop(BlockedCause cause) - { - uint8_t flag = 1 << cause; - uint8_t old_state = blockedSnoop; - if (!(blockedSnoop & flag)) { - //Wasn't already blocked for this cause - blockedSnoop |= flag; - if (!old_state) - memSidePort->setBlocked(); - } - } - /** * Marks the cache as unblocked for the given cause. This also clears the * blocked flags in the appropriate interfaces. @@ -383,13 +433,6 @@ class BaseCache : public MemObject cpuSidePort->clearBlocked(); } } - if (blockedSnoop & flag) - { - blockedSnoop &= ~flag; - if (!isBlockedForSnoop()) { - memSidePort->clearBlocked(); - } - } } /** @@ -418,55 +461,26 @@ class BaseCache : public MemObject void deassertMemSideBusRequest(RequestCause cause) { memSidePort->deassertBusRequest(cause); - checkDrain(); + // checkDrain(); } - /** - * Send a response to the slave interface. - * @param pkt The request being responded to. - * @param time The time the response is ready. - */ - void respond(PacketPtr pkt, Tick time) - { - cpuSidePort->respond(pkt, time); - } + virtual unsigned int drain(Event *de); - /** - * Suppliess the data if cache to cache transfers are enabled. - * @param pkt The bus transaction to fulfill. - */ - void respondToSnoop(PacketPtr pkt, Tick time) - { - memSidePort->respond(pkt, time); - } + virtual bool inCache(Addr addr) = 0; - virtual unsigned int drain(Event *de); + virtual bool inMissQueue(Addr addr) = 0; - void checkDrain() + void incMissCount(PacketPtr pkt) { - if (drainEvent && canDrain()) { - drainEvent->process(); - changeState(SimObject::Drained); - // Clear the drain event - drainEvent = NULL; - } - } + misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++; - bool canDrain() - { - if (isMemSideBusRequested()) { - return false; - } else if (memSidePort && !memSidePort->canDrain()) { - return false; - } else if (cpuSidePort && !cpuSidePort->canDrain()) { - return false; + if (missCount) { + --missCount; + if (missCount == 0) + exitSimLoop("A cache reached the maximum miss count"); } - return true; } - virtual bool inCache(Addr addr) = 0; - - virtual bool inMissQueue(Addr addr) = 0; }; #endif //__BASE_CACHE_HH__ diff --git a/src/mem/cache/cache.cc b/src/mem/cache/cache.cc index 2b4e7b9c8..96f9a2e11 100644 --- a/src/mem/cache/cache.cc +++ b/src/mem/cache/cache.cc @@ -58,9 +58,6 @@ #include "mem/cache/tags/split_lifo.hh" #endif -#include "mem/cache/miss/miss_queue.hh" -#include "mem/cache/miss/blocking_buffer.hh" - #include "mem/cache/coherence/simple_coherence.hh" #include "mem/cache/cache_impl.hh" diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh index e14b2efe8..16d15cf86 100644 --- a/src/mem/cache/cache.hh +++ b/src/mem/cache/cache.hh @@ -45,12 +45,11 @@ #include "mem/cache/base_cache.hh" #include "mem/cache/cache_blk.hh" -#include "mem/cache/miss/miss_buffer.hh" +#include "mem/cache/miss/mshr.hh" #include "sim/eventq.hh" //Forward decleration -class MSHR; class BasePrefetcher; /** @@ -86,29 +85,14 @@ class Cache : public BaseCache return static_cast *>(cache); } - void processRequestEvent(); - void processResponseEvent(); - virtual void getDeviceAddressRanges(AddrRangeList &resp, bool &snoop); virtual bool recvTiming(PacketPtr pkt); - virtual void recvRetry(); - virtual Tick recvAtomic(PacketPtr pkt); virtual void recvFunctional(PacketPtr pkt); - - typedef EventWrapper - ResponseEvent; - - typedef EventWrapper - RequestEvent; - - virtual void scheduleRequestEvent(Tick t) { - new RequestEvent(this, t); - } }; class MemSidePort : public CachePort @@ -124,8 +108,9 @@ class Cache : public BaseCache return static_cast *>(cache); } - void processRequestEvent(); - void processResponseEvent(); + void sendPacket(); + + void processSendEvent(); virtual void getDeviceAddressRanges(AddrRangeList &resp, bool &snoop); @@ -138,21 +123,13 @@ class Cache : public BaseCache virtual void recvFunctional(PacketPtr pkt); - typedef EventWrapper - ResponseEvent; - - typedef EventWrapper - RequestEvent; - - virtual void scheduleRequestEvent(Tick t) { - new RequestEvent(this, t); - } + typedef EventWrapper + SendEvent; }; /** Tag and data Storage */ TagStore *tags; - /** Miss and Writeback handler */ - MissBuffer *missQueue; + /** Coherence protocol. */ Coherence *coherence; @@ -176,23 +153,6 @@ class Cache : public BaseCache */ int hitLatency; - /** - * A permanent mem req to always be used to cause invalidations. - * Used to append to target list, to cause an invalidation. - */ - PacketPtr invalidatePkt; - Request *invalidateReq; - - /** - * Policy class for performing compression. - */ - CompressionAlgorithm *compressionAlg; - - /** - * The block size of this cache. Set to value in the Tags object. - */ - const int16_t blkSize; - /** * Can this cache should allocate a block on a line-sized write miss. */ @@ -200,50 +160,6 @@ class Cache : public BaseCache const bool prefetchMiss; - /** - * Can the data can be stored in a compressed form. - */ - const bool storeCompressed; - - /** - * Do we need to compress blocks on writebacks (i.e. because - * writeback bus is compressed but storage is not)? - */ - const bool compressOnWriteback; - - /** - * The latency of a compression operation. - */ - const int16_t compLatency; - - /** - * Should we use an adaptive compression scheme. - */ - const bool adaptiveCompression; - - /** - * Do writebacks need to be compressed (i.e. because writeback bus - * is compressed), whether or not they're already compressed for - * storage. - */ - const bool writebackCompressed; - - /** - * Compare the internal block data to the fast access block data. - * @param blk The cache block to check. - * @return True if the data is the same. - */ - bool verifyData(BlkType *blk); - - /** - * Update the internal data of the block. The data to write is assumed to - * be in the fast access data. - * @param blk The block with the data to update. - * @param writebacks A list to store any generated writebacks. - * @param compress_block True if we should compress this block - */ - void updateData(BlkType *blk, PacketList &writebacks, bool compress_block); - /** * Handle a replacement for the given request. * @param blk A pointer to the block, usually NULL @@ -251,7 +167,7 @@ class Cache : public BaseCache * @param new_state The new state of the block. * @param writebacks A list to store any generated writebacks. */ - BlkType* doReplacement(BlkType *blk, PacketPtr &pkt, + BlkType* doReplacement(BlkType *blk, PacketPtr pkt, CacheBlk::State new_state, PacketList &writebacks); /** @@ -263,59 +179,38 @@ class Cache : public BaseCache * @return Pointer to the cache block touched by the request. NULL if it * was a miss. */ - BlkType* handleAccess(PacketPtr &pkt, int & lat, - PacketList & writebacks, bool update = true); - + bool access(PacketPtr pkt, BlkType *blk, int & lat); /** *Handle doing the Compare and Swap function for SPARC. */ - void cmpAndSwap(BlkType *blk, PacketPtr &pkt); - - /** - * Populates a cache block and handles all outstanding requests for the - * satisfied fill request. This version takes an MSHR pointer and uses its - * request to fill the cache block, while repsonding to its targets. - * @param blk The cache block if it already exists. - * @param mshr The MSHR that contains the fill data and targets to satisfy. - * @param new_state The state of the new cache block. - * @param writebacks List for any writebacks that need to be performed. - * @return Pointer to the new cache block. - */ - BlkType* handleFill(BlkType *blk, MSHR * mshr, CacheBlk::State new_state, - PacketList & writebacks, PacketPtr pkt); + void cmpAndSwap(BlkType *blk, PacketPtr pkt); /** * Populates a cache block and handles all outstanding requests for the * satisfied fill request. This version takes two memory requests. One * contains the fill data, the other is an optional target to satisfy. * Used for Cache::probe. - * @param blk The cache block if it already exists. * @param pkt The memory request with the fill data. - * @param new_state The state of the new cache block. + * @param blk The cache block if it already exists. * @param writebacks List for any writebacks that need to be performed. - * @param target The memory request to perform after the fill. * @return Pointer to the new cache block. */ - BlkType* handleFill(BlkType *blk, PacketPtr &pkt, - CacheBlk::State new_state, - PacketList & writebacks, PacketPtr target = NULL); + BlkType *handleFill(PacketPtr pkt, BlkType *blk, + PacketList &writebacks); - /** - * Sets the blk to the new state and handles the given request. - * @param blk The cache block being snooped. - * @param new_state The new coherence state for the block. - * @param pkt The request to satisfy - */ - void handleSnoop(BlkType *blk, CacheBlk::State new_state, - PacketPtr &pkt); + bool satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk); + bool satisfyTarget(MSHR::Target *target, BlkType *blk); + void satisfyMSHR(MSHR *mshr, PacketPtr pkt, BlkType *blk); + + void doTimingSupplyResponse(PacketPtr req_pkt, uint8_t *blk_data); /** * Sets the blk to the new state. * @param blk The cache block being snooped. * @param new_state The new coherence state for the block. */ - void handleSnoop(BlkType *blk, CacheBlk::State new_state); + void handleSnoop(PacketPtr ptk, BlkType *blk, bool is_timing); /** * Create a writeback request for the given block. @@ -330,44 +225,24 @@ class Cache : public BaseCache { public: TagStore *tags; - MissBuffer *missQueue; Coherence *coherence; BaseCache::Params baseParams; BasePrefetcher*prefetcher; bool prefetchAccess; - int hitLatency; - CompressionAlgorithm *compressionAlg; - const int16_t blkSize; const bool doFastWrites; const bool prefetchMiss; - const bool storeCompressed; - const bool compressOnWriteback; - const int16_t compLatency; - const bool adaptiveCompression; - const bool writebackCompressed; - Params(TagStore *_tags, MissBuffer *mq, Coherence *coh, + Params(TagStore *_tags, Coherence *coh, BaseCache::Params params, BasePrefetcher *_prefetcher, bool prefetch_access, int hit_latency, bool do_fast_writes, - bool store_compressed, bool adaptive_compression, - bool writeback_compressed, - CompressionAlgorithm *_compressionAlg, int comp_latency, bool prefetch_miss) - : tags(_tags), missQueue(mq), coherence(coh), + : tags(_tags), coherence(coh), baseParams(params), prefetcher(_prefetcher), prefetchAccess(prefetch_access), - hitLatency(hit_latency), - compressionAlg(_compressionAlg), - blkSize(_tags->getBlockSize()), doFastWrites(do_fast_writes), - prefetchMiss(prefetch_miss), - storeCompressed(store_compressed), - compressOnWriteback(!store_compressed && writeback_compressed), - compLatency(comp_latency), - adaptiveCompression(adaptive_compression), - writebackCompressed(writeback_compressed) + prefetchMiss(prefetch_miss) { } }; @@ -385,85 +260,105 @@ class Cache : public BaseCache * @param pkt The request to perform. * @return The result of the access. */ - bool access(PacketPtr &pkt); + bool timingAccess(PacketPtr pkt); /** - * Selects a request to send on the bus. - * @return The memory request to service. + * Performs the access specified by the request. + * @param pkt The request to perform. + * @return The result of the access. */ - PacketPtr getPacket(); + Tick atomicAccess(PacketPtr pkt); /** - * Was the request was sent successfully? - * @param pkt The request. - * @param success True if the request was sent successfully. + * Performs the access specified by the request. + * @param pkt The request to perform. + * @return The result of the access. */ - void sendResult(PacketPtr &pkt, MSHR* mshr, bool success); + void functionalAccess(PacketPtr pkt, CachePort *otherSidePort); /** * Handles a response (cache line fill/write ack) from the bus. * @param pkt The request being responded to. */ - void handleResponse(PacketPtr &pkt); + void handleResponse(PacketPtr pkt); /** * Snoops bus transactions to maintain coherence. * @param pkt The current bus transaction. */ - void snoop(PacketPtr &pkt); + void snoopTiming(PacketPtr pkt); - void snoopResponse(PacketPtr &pkt); + /** + * Snoop for the provided request in the cache and return the estimated + * time of completion. + * @param pkt The memory request to snoop + * @return The estimated completion time. + */ + Tick snoopAtomic(PacketPtr pkt); /** * Squash all requests associated with specified thread. * intended for use by I-cache. * @param threadNum The thread to squash. */ - void squash(int threadNum) - { - missQueue->squash(threadNum); - } + void squash(int threadNum); /** - * Return the number of outstanding misses in a Cache. - * Default returns 0. - * - * @retval unsigned The number of missing still outstanding. + * Allocate a new MSHR or write buffer to handle a miss. + * @param pkt The access that missed. + * @param time The time to continue processing the miss. + * @param isFill Whether to fetch & allocate a block + * or just forward the request. */ - unsigned outstandingMisses() const - { - return missQueue->getMisses(); - } + MSHR *allocateBuffer(PacketPtr pkt, Tick time, bool isFill, + bool requestBus); /** - * Perform the access specified in the request and return the estimated - * time of completion. This function can either update the hierarchy state - * or just perform the access wherever the data is found depending on the - * state of the update flag. - * @param pkt The memory request to satisfy - * @param update If true, update the hierarchy, otherwise just perform the - * request. - * @return The estimated completion time. + * Selects a outstanding request to service. + * @return The request to service, NULL if none found. */ - Tick probe(PacketPtr &pkt, bool update, CachePort * otherSidePort); + MSHR *getNextMSHR(); + PacketPtr getPacket(); /** - * Snoop for the provided request in the cache and return the estimated - * time of completion. - * @todo Can a snoop probe not change state? - * @param pkt The memory request to satisfy - * @param update If true, update the hierarchy, otherwise just perform the - * request. - * @return The estimated completion time. + * Marks a request as in service (sent on the bus). This can have side + * effect since storage for no response commands is deallocated once they + * are successfully sent. + * @param pkt The request that was sent on the bus. */ - Tick snoopProbe(PacketPtr &pkt); + void markInService(MSHR *mshr); + + /** + * Collect statistics and free resources of a satisfied request. + * @param pkt The request that has been satisfied. + * @param time The time when the request is satisfied. + */ + void handleResponse(PacketPtr pkt, Tick time); + + /** + * Perform the given writeback request. + * @param pkt The writeback request. + */ + void doWriteback(PacketPtr pkt); + + /** + * Return whether there are any outstanding misses. + */ + bool outstandingMisses() const + { + return mshrQueue.allocated != 0; + } + + CacheBlk *findBlock(Addr addr) { + return tags->findBlock(addr); + } bool inCache(Addr addr) { return (tags->findBlock(addr) != 0); } bool inMissQueue(Addr addr) { - return (missQueue->findMSHR(addr) != 0); + return (mshrQueue.findMatch(addr) != 0); } }; diff --git a/src/mem/cache/cache_blk.hh b/src/mem/cache/cache_blk.hh index fa00a0f5a..d2aba9480 100644 --- a/src/mem/cache/cache_blk.hh +++ b/src/mem/cache/cache_blk.hh @@ -39,6 +39,7 @@ #include "sim/core.hh" // for Tick #include "arch/isa_traits.hh" // for Addr +#include "mem/packet.hh" #include "mem/request.hh" /** @@ -51,8 +52,6 @@ enum CacheBlkStatusBits { BlkWritable = 0x02, /** dirty (modified) */ BlkDirty = 0x04, - /** compressed */ - BlkCompressed = 0x08, /** block was referenced */ BlkReferenced = 0x10, /** block was a hardware prefetch yet unaccessed*/ @@ -174,20 +173,11 @@ class CacheBlk * Check to see if a block has been written. * @return True if the block is dirty. */ - bool isModified() const + bool isDirty() const { return (status & BlkDirty) != 0; } - /** - * Check to see if this block contains compressed data. - * @return True iF the block's data is compressed. - */ - bool isCompressed() const - { - return (status & BlkCompressed) != 0; - } - /** * Check if this block has been referenced. * @return True if the block has been referenced. @@ -213,10 +203,10 @@ class CacheBlk * redundant records on the list, but that's OK, as they'll all * get blown away at the next store. */ - void trackLoadLocked(Request *req) + void trackLoadLocked(PacketPtr pkt) { - assert(req->isLocked()); - lockList.push_front(Lock(req)); + assert(pkt->isLocked()); + lockList.push_front(Lock(pkt->req)); } /** @@ -230,9 +220,10 @@ class CacheBlk * @return True if write should proceed, false otherwise. Returns * false only in the case of a failed store conditional. */ - bool checkWrite(Request *req) + bool checkWrite(PacketPtr pkt) { - if (req->isLocked()) { + Request *req = pkt->req; + if (pkt->isLocked()) { // it's a store conditional... have to check for matching // load locked. bool success = false; diff --git a/src/mem/cache/cache_builder.cc b/src/mem/cache/cache_builder.cc index bc1a8a775..307c851a2 100644 --- a/src/mem/cache/cache_builder.cc +++ b/src/mem/cache/cache_builder.cc @@ -70,10 +70,6 @@ #include "base/compression/null_compression.hh" #include "base/compression/lzss_compression.hh" -// MissQueue Templates -#include "mem/cache/miss/miss_queue.hh" -#include "mem/cache/miss/blocking_buffer.hh" - // Coherence Templates #include "mem/cache/coherence/simple_coherence.hh" @@ -207,13 +203,9 @@ END_INIT_SIM_OBJECT_PARAMS(BaseCache) else { \ BUILD_NULL_PREFETCHER(TAGS); \ } \ - Cache::Params params(tags, mq, coh, base_params, \ + Cache::Params params(tags, coh, base_params, \ pf, prefetch_access, latency, \ true, \ - store_compressed, \ - adaptive_compression, \ - compressed_bus, \ - compAlg, compression_latency, \ prefetch_miss); \ Cache *retval = \ new Cache(getInstanceName(), params); \ @@ -301,8 +293,6 @@ END_INIT_SIM_OBJECT_PARAMS(BaseCache) } while (0) #define BUILD_COHERENCE(b) do { \ - SimpleCoherence *coh = new SimpleCoherence(protocol); \ - BUILD_CACHES(SimpleCoherence); \ } while (0) #if defined(USE_TAGGED) @@ -369,8 +359,9 @@ CREATE_SIM_OBJECT(BaseCache) } // Build BaseCache param object - BaseCache::Params base_params(addr_range, latency, - block_size, max_miss_count); + BaseCache::Params base_params(latency, block_size, + mshrs, tgts_per_mshr, write_buffers, + max_miss_count); //Warnings about prefetcher policy if (pf_policy == "none" && (prefetch_miss || prefetch_access)) { @@ -408,14 +399,8 @@ CREATE_SIM_OBJECT(BaseCache) const void *repl = NULL; #endif - if (mshrs == 1 /*|| out_bus->doEvents() == false*/) { - BlockingBuffer *mq = new BlockingBuffer(true); - BUILD_COHERENCE(BlockingBuffer); - } else { - MissQueue *mq = new MissQueue(mshrs, tgts_per_mshr, write_buffers, - true, prefetch_miss); - BUILD_COHERENCE(MissQueue); - } + SimpleCoherence *coh = new SimpleCoherence(protocol); + BUILD_CACHES(SimpleCoherence); return NULL; } diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index a7f96603e..0f66e613c 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -37,17 +37,8 @@ * Cache definitions. */ -#include -#include - -#include -#include -#include -#include - #include "sim/host.hh" #include "base/misc.hh" -#include "cpu/smt.hh" #include "mem/cache/cache.hh" #include "mem/cache/cache_blk.hh" @@ -56,25 +47,16 @@ #include "sim/sim_exit.hh" // for SimExitEvent -bool SIGNAL_NACK_HACK; template Cache::Cache(const std::string &_name, Cache::Params ¶ms) : BaseCache(_name, params.baseParams), prefetchAccess(params.prefetchAccess), - tags(params.tags), missQueue(params.missQueue), + tags(params.tags), coherence(params.coherence), prefetcher(params.prefetcher), - hitLatency(params.hitLatency), - compressionAlg(params.compressionAlg), - blkSize(params.blkSize), doFastWrites(params.doFastWrites), - prefetchMiss(params.prefetchMiss), - storeCompressed(params.storeCompressed), - compressOnWriteback(params.compressOnWriteback), - compLatency(params.compLatency), - adaptiveCompression(params.adaptiveCompression), - writebackCompressed(params.writebackCompressed) + prefetchMiss(params.prefetchMiss) { cpuSidePort = new CpuSidePort(_name + "-cpu_side_port", this); memSidePort = new MemSidePort(_name + "-mem_side_port", this); @@ -82,12 +64,8 @@ Cache::Cache(const std::string &_name, memSidePort->setOtherPort(cpuSidePort); tags->setCache(this); - missQueue->setCache(this); - missQueue->setPrefetcher(prefetcher); coherence->setCache(this); prefetcher->setCache(this); - invalidateReq = new Request((Addr) NULL, blkSize, 0); - invalidatePkt = new Packet(invalidateReq, MemCmd::InvalidateReq, 0); } template @@ -96,51 +74,221 @@ Cache::regStats() { BaseCache::regStats(); tags->regStats(name()); - missQueue->regStats(name()); coherence->regStats(name()); prefetcher->regStats(name()); } template -typename Cache::BlkType* -Cache::handleAccess(PacketPtr &pkt, int & lat, - PacketList & writebacks, bool update) +Port * +Cache::getPort(const std::string &if_name, int idx) +{ + if (if_name == "" || if_name == "cpu_side") { + return cpuSidePort; + } else if (if_name == "mem_side") { + return memSidePort; + } else if (if_name == "functional") { + return new CpuSidePort(name() + "-cpu_side_funcport", this); + } else { + panic("Port name %s unrecognized\n", if_name); + } +} + +template +void +Cache::deletePortRefs(Port *p) +{ + if (cpuSidePort == p || memSidePort == p) + panic("Can only delete functional ports\n"); + + delete p; +} + + +template +void +Cache::cmpAndSwap(BlkType *blk, PacketPtr pkt) { - // Set the block offset here + uint64_t overwrite_val; + bool overwrite_mem; + uint64_t condition_val64; + uint32_t condition_val32; + int offset = tags->extractBlkOffset(pkt->getAddr()); + uint8_t *blk_data = blk->data + offset; + + assert(sizeof(uint64_t) >= pkt->getSize()); + + overwrite_mem = true; + // keep a copy of our possible write value, and copy what is at the + // memory address into the packet + pkt->writeData((uint8_t *)&overwrite_val); + pkt->setData(blk_data); + + if (pkt->req->isCondSwap()) { + if (pkt->getSize() == sizeof(uint64_t)) { + condition_val64 = pkt->req->getExtraData(); + overwrite_mem = !std::memcmp(&condition_val64, blk_data, + sizeof(uint64_t)); + } else if (pkt->getSize() == sizeof(uint32_t)) { + condition_val32 = (uint32_t)pkt->req->getExtraData(); + overwrite_mem = !std::memcmp(&condition_val32, blk_data, + sizeof(uint32_t)); + } else + panic("Invalid size for conditional read/write\n"); + } + + if (overwrite_mem) + std::memcpy(blk_data, &overwrite_val, pkt->getSize()); +} + + +///////////////////////////////////////////////////// +// +// MSHR helper functions +// +///////////////////////////////////////////////////// + + +template +MSHR * +Cache::allocateBuffer(PacketPtr pkt, Tick time, + bool isFill, bool requestBus) +{ + int size = isFill ? blkSize : pkt->getSize(); + Addr addr = isFill ? tags->blkAlign(pkt->getAddr()) : pkt->getAddr(); + + MSHR *mshr = NULL; + + if (pkt->isWrite()) { + /** + * @todo Add write merging here. + */ + mshr = writeBuffer.allocate(addr, size, pkt, isFill); + mshr->order = order++; - BlkType *blk = NULL; - if (update) { - blk = tags->findBlock(pkt->getAddr(), lat); + if (writeBuffer.isFull()) { + setBlocked(Blocked_NoWBBuffers); + } + + if (requestBus) { + requestMemSideBus(Request_WB, time); + } } else { - blk = tags->findBlock(pkt->getAddr()); - lat = 0; + mshr = mshrQueue.allocate(addr, size, pkt, isFill); + mshr->order = order++; + if (mshrQueue.isFull()) { + setBlocked(Blocked_NoMSHRs); + } + if (requestBus) { + requestMemSideBus(Request_MSHR, time); + } } - if (blk != NULL) { - if (!update) { + assert(mshr != NULL); + return mshr; +} - if (pkt->isWrite()){ - assert(offset < blkSize); - assert(pkt->getSize() <= blkSize); - assert(offset+pkt->getSize() <= blkSize); - std::memcpy(blk->data + offset, pkt->getPtr(), - pkt->getSize()); - } else if (pkt->isReadWrite()) { - cmpAndSwap(blk, pkt); - } else if (!(pkt->flags & SATISFIED)) { - pkt->flags |= SATISFIED; - pkt->result = Packet::Success; - assert(offset < blkSize); - assert(pkt->getSize() <= blkSize); - assert(offset + pkt->getSize() <=blkSize); - std::memcpy(pkt->getPtr(), blk->data + offset, - pkt->getSize()); + +template +void +Cache::markInService(MSHR *mshr) +{ + bool unblock = false; + BlockedCause cause = NUM_BLOCKED_CAUSES; + + /** + * @todo Should include MSHRQueue pointer in MSHR to select the correct + * one. + */ + if (mshr->queue == &writeBuffer) { + // Forwarding a write/ writeback, don't need to change + // the command + unblock = writeBuffer.isFull(); + writeBuffer.markInService(mshr); + if (!writeBuffer.havePending()){ + deassertMemSideBusRequest(Request_WB); + } + if (unblock) { + // Do we really unblock? + unblock = !writeBuffer.isFull(); + cause = Blocked_NoWBBuffers; + } + } else { + assert(mshr->queue == &mshrQueue); + unblock = mshrQueue.isFull(); + mshrQueue.markInService(mshr); + if (!mshrQueue.havePending()){ + deassertMemSideBusRequest(Request_MSHR); + } +#if 0 + if (mshr->originalCmd == MemCmd::HardPFReq) { + DPRINTF(HWPrefetch, "%s:Marking a HW_PF in service\n", + name()); + //Also clear pending if need be + if (!prefetcher->havePending()) + { + deassertMemSideBusRequest(Request_PF); } - return blk; } +#endif + if (unblock) { + unblock = !mshrQueue.isFull(); + cause = Blocked_NoMSHRs; + } + } + if (unblock) { + clearBlocked(cause); + } +} + + +template +void +Cache::squash(int threadNum) +{ + bool unblock = false; + BlockedCause cause = NUM_BLOCKED_CAUSES; - // Hit + if (noTargetMSHR && noTargetMSHR->threadNum == threadNum) { + noTargetMSHR = NULL; + unblock = true; + cause = Blocked_NoTargets; + } + if (mshrQueue.isFull()) { + unblock = true; + cause = Blocked_NoMSHRs; + } + mshrQueue.squash(threadNum); + if (!mshrQueue.havePending()) { + deassertMemSideBusRequest(Request_MSHR); + } + if (unblock && !mshrQueue.isFull()) { + clearBlocked(cause); + } +} + +///////////////////////////////////////////////////// +// +// Access path: requests coming in from the CPU side +// +///////////////////////////////////////////////////// + +template +bool +Cache::access(PacketPtr pkt, BlkType *blk, int &lat) +{ + bool satisfied = false; // assume the worst + + if (prefetchAccess) { + //We are determining prefetches on access stream, call prefetcher + prefetcher->handleMiss(pkt, curTick); + } + + DPRINTF(Cache, "%s %x %s\n", pkt->cmdString(), pkt->getAddr(), + (blk) ? "hit" : "miss"); + + if (blk != NULL) { + // HIT if (blk->isPrefetch()) { //Signal that this was a hit under prefetch (no need for //use prefetch (only can get here if true) @@ -154,639 +302,620 @@ Cache::handleAccess(PacketPtr &pkt, int & lat, } } - if ((pkt->isReadWrite() && blk->isWritable()) || - (pkt->isWrite() && blk->isWritable()) || - (pkt->isRead() && blk->isValid())) { - - // We are satisfying the request - pkt->flags |= SATISFIED; - - if (blk->isCompressed()) { - // If the data is compressed, need to increase the latency - lat += (compLatency/4); - } - - bool write_data = false; - - assert(verifyData(blk)); - - assert(offset < blkSize); - assert(pkt->getSize() <= blkSize); - assert(offset+pkt->getSize() <= blkSize); + if (pkt->needsExclusive() ? blk->isWritable() : blk->isValid()) { + // OK to satisfy access + hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++; + satisfied = true; - if (pkt->isWrite()) { - if (blk->checkWrite(pkt->req)) { - write_data = true; + if (pkt->cmd == MemCmd::SwapReq) { + cmpAndSwap(blk, pkt); + } else if (pkt->isWrite()) { + if (blk->checkWrite(pkt)) { blk->status |= BlkDirty; - std::memcpy(blk->data + offset, pkt->getPtr(), - pkt->getSize()); + pkt->writeDataToBlock(blk->data, blkSize); } - } else if (pkt->isReadWrite()) { - cmpAndSwap(blk, pkt); } else { assert(pkt->isRead()); - if (pkt->req->isLocked()) { - blk->trackLoadLocked(pkt->req); + if (pkt->isLocked()) { + blk->trackLoadLocked(pkt); } - std::memcpy(pkt->getPtr(), blk->data + offset, - pkt->getSize()); - } - - if (write_data || - (adaptiveCompression && blk->isCompressed())) - { - // If we wrote data, need to update the internal block - // data. - updateData(blk, writebacks, - !(adaptiveCompression && - blk->isReferenced())); + pkt->setDataFromBlock(blk->data, blkSize); } } else { - // permission violation, treat it as a miss - blk = NULL; + // permission violation... nothing to do here, leave unsatisfied + // for statistics purposes this counts like a complete miss + incMissCount(pkt); } } else { // complete miss (no matching block) - if (pkt->req->isLocked() && pkt->isWrite()) { + incMissCount(pkt); + + if (pkt->isLocked() && pkt->isWrite()) { // miss on store conditional... just give up now pkt->req->setExtraData(0); - pkt->flags |= SATISFIED; + satisfied = true; } } - return blk; + return satisfied; } -template -void -Cache::cmpAndSwap(BlkType *blk, PacketPtr &pkt){ - uint64_t overwrite_val; - bool overwrite_mem; - uint64_t condition_val64; - uint32_t condition_val32; - - int offset = tags->extractBlkOffset(pkt->getAddr()); - - assert(sizeof(uint64_t) >= pkt->getSize()); - - overwrite_mem = true; - // keep a copy of our possible write value, and copy what is at the - // memory address into the packet - std::memcpy(&overwrite_val, pkt->getPtr(), pkt->getSize()); - std::memcpy(pkt->getPtr(), blk->data + offset, - pkt->getSize()); - - if (pkt->req->isCondSwap()) { - if (pkt->getSize() == sizeof(uint64_t)) { - condition_val64 = pkt->req->getExtraData(); - overwrite_mem = !std::memcmp(&condition_val64, blk->data + offset, - sizeof(uint64_t)); - } else if (pkt->getSize() == sizeof(uint32_t)) { - condition_val32 = (uint32_t)pkt->req->getExtraData(); - overwrite_mem = !std::memcmp(&condition_val32, blk->data + offset, - sizeof(uint32_t)); - } else - panic("Invalid size for conditional read/write\n"); - } - - if (overwrite_mem) - std::memcpy(blk->data + offset, - &overwrite_val, pkt->getSize()); - -} template -typename Cache::BlkType* -Cache::handleFill(BlkType *blk, PacketPtr &pkt, - CacheBlk::State new_state, - PacketList & writebacks, - PacketPtr target) +bool +Cache::timingAccess(PacketPtr pkt) { -#ifndef NDEBUG - BlkType *tmp_blk = tags->findBlock(pkt->getAddr()); - assert(tmp_blk == blk); -#endif - blk = doReplacement(blk, pkt, new_state, writebacks); +//@todo Add back in MemDebug Calls +// MemDebug::cacheAccess(pkt); + // we charge hitLatency for doing just about anything here + Tick time = curTick + hitLatency; - if (pkt->isRead()) { - std::memcpy(blk->data, pkt->getPtr(), blkSize); + if (pkt->req->isUncacheable()) { + allocateBuffer(pkt, time, false, true); + assert(pkt->needsResponse()); // else we should delete it here?? + return true; } - blk->whenReady = pkt->finishTime; + PacketList writebacks; + int lat = hitLatency; + BlkType *blk = tags->findBlock(pkt->getAddr(), lat); + bool satisfied = false; + + Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1)); - // Respond to target, if any - if (target) { + MSHR *mshr = mshrQueue.findMatch(blk_addr); - target->flags |= SATISFIED; + if (!mshr) { + // no outstanding access to this block, look up in cache + // (otherwise if we allow reads while there's an outstanding + // write miss, the read could return stale data out of the + // cache block... a more aggressive system could detect the + // overlap (if any) and forward data out of the MSHRs, but we + // don't do that yet) + satisfied = access(pkt, blk, lat); + } - if (target->cmd == MemCmd::InvalidateReq) { - tags->invalidateBlk(blk); - blk = NULL; +#if 0 + // If this is a block size write/hint (WH64) allocate the block here + // if the coherence protocol allows it. + /** @todo make the fast write alloc (wh64) work with coherence. */ + /** @todo Do we want to do fast writes for writebacks as well? */ + if (!blk && pkt->getSize() >= blkSize && coherence->allowFastWrites() && + (pkt->cmd == MemCmd::WriteReq + || pkt->cmd == MemCmd::WriteInvalidateReq) ) { + // not outstanding misses, can do this + MSHR *outstanding_miss = mshrQueue.findMatch(pkt->getAddr()); + if (pkt->cmd == MemCmd::WriteInvalidateReq || !outstanding_miss) { + if (outstanding_miss) { + warn("WriteInv doing a fastallocate" + "with an outstanding miss to the same address\n"); + } + blk = handleFill(NULL, pkt, BlkValid | BlkWritable, + writebacks); + ++fastWrites; } + } +#endif - if (blk && ((target->isWrite() || target->isReadWrite()) ? - blk->isWritable() : blk->isValid())) { - assert(target->isWrite() || target->isReadWrite() || target->isRead()); - assert(target->getOffset(blkSize) + target->getSize() <= blkSize); - if (target->isWrite()) { - if (blk->checkWrite(pkt->req)) { - blk->status |= BlkDirty; - std::memcpy(blk->data + target->getOffset(blkSize), - target->getPtr(), target->getSize()); - } - } else if (target->isReadWrite()) { - cmpAndSwap(blk, target); - } else { - if (pkt->req->isLocked()) { - blk->trackLoadLocked(pkt->req); - } - std::memcpy(target->getPtr(), - blk->data + target->getOffset(blkSize), - target->getSize()); + // copy writebacks to write buffer + while (!writebacks.empty()) { + PacketPtr wbPkt = writebacks.front(); + allocateBuffer(wbPkt, time, false, true); + writebacks.pop_front(); + } + + bool needsResponse = pkt->needsResponse(); + + if (satisfied) { + assert(needsResponse); + pkt->makeTimingResponse(); + cpuSidePort->respond(pkt, curTick+lat); + } else { + // miss + if (prefetchMiss) + prefetcher->handleMiss(pkt, time); + + if (mshr) { + // MSHR hit + //@todo remove hw_pf here + mshr_hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++; + if (mshr->threadNum != 0/*pkt->req->getThreadNum()*/) { + mshr->threadNum = -1; } + mshr->allocateTarget(pkt, true); + if (mshr->getNumTargets() == numTarget) { + noTargetMSHR = mshr; + setBlocked(Blocked_NoTargets); + mshrQueue.moveToFront(mshr); + } + } else { + // no MSHR + mshr_misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++; + // always mark as cache fill for now... if we implement + // no-write-allocate or bypass accesses this will have to + // be changed. + allocateBuffer(pkt, time, true, true); } } - if (blk) { - // Need to write the data into the block - updateData(blk, writebacks, !adaptiveCompression || true); + if (!needsResponse) { + // Need to clean up the packet on a writeback miss, but leave + // the request for the next level. + delete pkt; } - return blk; + + return true; } + template -typename Cache::BlkType* -Cache::handleFill(BlkType *blk, MSHR * mshr, - CacheBlk::State new_state, - PacketList & writebacks, PacketPtr pkt) +Tick +Cache::atomicAccess(PacketPtr pkt) { -/* -#ifndef NDEBUG - BlkType *tmp_blk = findBlock(mshr->pkt->getAddr()); - assert(tmp_blk == blk); -#endif - PacketPtr pkt = mshr->pkt;*/ - blk = doReplacement(blk, pkt, new_state, writebacks); + // should assert here that there are no outstanding MSHRs or + // writebacks... that would mean that someone used an atomic + // access in timing mode - if (pkt->isRead()) { - std::memcpy(blk->data, pkt->getPtr(), blkSize); + if (pkt->req->isUncacheable()) { + // Uncacheables just go through + return memSidePort->sendAtomic(pkt); } - blk->whenReady = pkt->finishTime; + PacketList writebacks; + int lat = hitLatency; + BlkType *blk = tags->findBlock(pkt->getAddr(), lat); + bool satisfied = access(pkt, blk, lat); + if (!satisfied) { + // MISS + CacheBlk::State old_state = (blk) ? blk->status : 0; + MemCmd cmd = coherence->getBusCmd(pkt->cmd, old_state); + Packet busPkt = Packet(pkt->req, cmd, Packet::Broadcast, blkSize); + busPkt.allocate(); - // respond to MSHR targets, if any + DPRINTF(Cache, "Sending a atomic %s for %x\n", + busPkt.cmdString(), busPkt.getAddr()); - // First offset for critical word first calculations - int initial_offset = 0; + lat += memSidePort->sendAtomic(&busPkt); - if (mshr->hasTargets()) { - initial_offset = mshr->getTarget()->getOffset(blkSize); + DPRINTF(Cache, "Receive response: %s for addr %x in state %i\n", + busPkt.cmdString(), busPkt.getAddr(), old_state); + + blk = handleFill(&busPkt, blk, writebacks); + bool status = satisfyCpuSideRequest(pkt, blk); + assert(status); } - while (mshr->hasTargets()) { - PacketPtr target = mshr->getTarget(); + // We now have the block one way or another (hit or completed miss) - target->flags |= SATISFIED; + // Handle writebacks if needed + while (!writebacks.empty()){ + PacketPtr wbPkt = writebacks.front(); + memSidePort->sendAtomic(wbPkt); + writebacks.pop_front(); + delete wbPkt; + } - // How many bytes pass the first request is this one - int transfer_offset = target->getOffset(blkSize) - initial_offset; - if (transfer_offset < 0) { - transfer_offset += blkSize; - } + if (pkt->needsResponse()) { + pkt->makeAtomicResponse(); + pkt->result = Packet::Success; + } - // If critical word (no offset) return first word time - Tick completion_time = tags->getHitLatency() + - transfer_offset ? pkt->finishTime : pkt->firstWordTime; + return lat; +} - if (target->cmd == MemCmd::InvalidateReq) { - //Mark the blk as invalid now, if it hasn't been already - if (blk) { - tags->invalidateBlk(blk); - blk = NULL; - } - //Also get rid of the invalidate - mshr->popTarget(); +template +void +Cache::functionalAccess(PacketPtr pkt, + CachePort *otherSidePort) +{ + Addr blk_addr = pkt->getAddr() & ~(blkSize - 1); + BlkType *blk = tags->findBlock(pkt->getAddr()); - DPRINTF(Cache, "Popping off a Invalidate for addr %x\n", - pkt->getAddr()); + if (blk && pkt->checkFunctional(blk_addr, blkSize, blk->data)) { + // request satisfied from block + return; + } - continue; - } + // Need to check for outstanding misses and writes - if (blk && ((target->isWrite() || target->isReadWrite()) ? - blk->isWritable() : blk->isValid())) { - assert(target->isWrite() || target->isRead() || target->isReadWrite() ); - assert(target->getOffset(blkSize) + target->getSize() <= blkSize); - if (target->isWrite()) { - if (blk->checkWrite(pkt->req)) { - blk->status |= BlkDirty; - std::memcpy(blk->data + target->getOffset(blkSize), - target->getPtr(), target->getSize()); - } - } else if (target->isReadWrite()) { - cmpAndSwap(blk, target); - } else { - if (target->req->isLocked()) { - blk->trackLoadLocked(target->req); - } - std::memcpy(target->getPtr(), - blk->data + target->getOffset(blkSize), - target->getSize()); - } - } else { - // Invalid access, need to do another request - // can occur if block is invalidated, or not correct - // permissions -// mshr->pkt = pkt; - break; - } - if (!target->req->isUncacheable()) { - missLatency[target->cmdToIndex()][0/*pkt->req->getThreadNum()*/] += - completion_time - target->time; + // There can only be one matching outstanding miss. + MSHR *mshr = mshrQueue.findMatch(blk_addr); + if (mshr) { + MSHR::TargetList *targets = mshr->getTargetList(); + MSHR::TargetList::iterator i = targets->begin(); + MSHR::TargetList::iterator end = targets->end(); + for (; i != end; ++i) { + PacketPtr targetPkt = i->pkt; + if (pkt->checkFunctional(targetPkt)) + return; } - respond(target, completion_time); - mshr->popTarget(); } - if (blk) { - // Need to write the data into the block - updateData(blk, writebacks, !adaptiveCompression || true); + // There can be many matching outstanding writes. + std::vector writes; + writeBuffer.findMatches(blk_addr, writes); + for (int i = 0; i < writes.size(); ++i) { + MSHR *mshr = writes[i]; + if (pkt->checkFunctional(mshr->addr, mshr->size, mshr->writeData)) + return; } - return blk; + otherSidePort->checkAndSendFunctional(pkt); } +///////////////////////////////////////////////////// +// +// Response handling: responses from the memory side +// +///////////////////////////////////////////////////// + template void -Cache::handleSnoop(BlkType *blk, - CacheBlk::State new_state, - PacketPtr &pkt) +Cache::handleResponse(PacketPtr pkt, Tick time) { - //Must have the block to supply - assert(blk); - // Can only supply data, and if it hasn't already been supllied - assert(pkt->isRead()); - assert(!(pkt->flags & SATISFIED)); - pkt->flags |= SATISFIED; - Addr offset = pkt->getOffset(blkSize); - assert(offset < blkSize); - assert(pkt->getSize() <= blkSize); - assert(offset + pkt->getSize() <=blkSize); - std::memcpy(pkt->getPtr(), blk->data + offset, pkt->getSize()); - - handleSnoop(blk, new_state); + MSHR *mshr = dynamic_cast(pkt->senderState); +#ifndef NDEBUG + int num_targets = mshr->getNumTargets(); +#endif + + bool unblock = false; + bool unblock_target = false; + BlockedCause cause = NUM_BLOCKED_CAUSES; + + if (mshr->isCacheFill) { +#if 0 + mshr_miss_latency[mshr->originalCmd.toInt()][0/*pkt->req->getThreadNum()*/] += + curTick - pkt->time; +#endif + // targets were handled in the cache tags + if (mshr == noTargetMSHR) { + // we always clear at least one target + unblock_target = true; + cause = Blocked_NoTargets; + noTargetMSHR = NULL; + } + + if (mshr->hasTargets()) { + // Didn't satisfy all the targets, need to resend + mshrQueue.markPending(mshr); + mshr->order = order++; + requestMemSideBus(Request_MSHR, time); + } + else { + unblock = mshrQueue.isFull(); + mshrQueue.deallocate(mshr); + if (unblock) { + unblock = !mshrQueue.isFull(); + cause = Blocked_NoMSHRs; + } + } + } else { + if (pkt->req->isUncacheable()) { + mshr_uncacheable_lat[pkt->cmd.toInt()][0/*pkt->req->getThreadNum()*/] += + curTick - pkt->time; + } + if (mshr->hasTargets() && pkt->req->isUncacheable()) { + // Should only have 1 target if we had any + assert(num_targets == 1); + MSHR::Target *target = mshr->getTarget(); + assert(target->cpuSide); + mshr->popTarget(); + if (pkt->isRead()) { + target->pkt->setData(pkt->getPtr()); + } + cpuSidePort->respond(target->pkt, time); + assert(!mshr->hasTargets()); + } + else if (mshr->hasTargets()) { + //Must be a no_allocate with possibly more than one target + assert(!mshr->isCacheFill); + while (mshr->hasTargets()) { + MSHR::Target *target = mshr->getTarget(); + assert(target->isCpuSide()); + mshr->popTarget(); + if (pkt->isRead()) { + target->pkt->setData(pkt->getPtr()); + } + cpuSidePort->respond(target->pkt, time); + } + } + + if (pkt->isWrite()) { + // If the wrtie buffer is full, we might unblock now + unblock = writeBuffer.isFull(); + writeBuffer.deallocate(mshr); + if (unblock) { + // Did we really unblock? + unblock = !writeBuffer.isFull(); + cause = Blocked_NoWBBuffers; + } + } else { + unblock = mshrQueue.isFull(); + mshrQueue.deallocate(mshr); + if (unblock) { + unblock = !mshrQueue.isFull(); + cause = Blocked_NoMSHRs; + } + } + } + if (unblock || unblock_target) { + clearBlocked(cause); + } } + template void -Cache::handleSnoop(BlkType *blk, - CacheBlk::State new_state) +Cache::handleResponse(PacketPtr pkt) { - if (blk && blk->status != new_state) { - if ((new_state && BlkValid) == 0) { - tags->invalidateBlk(blk); - } else { - assert(new_state >= 0 && new_state < 128); - blk->status = new_state; + Tick time = curTick + hitLatency; + MSHR *mshr = dynamic_cast(pkt->senderState); + assert(mshr); + if (pkt->result == Packet::Nacked) { + //pkt->reinitFromRequest(); + warn("NACKs from devices not connected to the same bus " + "not implemented\n"); + return; + } + assert(pkt->result != Packet::BadAddress); + assert(pkt->result == Packet::Success); + DPRINTF(Cache, "Handling reponse to %x\n", pkt->getAddr()); + + if (mshr->isCacheFill) { + DPRINTF(Cache, "Block for addr %x being updated in Cache\n", + pkt->getAddr()); + BlkType *blk = tags->findBlock(pkt->getAddr()); + PacketList writebacks; + blk = handleFill(pkt, blk, writebacks); + satisfyMSHR(mshr, pkt, blk); + // copy writebacks to write buffer + while (!writebacks.empty()) { + PacketPtr wbPkt = writebacks.front(); + allocateBuffer(wbPkt, time, false, true); + writebacks.pop_front(); } } + handleResponse(pkt, time); } + + + template PacketPtr Cache::writebackBlk(BlkType *blk) { - assert(blk && blk->isValid() && blk->isModified()); - int data_size = blkSize; - data_size = blk->size; - if (compressOnWriteback) { - // not already compressed - // need to compress to ship it - assert(data_size == blkSize); - uint8_t *tmp_data = new uint8_t[blkSize]; - data_size = compressionAlg->compress(tmp_data,blk->data, - data_size); - delete [] tmp_data; - } + assert(blk && blk->isValid() && blk->isDirty()); -/* PacketPtr writeback = - buildWritebackReq(tags->regenerateBlkAddr(blk->tag, blk->set), - blk->asid, blkSize, - blk->data, data_size); -*/ + writebacks[0/*pkt->req->getThreadNum()*/]++; Request *writebackReq = new Request(tags->regenerateBlkAddr(blk->tag, blk->set), blkSize, 0); PacketPtr writeback = new Packet(writebackReq, MemCmd::Writeback, -1); writeback->allocate(); - std::memcpy(writeback->getPtr(),blk->data,blkSize); + std::memcpy(writeback->getPtr(), blk->data, blkSize); blk->status &= ~BlkDirty; return writeback; } -template -bool -Cache::verifyData(BlkType *blk) -{ - bool retval; - // The data stored in the blk - uint8_t *blk_data = new uint8_t[blkSize]; - tags->readData(blk, blk_data); - // Pointer for uncompressed data, assumed uncompressed - uint8_t *tmp_data = blk_data; - // The size of the data being stored, assumed uncompressed - int data_size = blkSize; - - // If the block is compressed need to uncompress to access - if (blk->isCompressed()){ - // Allocate new storage for the data - tmp_data = new uint8_t[blkSize]; - data_size = compressionAlg->uncompress(tmp_data,blk_data, blk->size); - assert(data_size == blkSize); - // Don't need to keep blk_data around - delete [] blk_data; - } else { - assert(blkSize == blk->size); - } - - retval = std::memcmp(tmp_data, blk->data, blkSize) == 0; - delete [] tmp_data; - return retval; -} - -template -void -Cache::updateData(BlkType *blk, PacketList &writebacks, - bool compress_block) -{ - if (storeCompressed && compress_block) { - uint8_t *comp_data = new uint8_t[blkSize]; - int new_size = compressionAlg->compress(comp_data, blk->data, blkSize); - if (new_size > (blkSize - tags->getSubBlockSize())){ - // no benefit to storing it compressed - blk->status &= ~BlkCompressed; - tags->writeData(blk, blk->data, blkSize, - writebacks); - } else { - // Store the data compressed - blk->status |= BlkCompressed; - tags->writeData(blk, comp_data, new_size, - writebacks); - } - delete [] comp_data; - } else { - blk->status &= ~BlkCompressed; - tags->writeData(blk, blk->data, blkSize, writebacks); - } -} - +// Note that the reason we return a list of writebacks rather than +// inserting them directly in the write buffer is that this function +// is called by both atomic and timing-mode accesses, and in atomic +// mode we don't mess with the write buffer (we just perform the +// writebacks atomically once the original request is complete). template typename Cache::BlkType* -Cache::doReplacement(BlkType *blk, PacketPtr &pkt, - CacheBlk::State new_state, - PacketList &writebacks) +Cache::handleFill(PacketPtr pkt, BlkType *blk, + PacketList &writebacks) { + Addr addr = pkt->getAddr(); + if (blk == NULL) { + // need to do a replacement - BlkList compress_list; - blk = tags->findReplacement(pkt, writebacks, compress_list); - while (adaptiveCompression && !compress_list.empty()) { - updateData(compress_list.front(), writebacks, true); - compress_list.pop_front(); - } + blk = tags->findReplacement(addr, writebacks); if (blk->isValid()) { DPRINTF(Cache, "replacement: replacing %x with %x: %s\n", - tags->regenerateBlkAddr(blk->tag,blk->set), pkt->getAddr(), - (blk->isModified()) ? "writeback" : "clean"); + tags->regenerateBlkAddr(blk->tag, blk->set), addr, + blk->isDirty() ? "writeback" : "clean"); - if (blk->isModified()) { - // Need to write the data back + if (blk->isDirty()) { + // Save writeback packet for handling by caller writebacks.push_back(writebackBlk(blk)); } } - blk->tag = tags->extractTag(pkt->getAddr(), blk); + + blk->tag = tags->extractTag(addr); + blk->status = coherence->getNewState(pkt); + assert(pkt->isRead()); } else { - // must be a status change - // assert(blk->status != new_state); - if (blk->status == new_state) warn("Changing state to same value\n"); + // existing block... probably an upgrade + assert(blk->tag == tags->extractTag(addr)); + // either we're getting new data or the block should already be valid + assert(pkt->isRead() || blk->isValid()); + CacheBlk::State old_state = blk->status; + blk->status = coherence->getNewState(pkt, old_state); + if (blk->status != old_state) + DPRINTF(Cache, "Block addr %x moving from state %i to %i\n", + addr, old_state, blk->status); + else + warn("Changing state to same value\n"); } - blk->status = new_state; + // if we got new data, copy it in + if (pkt->isRead()) { + std::memcpy(blk->data, pkt->getPtr(), blkSize); + } + + blk->whenReady = pkt->finishTime; + return blk; } template bool -Cache::access(PacketPtr &pkt) +Cache::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk) { -//@todo Add back in MemDebug Calls -// MemDebug::cacheAccess(pkt); - BlkType *blk = NULL; - PacketList writebacks; - int size = blkSize; - int lat = hitLatency; - if (prefetchAccess) { - //We are determining prefetches on access stream, call prefetcher - prefetcher->handleMiss(pkt, curTick); - } - - Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1)); - - if (!pkt->req->isUncacheable()) { - if (!missQueue->findMSHR(blk_addr)) { - blk = handleAccess(pkt, lat, writebacks); - } - } else { - size = pkt->getSize(); - } - // If this is a block size write/hint (WH64) allocate the block here - // if the coherence protocol allows it. - /** @todo make the fast write alloc (wh64) work with coherence. */ - /** @todo Do we want to do fast writes for writebacks as well? */ - if (!blk && pkt->getSize() >= blkSize && coherence->allowFastWrites() && - (pkt->cmd == MemCmd::WriteReq - || pkt->cmd == MemCmd::WriteInvalidateReq) ) { - // not outstanding misses, can do this - MSHR* outstanding_miss = missQueue->findMSHR(pkt->getAddr()); - if (pkt->cmd == MemCmd::WriteInvalidateReq || !outstanding_miss) { - if (outstanding_miss) { - warn("WriteInv doing a fastallocate" - "with an outstanding miss to the same address\n"); + if (blk && (pkt->needsExclusive() ? blk->isWritable() : blk->isValid())) { + assert(pkt->isWrite() || pkt->isReadWrite() || pkt->isRead()); + assert(pkt->getOffset(blkSize) + pkt->getSize() <= blkSize); + + if (pkt->isWrite()) { + if (blk->checkWrite(pkt)) { + blk->status |= BlkDirty; + pkt->writeDataToBlock(blk->data, blkSize); } - blk = handleFill(NULL, pkt, BlkValid | BlkWritable, - writebacks); - ++fastWrites; + } else if (pkt->isReadWrite()) { + cmpAndSwap(blk, pkt); + } else { + if (pkt->isLocked()) { + blk->trackLoadLocked(pkt); + } + pkt->setDataFromBlock(blk->data, blkSize); } + + return true; + } else { + return false; } - while (!writebacks.empty()) { - PacketPtr wbPkt = writebacks.front(); - missQueue->doWriteback(wbPkt); - writebacks.pop_front(); - delete wbPkt; - } +} + + +template +bool +Cache::satisfyTarget(MSHR::Target *target, BlkType *blk) +{ + assert(target != NULL); + assert(target->isCpuSide()); + return satisfyCpuSideRequest(target->pkt, blk); +} + +template +void +Cache::satisfyMSHR(MSHR *mshr, PacketPtr pkt, + BlkType *blk) +{ + // respond to MSHR targets, if any - DPRINTF(Cache, "%s %x %s\n", pkt->cmdString(), pkt->getAddr(), - (blk) ? "hit" : "miss"); + // First offset for critical word first calculations + int initial_offset = 0; - if (blk) { - // Hit - hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++; - // clear dirty bit if write through - respond(pkt, curTick+lat); - return true; + if (mshr->hasTargets()) { + initial_offset = mshr->getTarget()->pkt->getOffset(blkSize); } - // Miss - if (!pkt->req->isUncacheable()) { - misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++; - /** @todo Move miss count code into BaseCache */ - if (missCount) { - --missCount; - if (missCount == 0) - exitSimLoop("A cache reached the maximum miss count"); - } - } + while (mshr->hasTargets()) { + MSHR::Target *target = mshr->getTarget(); - if (pkt->flags & SATISFIED) { - // happens when a store conditional fails because it missed - // the cache completely - respond(pkt, curTick+lat); - } else { - missQueue->handleMiss(pkt, size, curTick + hitLatency); - } + if (!satisfyTarget(target, blk)) { + // Invalid access, need to do another request + // can occur if block is invalidated, or not correct + // permissions + break; + } - if (!pkt->needsResponse()) { - //Need to clean up the packet on a writeback miss, but leave the request - //for the next level. - delete pkt; - } - return true; -} + // How many bytes pass the first request is this one + int transfer_offset = target->pkt->getOffset(blkSize) - initial_offset; + if (transfer_offset < 0) { + transfer_offset += blkSize; + } + // If critical word (no offset) return first word time + Tick completion_time = tags->getHitLatency() + + transfer_offset ? pkt->finishTime : pkt->firstWordTime; -template -PacketPtr -Cache::getPacket() -{ - assert(missQueue->havePending()); - PacketPtr pkt = missQueue->getPacket(); - if (pkt) { - if (!pkt->req->isUncacheable()) { - if (pkt->cmd == MemCmd::HardPFReq) - misses[MemCmd::HardPFReq][0/*pkt->req->getThreadNum()*/]++; - BlkType *blk = tags->findBlock(pkt->getAddr()); - MemCmd cmd = - coherence->getBusCmd(pkt->cmd, (blk) ? blk->status : 0); - missQueue->setBusCmd(pkt, cmd); + if (!target->pkt->req->isUncacheable()) { + missLatency[target->pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] += + completion_time - target->time; } + target->pkt->makeTimingResponse(); + cpuSidePort->respond(target->pkt, completion_time); + mshr->popTarget(); } - - assert(!isMemSideBusRequested() || missQueue->havePending()); - assert(!pkt || pkt->time <= curTick); - SIGNAL_NACK_HACK = false; - return pkt; } + +///////////////////////////////////////////////////// +// +// Snoop path: requests coming in from the memory side +// +///////////////////////////////////////////////////// + template void -Cache::sendResult(PacketPtr &pkt, MSHR* mshr, - bool success) +Cache::doTimingSupplyResponse(PacketPtr req_pkt, + uint8_t *blk_data) { - if (success && !(SIGNAL_NACK_HACK)) { - //Remember if it was an upgrade because writeback MSHR's are removed - //in Mark in Service - bool upgrade = (mshr->pkt && mshr->pkt->cmd == MemCmd::UpgradeReq); - - missQueue->markInService(mshr->pkt, mshr); - - //Temp Hack for UPGRADES - if (upgrade) { - assert(pkt); //Upgrades need to be fixed - pkt->flags &= ~CACHE_LINE_FILL; - BlkType *blk = tags->findBlock(pkt->getAddr()); - CacheBlk::State old_state = (blk) ? blk->status : 0; - CacheBlk::State new_state = coherence->getNewState(pkt,old_state); - if (old_state != new_state) - DPRINTF(Cache, "Block for blk addr %x moving from state " - "%i to %i\n", pkt->getAddr(), old_state, new_state); - //Set the state on the upgrade - std::memcpy(pkt->getPtr(), blk->data, blkSize); - PacketList writebacks; - handleFill(blk, mshr, new_state, writebacks, pkt); - assert(writebacks.empty()); - missQueue->handleResponse(pkt, curTick + hitLatency); - } - } else if (pkt && !pkt->req->isUncacheable()) { - pkt->flags &= ~NACKED_LINE; - SIGNAL_NACK_HACK = false; - pkt->flags &= ~SATISFIED; - -//Rmove copy from mshr - delete mshr->pkt; - mshr->pkt = pkt; - - missQueue->restoreOrigCmd(pkt); - } + // timing-mode snoop responses require a new packet + PacketPtr pkt = new Packet(req_pkt); + pkt->allocate(); + pkt->makeTimingResponse(); + pkt->setDataFromBlock(blk_data, blkSize); + memSidePort->respond(pkt, curTick + hitLatency); } template void -Cache::handleResponse(PacketPtr &pkt) +Cache::handleSnoop(PacketPtr pkt, BlkType *blk, + bool is_timing) { - BlkType *blk = NULL; - if (pkt->senderState) { - //Delete temp copy in MSHR, restore it. - delete ((MSHR*)pkt->senderState)->pkt; - ((MSHR*)pkt->senderState)->pkt = pkt; - if (pkt->result == Packet::Nacked) { - //pkt->reinitFromRequest(); - warn("NACKs from devices not connected to the same bus " - "not implemented\n"); - return; - } - if (pkt->result == Packet::BadAddress) { - //Make the response a Bad address and send it + if (!blk || !blk->isValid()) { + return; + } + + // we may end up modifying both the block state and the packet (if + // we respond in atomic mode), so just figure out what to do now + // and then do it later + bool supply = blk->isDirty() && pkt->isRead(); + bool invalidate = pkt->isInvalidate(); + + if (pkt->isRead() && !pkt->isInvalidate()) { + assert(!pkt->needsExclusive()); + pkt->assertShared(); + int bits_to_clear = BlkWritable; + const bool haveOwnershipState = true; // for now + if (!haveOwnershipState) { + // if we don't support pure ownership (dirty && !writable), + // have to clear dirty bit here, assume memory snarfs data + // on cache-to-cache xfer + bits_to_clear |= BlkDirty; } -// MemDebug::cacheResponse(pkt); - DPRINTF(Cache, "Handling reponse to %x\n", pkt->getAddr()); - - if (pkt->isCacheFill() && !pkt->isNoAllocate()) { - DPRINTF(Cache, "Block for addr %x being updated in Cache\n", - pkt->getAddr()); - blk = tags->findBlock(pkt->getAddr()); - CacheBlk::State old_state = (blk) ? blk->status : 0; - PacketList writebacks; - CacheBlk::State new_state = coherence->getNewState(pkt,old_state); - if (old_state != new_state) - DPRINTF(Cache, "Block for blk addr %x moving from " - "state %i to %i\n", - pkt->getAddr(), - old_state, new_state); - blk = handleFill(blk, (MSHR*)pkt->senderState, - new_state, writebacks, pkt); - while (!writebacks.empty()) { - PacketPtr wbPkt = writebacks.front(); - missQueue->doWriteback(wbPkt); - writebacks.pop_front(); - delete wbPkt; - } + blk->status &= ~bits_to_clear; + } + + if (supply) { + pkt->assertMemInhibit(); + if (is_timing) { + doTimingSupplyResponse(pkt, blk->data); + } else { + pkt->makeAtomicResponse(); + pkt->setDataFromBlock(blk->data, blkSize); } - missQueue->handleResponse(pkt, curTick + hitLatency); } + + // Do this last in case it deallocates block data or something + // like that + if (invalidate) { + tags->invalidateBlk(blk); + } + + DPRINTF(Cache, "snooped a %s request for addr %x, %snew state is %i\n", + pkt->cmdString(), blockAlign(pkt->getAddr()), + supply ? "supplying data, " : "", blk->status); } template void -Cache::snoop(PacketPtr &pkt) +Cache::snoopTiming(PacketPtr pkt) { if (pkt->req->isUncacheable()) { //Can't get a hit on an uncacheable address @@ -794,351 +923,190 @@ Cache::snoop(PacketPtr &pkt) return; } - ///// PROPAGATE SNOOP UPWARD HERE + BlkType *blk = tags->findBlock(pkt->getAddr()); Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1)); - BlkType *blk = tags->findBlock(pkt->getAddr()); - MSHR *mshr = missQueue->findMSHR(blk_addr); - if (coherence->hasProtocol() || pkt->isInvalidate()) { - //@todo Move this into handle bus req - //If we find an mshr, and it is in service, we need to NACK or - //invalidate - if (mshr) { - if (mshr->inService) { - if ((mshr->pkt->isInvalidate() || !mshr->pkt->isCacheFill()) - && (pkt->cmd != MemCmd::InvalidateReq - && pkt->cmd != MemCmd::WriteInvalidateReq)) { - //If the outstanding request was an invalidate - //(upgrade,readex,..) Then we need to ACK the request - //until we get the data Also NACK if the outstanding - //request is not a cachefill (writeback) - assert(!(pkt->flags & SATISFIED)); - pkt->flags |= SATISFIED; - pkt->flags |= NACKED_LINE; - SIGNAL_NACK_HACK = true; - ///@todo NACK's from other levels - //warn("NACKs from devices not connected to the same bus " - //"not implemented\n"); - //respondToSnoop(pkt, curTick + hitLatency); - return; - } - else { - //The supplier will be someone else, because we are - //waiting for the data. This should cause this cache to - //be forced to go to the shared state, not the exclusive - //even though the shared line won't be asserted. But for - //now we will just invlidate ourselves and allow the other - //cache to go into the exclusive state. @todo Make it so - //a read to a pending read doesn't invalidate. @todo Make - //it so that a read to a pending read can't be exclusive - //now. - - //Set the address so find match works - //panic("Don't have invalidates yet\n"); - invalidatePkt->addrOverride(pkt->getAddr()); - - //Append the invalidate on - missQueue->addTarget(mshr,invalidatePkt); - DPRINTF(Cache, "Appending Invalidate to addr: %x\n", - pkt->getAddr()); - return; + MSHR *mshr = mshrQueue.findMatch(blk_addr); + // better not be snooping a request that conflicts with something + // we have outstanding... + assert(!mshr || !mshr->inService); + + //We also need to check the writeback buffers and handle those + std::vector writebacks; + if (writeBuffer.findMatches(blk_addr, writebacks)) { + DPRINTF(Cache, "Snoop hit in writeback to addr: %x\n", + pkt->getAddr()); + + //Look through writebacks for any non-uncachable writes, use that + for (int i=0; iisUncacheable()); + + if (pkt->isRead()) { + pkt->assertMemInhibit(); + if (!pkt->needsExclusive()) { + pkt->assertShared(); + } else { + // if we're not asserting the shared line, we need to + // invalidate our copy. we'll do that below as long as + // the packet's invalidate flag is set... + assert(pkt->isInvalidate()); } + doTimingSupplyResponse(pkt, mshr->writeData); } - } - //We also need to check the writeback buffers and handle those - std::vector writebacks; - if (missQueue->findWrites(blk_addr, writebacks)) { - DPRINTF(Cache, "Snoop hit in writeback to addr: %x\n", - pkt->getAddr()); - - //Look through writebacks for any non-uncachable writes, use that - for (int i=0; ipkt->req->isUncacheable()) { - if (pkt->isRead()) { - //Only Upgrades don't get here - //Supply the data - assert(!(pkt->flags & SATISFIED)); - pkt->flags |= SATISFIED; - - //If we are in an exclusive protocol, make it ask again - //to get write permissions (upgrade), signal shared - pkt->flags |= SHARED_LINE; - - assert(pkt->isRead()); - Addr offset = pkt->getAddr() & (blkSize - 1); - assert(offset < blkSize); - assert(pkt->getSize() <= blkSize); - assert(offset + pkt->getSize() <=blkSize); - std::memcpy(pkt->getPtr(), mshr->pkt->getPtr() + offset, pkt->getSize()); - - respondToSnoop(pkt, curTick + hitLatency); - } - - if (pkt->isInvalidate()) { - //This must be an upgrade or other cache will take - //ownership - missQueue->markInService(mshr->pkt, mshr); - } - return; - } + + if (pkt->isInvalidate()) { + // Invalidation trumps our writeback... discard here + assert(0); + markInService(mshr); } + return; } } - CacheBlk::State new_state; - bool satisfy = coherence->handleBusRequest(pkt,blk,mshr, new_state); - - if (blk && mshr && !mshr->inService && new_state == 0) { - //There was a outstanding write to a shared block, not need ReadEx - //not update, so change No Allocate param in MSHR - mshr->pkt->flags &= ~NO_ALLOCATE; - } - - if (satisfy) { - DPRINTF(Cache, "Cache snooped a %s request for addr %x and " - "now supplying data, new state is %i\n", - pkt->cmdString(), blk_addr, new_state); - - handleSnoop(blk, new_state, pkt); - respondToSnoop(pkt, curTick + hitLatency); - return; - } - if (blk) - DPRINTF(Cache, "Cache snooped a %s request for addr %x, " - "new state is %i\n", pkt->cmdString(), blk_addr, new_state); - handleSnoop(blk, new_state); + handleSnoop(pkt, blk, true); } + template -void -Cache::snoopResponse(PacketPtr &pkt) +Tick +Cache::snoopAtomic(PacketPtr pkt) { - //Need to handle the response, if NACKED - if (pkt->flags & NACKED_LINE) { - //Need to mark it as not in service, and retry for bus - assert(0); //Yeah, we saw a NACK come through - - //For now this should never get called, we return false when we see a - //NACK instead, by doing this we allow the bus_blocked mechanism to - //handle the retry For now it retrys in just 2 cycles, need to figure - //out how to change that Eventually we will want to also have success - //come in as a parameter Need to make sure that we handle the - //functionality that happens on successufl return of the sendAddr - //function + if (pkt->req->isUncacheable()) { + // Can't get a hit on an uncacheable address + // Revisit this for multi level coherence + return hitLatency; } + + BlkType *blk = tags->findBlock(pkt->getAddr()); + handleSnoop(pkt, blk, false); + return hitLatency; } -/** - * @todo Fix to not assume write allocate - */ template -Tick -Cache::probe(PacketPtr &pkt, bool update, - CachePort* otherSidePort) +MSHR * +Cache::getNextMSHR() { -// MemDebug::cacheProbe(pkt); - if (!pkt->req->isUncacheable()) { - if (pkt->isInvalidate() && !pkt->isRead() && !pkt->isWrite()) { - //Upgrade or Invalidate, satisfy it, don't forward - DPRINTF(Cache, "%s %x ?\n", pkt->cmdString(), pkt->getAddr()); - pkt->flags |= SATISFIED; - return 0; - } - } + // Check both MSHR queue and write buffer for potential requests + MSHR *miss_mshr = mshrQueue.getNextMSHR(); + MSHR *write_mshr = writeBuffer.getNextMSHR(); - if (!update && (otherSidePort == cpuSidePort)) { - // Still need to change data in all locations. - otherSidePort->checkAndSendFunctional(pkt); - if (pkt->isRead() && pkt->result == Packet::Success) - return 0; + // Now figure out which one to send... some cases are easy + if (miss_mshr && !write_mshr) { + return miss_mshr; + } + if (write_mshr && !miss_mshr) { + return write_mshr; } - PacketList writebacks; - int lat; - - BlkType *blk = handleAccess(pkt, lat, writebacks, update); - - DPRINTF(Cache, "%s %x %s\n", pkt->cmdString(), - pkt->getAddr(), (blk) ? "hit" : "miss"); - - - // Need to check for outstanding misses and writes - Addr blk_addr = pkt->getAddr() & ~(blkSize - 1); - - // There can only be one matching outstanding miss. - MSHR* mshr = missQueue->findMSHR(blk_addr); - - // There can be many matching outstanding writes. - std::vector writes; - missQueue->findWrites(blk_addr, writes); - - if (!update) { - bool notDone = !(pkt->flags & SATISFIED); //Hit in cache (was a block) - // Check for data in MSHR and writebuffer. - if (mshr) { - MSHR::TargetList *targets = mshr->getTargetList(); - MSHR::TargetList::iterator i = targets->begin(); - MSHR::TargetList::iterator end = targets->end(); - for (; i != end && notDone; ++i) { - PacketPtr target = *i; - // If the target contains data, and it overlaps the - // probed request, need to update data - if (target->intersect(pkt)) { - DPRINTF(Cache, "Functional %s access to blk_addr %x intersects a MSHR\n", - pkt->cmdString(), blk_addr); - notDone = fixPacket(pkt, target); - } - } - } - for (int i = 0; i < writes.size() && notDone; ++i) { - PacketPtr write = writes[i]->pkt; - if (write->intersect(pkt)) { - DPRINTF(Cache, "Functional %s access to blk_addr %x intersects a writeback\n", - pkt->cmdString(), blk_addr); - notDone = fixPacket(pkt, write); - } - } - if (notDone && otherSidePort == memSidePort) { - otherSidePort->checkAndSendFunctional(pkt); - assert(pkt->result == Packet::Success); - } - return 0; - } else if (!blk && !(pkt->flags & SATISFIED)) { - // update the cache state and statistics - if (mshr || !writes.empty()){ - // Can't handle it, return request unsatisfied. - panic("Atomic access ran into outstanding MSHR's or WB's!"); - } - if (!pkt->req->isUncacheable() /*Uncacheables just go through*/ - && (pkt->cmd != MemCmd::Writeback)/*Writebacks on miss fall through*/) { - // Fetch the cache block to fill - BlkType *blk = tags->findBlock(pkt->getAddr()); - MemCmd temp_cmd = - coherence->getBusCmd(pkt->cmd, (blk) ? blk->status : 0); - - PacketPtr busPkt = new Packet(pkt->req,temp_cmd, -1, blkSize); - - busPkt->allocate(); - - busPkt->time = curTick; - - DPRINTF(Cache, "Sending a atomic %s for %x\n", - busPkt->cmdString(), busPkt->getAddr()); - - lat = memSidePort->sendAtomic(busPkt); - - //Be sure to flip the response to a request for coherence - if (busPkt->needsResponse()) { - busPkt->makeAtomicResponse(); + if (miss_mshr && write_mshr) { + // We have one of each... normally we favor the miss request + // unless the write buffer is full + if (writeBuffer.isFull() && writeBuffer.inServiceEntries == 0) { + // Write buffer is full, so we'd like to issue a write; + // need to search MSHR queue for conflicting earlier miss. + MSHR *conflict_mshr = + mshrQueue.findPending(write_mshr->addr, write_mshr->size); + + if (conflict_mshr && conflict_mshr->order < write_mshr->order) { + // Service misses in order until conflict is cleared. + return conflict_mshr; } -/* if (!(busPkt->flags & SATISFIED)) { -// blocked at a higher level, just return -return 0; -} - -*/ misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++; - - CacheBlk::State old_state = (blk) ? blk->status : 0; - CacheBlk::State new_state = - coherence->getNewState(busPkt, old_state); - DPRINTF(Cache, "Receive response: %s for addr %x in state %i\n", - busPkt->cmdString(), busPkt->getAddr(), old_state); - if (old_state != new_state) - DPRINTF(Cache, "Block for blk addr %x moving from state " - "%i to %i\n", busPkt->getAddr(), old_state, new_state); - - handleFill(blk, busPkt, new_state, writebacks, pkt); - //Free the packet - delete busPkt; - - // Handle writebacks if needed - while (!writebacks.empty()){ - PacketPtr wbPkt = writebacks.front(); - memSidePort->sendAtomic(wbPkt); - writebacks.pop_front(); - delete wbPkt; - } - return lat + hitLatency; - } else { - return memSidePort->sendAtomic(pkt); + // No conflicts; issue write + return write_mshr; } - } else { - if (blk) { - // There was a cache hit. - // Handle writebacks if needed - while (!writebacks.empty()){ - PacketPtr wbPkt = writebacks.front(); - memSidePort->sendAtomic(wbPkt); - writebacks.pop_front(); - delete wbPkt; - } - hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++; + // Write buffer isn't full, but need to check it for + // conflicting earlier writeback + MSHR *conflict_mshr = + writeBuffer.findPending(miss_mshr->addr, miss_mshr->size); + if (conflict_mshr) { + // not sure why we don't check order here... it was in the + // original code but commented out. + + // The only way this happens is if we are + // doing a write and we didn't have permissions + // then subsequently saw a writeback (owned got evicted) + // We need to make sure to perform the writeback first + // To preserve the dirty data, then we can issue the write + + // should we return write_mshr here instead? I.e. do we + // have to flush writes in order? I don't think so... not + // for Alpha anyway. Maybe for x86? + return conflict_mshr; } - return hitLatency; + // No conclifts; issue read + return miss_mshr; + } + + // fall through... no pending requests. Try a prefetch. + assert(!miss_mshr && !write_mshr); + if (!mshrQueue.isFull()) { + // If we have a miss queue slot, we can try a prefetch + PacketPtr pkt = prefetcher->getPacket(); + if (pkt) { + // Update statistic on number of prefetches issued + // (hwpf_mshr_misses) + mshr_misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++; + // Don't request bus, since we already have it + return allocateBuffer(pkt, curTick, true, false); + } } - return 0; + return NULL; } + template -Tick -Cache::snoopProbe(PacketPtr &pkt) +PacketPtr +Cache::getPacket() { - ///// PROPAGATE SNOOP UPWARD HERE + MSHR *mshr = getNextMSHR(); - Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1)); - BlkType *blk = tags->findBlock(pkt->getAddr()); - MSHR *mshr = missQueue->findMSHR(blk_addr); - CacheBlk::State new_state = 0; - bool satisfy = coherence->handleBusRequest(pkt,blk,mshr, new_state); - if (satisfy) { - DPRINTF(Cache, "Cache snooped a %s request for addr %x and " - "now supplying data, new state is %i\n", - pkt->cmdString(), blk_addr, new_state); - - handleSnoop(blk, new_state, pkt); - return hitLatency; + if (mshr == NULL) { + return NULL; } - if (blk) - DPRINTF(Cache, "Cache snooped a %s request for addr %x, " - "new state is %i\n", - pkt->cmdString(), blk_addr, new_state); - handleSnoop(blk, new_state); - return 0; -} -template -Port * -Cache::getPort(const std::string &if_name, int idx) -{ - if (if_name == "" || if_name == "cpu_side") { - return cpuSidePort; - } else if (if_name == "mem_side") { - return memSidePort; - } else if (if_name == "functional") { - return new CpuSidePort(name() + "-cpu_side_funcport", this); + BlkType *blk = tags->findBlock(mshr->addr); + + // use request from 1st target + MSHR::Target *tgt1 = mshr->getTarget(); + PacketPtr tgt1_pkt = tgt1->pkt; + PacketPtr pkt; + + if (mshr->isCacheFill) { + MemCmd cmd; + if (blk && blk->isValid()) { + // only reason to be here is that blk is shared + // (read-only) and we need exclusive + assert(mshr->needsExclusive && !blk->isWritable()); + cmd = MemCmd::UpgradeReq; + } else { + // block is invalid + cmd = mshr->needsExclusive ? MemCmd::ReadExReq : MemCmd::ReadReq; + } + pkt = new Packet(tgt1_pkt->req, cmd, Packet::Broadcast); } else { - panic("Port name %s unrecognized\n", if_name); + assert(blk == NULL); + assert(mshr->getNumTargets() == 1); + pkt = new Packet(tgt1_pkt->req, tgt1_pkt->cmd, Packet::Broadcast); } -} - -template -void -Cache::deletePortRefs(Port *p) -{ - if (cpuSidePort == p || memSidePort == p) - panic("Can only delete functional ports\n"); - delete p; + pkt->senderState = mshr; + pkt->allocate(); + return pkt; } +/////////////// +// +// CpuSidePort +// +/////////////// + template void Cache::CpuSidePort:: @@ -1155,131 +1123,57 @@ template bool Cache::CpuSidePort::recvTiming(PacketPtr pkt) { - assert(pkt->result != Packet::Nacked); - - if (!pkt->req->isUncacheable() - && pkt->isInvalidate() - && !pkt->isRead() && !pkt->isWrite()) { - //Upgrade or Invalidate - //Look into what happens if two slave caches on bus - DPRINTF(Cache, "%s %x ?\n", pkt->cmdString(), pkt->getAddr()); - - assert(!(pkt->flags & SATISFIED)); - pkt->flags |= SATISFIED; - //Invalidates/Upgrades need no response if they get the bus - return true; - } - - if (pkt->isRequest() && blocked) - { + if (pkt->isRequest() && blocked) { DPRINTF(Cache,"Scheduling a retry while blocked\n"); mustSendRetry = true; return false; } - if (pkt->isWrite() && (pkt->req->isLocked())) { - pkt->req->setExtraData(1); - } - myCache()->access(pkt); + myCache()->timingAccess(pkt); return true; } template -void -Cache::CpuSidePort::recvRetry() -{ - recvRetryCommon(); -} - - -template -void -Cache::CpuSidePort::processRequestEvent() +Tick +Cache::CpuSidePort::recvAtomic(PacketPtr pkt) { - if (waitingOnRetry) - return; - //We have some responses to drain first - if (!drainList.empty()) { - if (!drainResponse()) { - // more responses to drain... re-request bus - scheduleRequestEvent(curTick + 1); - } - } + return myCache()->atomicAccess(pkt); } template void -Cache::CpuSidePort::processResponseEvent() +Cache::CpuSidePort::recvFunctional(PacketPtr pkt) { - assert(transmitList.size()); - assert(transmitList.front().first <= curTick); - PacketPtr pkt = transmitList.front().second; - transmitList.pop_front(); - if (!transmitList.empty()) { - Tick time = transmitList.front().first; - responseEvent->schedule(time <= curTick ? curTick+1 : time); - } - - if (pkt->flags & NACKED_LINE) - pkt->result = Packet::Nacked; - else - pkt->result = Packet::Success; - pkt->makeTimingResponse(); - DPRINTF(CachePort, "%s attempting to send a response\n", name()); - if (!drainList.empty() || waitingOnRetry) { - //Already have a list, just append - drainList.push_back(pkt); - DPRINTF(CachePort, "%s appending response onto drain list\n", name()); - } - else if (!sendTiming(pkt)) { - //It failed, save it to list of drain events - DPRINTF(CachePort, "%s now waiting for a retry\n", name()); - drainList.push_back(pkt); - waitingOnRetry = true; - } - - // Check if we're done draining once this list is empty - if (drainList.empty() && transmitList.empty()) - myCache()->checkDrain(); + checkFunctional(pkt); + if (pkt->result != Packet::Success) + myCache()->functionalAccess(pkt, cache->memSidePort); } template -Tick -Cache::CpuSidePort::recvAtomic(PacketPtr pkt) -{ - myCache()->probe(pkt, true, NULL); - //TEMP ALWAYS SUCCES FOR NOW - pkt->result = Packet::Success; - //Fix this timing info - return myCache()->hitLatency; -} - -template -void -Cache::CpuSidePort::recvFunctional(PacketPtr pkt) +Cache:: +CpuSidePort::CpuSidePort(const std::string &_name, + Cache *_cache) + : BaseCache::CachePort(_name, _cache) { - if (checkFunctional(pkt)) { - //TEMP USE CPU?THREAD 0 0 - pkt->req->setThreadContext(0,0); - - myCache()->probe(pkt, false, cache->memSidePort); - //TEMP ALWAYS SUCCESFUL FOR NOW - pkt->result = Packet::Success; - } } +/////////////// +// +// MemSidePort +// +/////////////// template void Cache::MemSidePort:: getDeviceAddressRanges(AddrRangeList &resp, bool &snoop) { - // Memory-side port always snoops. - bool dummy; - otherPort->getPeerAddressRanges(resp, dummy); + otherPort->getPeerAddressRanges(resp, snoop); + // Memory-side port always snoops, so unconditionally set flag for + // caller. snoop = true; } @@ -1303,177 +1197,115 @@ Cache::MemSidePort::recvTiming(PacketPtr pkt) if (pkt->isResponse()) { myCache()->handleResponse(pkt); } else { - myCache()->snoop(pkt); + myCache()->snoopTiming(pkt); } return true; } + template -void -Cache::MemSidePort::recvRetry() +Tick +Cache::MemSidePort::recvAtomic(PacketPtr pkt) { - if (recvRetryCommon()) { - return; - } - - DPRINTF(CachePort, "%s attempting to send a retry for MSHR\n", name()); - if (!cache->isMemSideBusRequested()) { - //This can happen if I am the owner of a block and see an upgrade - //while the block was in my WB Buffers. I just remove the - //wb and de-assert the masterRequest - waitingOnRetry = false; - return; - } - PacketPtr pkt = myCache()->getPacket(); - MSHR* mshr = (MSHR*) pkt->senderState; - //Copy the packet, it may be modified/destroyed elsewhere - PacketPtr copyPkt = new Packet(*pkt); - copyPkt->dataStatic(pkt->getPtr()); - mshr->pkt = copyPkt; - - bool success = sendTiming(pkt); - DPRINTF(Cache, "Address %x was %s in sending the timing request\n", - pkt->getAddr(), success ? "succesful" : "unsuccesful"); - - waitingOnRetry = !success; - if (waitingOnRetry) { - DPRINTF(CachePort, "%s now waiting on a retry\n", name()); - } + // in atomic mode, responses go back to the sender via the + // function return from sendAtomic(), not via a separate + // sendAtomic() from the responder. Thus we should never see a + // response packet in recvAtomic() (anywhere, not just here). + assert(!pkt->isResponse()); + return myCache()->snoopAtomic(pkt); +} - myCache()->sendResult(pkt, mshr, success); - if (success && cache->isMemSideBusRequested()) - { - DPRINTF(CachePort, "%s has more requests\n", name()); - //Still more to issue, rerequest in 1 cycle - new RequestEvent(this, curTick + 1); - } +template +void +Cache::MemSidePort::recvFunctional(PacketPtr pkt) +{ + checkFunctional(pkt); + if (pkt->result != Packet::Success) + myCache()->functionalAccess(pkt, cache->cpuSidePort); } + template void -Cache::MemSidePort::processRequestEvent() +Cache::MemSidePort::sendPacket() { - if (waitingOnRetry) - return; - //We have some responses to drain first - if (!drainList.empty()) { - if (!drainResponse()) { - // more responses to drain... re-request bus - scheduleRequestEvent(curTick + 1); - } - return; - } + // if we have responses that are ready, they take precedence + if (deferredPacketReady()) { + bool success = sendTiming(transmitList.front().pkt); - DPRINTF(CachePort, "%s trying to send a MSHR request\n", name()); - if (!isBusRequested()) { - //This can happen if I am the owner of a block and see an upgrade - //while the block was in my WB Buffers. I just remove the - //wb and de-assert the masterRequest - return; - } + if (success) { + //send successful, remove packet + transmitList.pop_front(); + } - PacketPtr pkt = myCache()->getPacket(); - MSHR* mshr = (MSHR*) pkt->senderState; - //Copy the packet, it may be modified/destroyed elsewhere - PacketPtr copyPkt = new Packet(*pkt); - copyPkt->dataStatic(pkt->getPtr()); - mshr->pkt = copyPkt; + waitingOnRetry = !success; + } else { + // check for non-response packets (requests & writebacks) + PacketPtr pkt = myCache()->getPacket(); + MSHR *mshr = dynamic_cast(pkt->senderState); - bool success = sendTiming(pkt); - DPRINTF(Cache, "Address %x was %s in sending the timing request\n", - pkt->getAddr(), success ? "succesful" : "unsuccesful"); + bool success = sendTiming(pkt); + DPRINTF(Cache, "Address %x was %s in sending the timing request\n", + pkt->getAddr(), success ? "successful" : "unsuccessful"); - waitingOnRetry = !success; - if (waitingOnRetry) { - DPRINTF(CachePort, "%s now waiting on a retry\n", name()); + waitingOnRetry = !success; + if (waitingOnRetry) { + DPRINTF(CachePort, "%s now waiting on a retry\n", name()); + } else { + myCache()->markInService(mshr); + } } - myCache()->sendResult(pkt, mshr, success); - if (success && isBusRequested()) - { - DPRINTF(CachePort, "%s still more MSHR requests to send\n", name()); - //Still more to issue, rerequest in 1 cycle - scheduleRequestEvent(curTick+1); + + // tried to send packet... if it was successful (no retry), see if + // we need to rerequest bus or not + if (!waitingOnRetry) { + if (isBusRequested()) { + // more requests/writebacks: rerequest ASAP + DPRINTF(CachePort, "%s still more MSHR requests to send\n", + name()); + sendEvent->schedule(curTick+1); + } else if (!transmitList.empty()) { + // deferred packets: rerequest bus, but possibly not until later + Tick time = transmitList.front().tick; + sendEvent->schedule(time <= curTick ? curTick+1 : time); + } else { + // no more to send right now: if we're draining, we may be done + if (drainEvent) { + drainEvent->process(); + drainEvent = NULL; + } + } } } - template void -Cache::MemSidePort::processResponseEvent() +Cache::MemSidePort::recvRetry() { - assert(transmitList.size()); - assert(transmitList.front().first <= curTick); - PacketPtr pkt = transmitList.front().second; - transmitList.pop_front(); - if (!transmitList.empty()) { - Tick time = transmitList.front().first; - responseEvent->schedule(time <= curTick ? curTick+1 : time); - } - - if (pkt->flags & NACKED_LINE) - pkt->result = Packet::Nacked; - else - pkt->result = Packet::Success; - pkt->makeTimingResponse(); - DPRINTF(CachePort, "%s attempting to send a response\n", name()); - if (!drainList.empty() || waitingOnRetry) { - //Already have a list, just append - drainList.push_back(pkt); - DPRINTF(CachePort, "%s appending response onto drain list\n", name()); - } - else if (!sendTiming(pkt)) { - //It failed, save it to list of drain events - DPRINTF(CachePort, "%s now waiting for a retry\n", name()); - drainList.push_back(pkt); - waitingOnRetry = true; - } - - // Check if we're done draining once this list is empty - if (drainList.empty() && transmitList.empty()) - myCache()->checkDrain(); + assert(waitingOnRetry); + sendPacket(); } -template -Tick -Cache::MemSidePort::recvAtomic(PacketPtr pkt) -{ - if (pkt->isResponse()) - myCache()->handleResponse(pkt); - else - return myCache()->snoopProbe(pkt); - //Fix this timing info - return myCache()->hitLatency; -} - template void -Cache::MemSidePort::recvFunctional(PacketPtr pkt) +Cache::MemSidePort::processSendEvent() { - myCache()->probe(pkt, false, cache->cpuSidePort); - if (pkt->result != Packet::Success) - checkFunctional(pkt); + assert(!waitingOnRetry); + sendPacket(); } -template -Cache:: -CpuSidePort::CpuSidePort(const std::string &_name, - Cache *_cache) - : BaseCache::CachePort(_name, _cache) -{ - responseEvent = new ResponseEvent(this); -} - template Cache:: MemSidePort::MemSidePort(const std::string &_name, Cache *_cache) : BaseCache::CachePort(_name, _cache) { - responseEvent = new ResponseEvent(this); + // override default send event from SimpleTimingPort + delete sendEvent; + sendEvent = new SendEvent(this); } - diff --git a/src/mem/cache/coherence/coherence_protocol.cc b/src/mem/cache/coherence/coherence_protocol.cc index bc8de0d26..3fd17c8c7 100644 --- a/src/mem/cache/coherence/coherence_protocol.cc +++ b/src/mem/cache/coherence/coherence_protocol.cc @@ -139,31 +139,6 @@ CoherenceProtocol::regStats() .desc("readEx snoops on exclusive blocks") ; - snoopCount[Shared][MemCmd::InvalidateReq] - .name(name() + ".snoop_inv_shared") - .desc("Invalidate snoops on shared blocks") - ; - - snoopCount[Owned][MemCmd::InvalidateReq] - .name(name() + ".snoop_inv_owned") - .desc("Invalidate snoops on owned blocks") - ; - - snoopCount[Exclusive][MemCmd::InvalidateReq] - .name(name() + ".snoop_inv_exclusive") - .desc("Invalidate snoops on exclusive blocks") - ; - - snoopCount[Modified][MemCmd::InvalidateReq] - .name(name() + ".snoop_inv_modified") - .desc("Invalidate snoops on modified blocks") - ; - - snoopCount[Invalid][MemCmd::InvalidateReq] - .name(name() + ".snoop_inv_invalid") - .desc("Invalidate snoops on invalid blocks") - ; - snoopCount[Shared][MemCmd::WriteInvalidateReq] .name(name() + ".snoop_writeinv_shared") .desc("WriteInvalidate snoops on shared blocks") @@ -219,7 +194,7 @@ CoherenceProtocol::supplyAndGotoSharedTrans(BaseCache *cache, PacketPtr &pkt, CacheBlk::State & new_state) { new_state = (blk->status & ~stateMask) | Shared; - pkt->flags |= SHARED_LINE; + pkt->assertShared(); return supplyTrans(cache, pkt, blk, mshr, new_state); } @@ -231,7 +206,7 @@ CoherenceProtocol::supplyAndGotoOwnedTrans(BaseCache *cache, PacketPtr &pkt, CacheBlk::State & new_state) { new_state = (blk->status & ~stateMask) | Owned; - pkt->flags |= SHARED_LINE; + pkt->assertShared(); return supplyTrans(cache, pkt, blk, mshr, new_state); } @@ -253,7 +228,7 @@ CoherenceProtocol::assertShared(BaseCache *cache, PacketPtr &pkt, CacheBlk::State & new_state) { new_state = (blk->status & ~stateMask) | Shared; - pkt->flags |= SHARED_LINE; + pkt->assertShared(); return false; } @@ -336,12 +311,10 @@ CoherenceProtocol::CoherenceProtocol(const string &name, // tt[Invalid][MC::ReadReq].onSnoop(nullTransition); tt[Invalid][MC::ReadExReq].onSnoop(nullTransition); - tt[Invalid][MC::InvalidateReq].onSnoop(invalidateTrans); tt[Invalid][MC::WriteInvalidateReq].onSnoop(invalidateTrans); tt[Shared][MC::ReadReq].onSnoop(hasExclusive ? assertShared : nullTransition); tt[Shared][MC::ReadExReq].onSnoop(invalidateTrans); - tt[Shared][MC::InvalidateReq].onSnoop(invalidateTrans); tt[Shared][MC::WriteInvalidateReq].onSnoop(invalidateTrans); if (doUpgrades) { tt[Invalid][MC::UpgradeReq].onSnoop(nullTransition); @@ -351,13 +324,11 @@ CoherenceProtocol::CoherenceProtocol(const string &name, tt[Modified][MC::ReadReq].onSnoop(hasOwned ? supplyAndGotoOwnedTrans : supplyAndGotoSharedTrans); - tt[Modified][MC::InvalidateReq].onSnoop(invalidateTrans); tt[Modified][MC::WriteInvalidateReq].onSnoop(invalidateTrans); if (hasExclusive) { tt[Exclusive][MC::ReadReq].onSnoop(assertShared); tt[Exclusive][MC::ReadExReq].onSnoop(invalidateTrans); - tt[Exclusive][MC::InvalidateReq].onSnoop(invalidateTrans); tt[Exclusive][MC::WriteInvalidateReq].onSnoop(invalidateTrans); } @@ -365,7 +336,6 @@ CoherenceProtocol::CoherenceProtocol(const string &name, tt[Owned][MC::ReadReq].onSnoop(supplyAndGotoOwnedTrans); tt[Owned][MC::ReadExReq].onSnoop(supplyAndInvalidateTrans); tt[Owned][MC::UpgradeReq].onSnoop(invalidateTrans); - tt[Owned][MC::InvalidateReq].onSnoop(invalidateTrans); tt[Owned][MC::WriteInvalidateReq].onSnoop(invalidateTrans); } @@ -394,7 +364,7 @@ CoherenceProtocol::getBusCmd(MemCmd cmdIn, CacheBlk::State state, CacheBlk::State -CoherenceProtocol::getNewState(PacketPtr &pkt, CacheBlk::State oldState) +CoherenceProtocol::getNewState(PacketPtr pkt, CacheBlk::State oldState) { CacheBlk::State state = oldState & stateMask; int cmd_idx = pkt->cmdToIndex(); @@ -406,7 +376,7 @@ CoherenceProtocol::getNewState(PacketPtr &pkt, CacheBlk::State oldState) //Check if it's exclusive and the shared line was asserted, //then goto shared instead - if (newState == Exclusive && (pkt->flags & SHARED_LINE)) { + if (newState == Exclusive && pkt->sharedAsserted()) { newState = Shared; } diff --git a/src/mem/cache/coherence/coherence_protocol.hh b/src/mem/cache/coherence/coherence_protocol.hh index 775bc807a..4b8024582 100644 --- a/src/mem/cache/coherence/coherence_protocol.hh +++ b/src/mem/cache/coherence/coherence_protocol.hh @@ -89,8 +89,8 @@ class CoherenceProtocol : public SimObject * @param oldState The current block state. * @return The new state. */ - CacheBlk::State getNewState(PacketPtr &pkt, - CacheBlk::State oldState); + CacheBlk::State getNewState(PacketPtr pkt, + CacheBlk::State oldState = 0); /** * Handle snooped bus requests. diff --git a/src/mem/cache/coherence/simple_coherence.hh b/src/mem/cache/coherence/simple_coherence.hh index 095260ca4..214828ca7 100644 --- a/src/mem/cache/coherence/simple_coherence.hh +++ b/src/mem/cache/coherence/simple_coherence.hh @@ -94,25 +94,14 @@ class SimpleCoherence return NULL; } - /** - * Was the CSHR request was sent successfully? - * @param pkt The request. - * @param success True if the request was sent successfully. - */ - void sendResult(PacketPtr &pkt, MSHR* cshr, bool success) - { - //Don't do coherence - return; - } - - /** * Return the proper state given the current state and the bus response. * @param pkt The bus response. * @param current The current block state. * @return The new state. */ - CacheBlk::State getNewState(PacketPtr &pkt, CacheBlk::State current) + CacheBlk::State getNewState(PacketPtr pkt, + CacheBlk::State current = 0) { return protocol->getNewState(pkt, current); } diff --git a/src/mem/cache/miss/SConscript b/src/mem/cache/miss/SConscript index 0f81a2570..376d670cd 100644 --- a/src/mem/cache/miss/SConscript +++ b/src/mem/cache/miss/SConscript @@ -30,8 +30,5 @@ Import('*') -Source('blocking_buffer.cc') -Source('miss_buffer.cc') -Source('miss_queue.cc') Source('mshr.cc') Source('mshr_queue.cc') diff --git a/src/mem/cache/miss/blocking_buffer.cc b/src/mem/cache/miss/blocking_buffer.cc deleted file mode 100644 index 281328c2e..000000000 --- a/src/mem/cache/miss/blocking_buffer.cc +++ /dev/null @@ -1,245 +0,0 @@ -/* - * Copyright (c) 2003-2005 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Authors: Erik Hallnor - */ - -/** - * @file - * Definitions of a simple buffer for a blocking cache. - */ -#include - -#include "mem/cache/base_cache.hh" -#include "mem/cache/miss/blocking_buffer.hh" -#include "mem/cache/prefetch/base_prefetcher.hh" -#include "mem/request.hh" - -/** - * @todo Move writebacks into shared BaseBuffer class. - */ -void -BlockingBuffer::regStats(const std::string &name) -{ - MissBuffer::regStats(name); -} - - -void -BlockingBuffer::handleMiss(PacketPtr &pkt, int blk_size, Tick time) -{ - Addr blk_addr = pkt->getAddr() & ~(Addr)(blk_size - 1); - if (pkt->isWrite() && (pkt->req->isUncacheable() || !writeAllocate || - !pkt->needsResponse())) { - if (!pkt->needsResponse()) { - wb.allocateAsBuffer(pkt); - } else { - wb.allocate(pkt->cmd, blk_addr, blk_size, pkt); - } - - std::memcpy(wb.pkt->getPtr(), pkt->getPtr(), blk_size); - - cache->setBlocked(Blocked_NoWBBuffers); - cache->requestMemSideBus(Request_WB, time); - return; - } - - if (!pkt->needsResponse()) { - miss.allocateAsBuffer(pkt); - } else { - miss.allocate(pkt->cmd, blk_addr, blk_size, pkt); - } - if (!pkt->req->isUncacheable()) { - miss.pkt->flags |= CACHE_LINE_FILL; - } - cache->setBlocked(Blocked_NoMSHRs); - cache->requestMemSideBus(Request_MSHR, time); -} - -PacketPtr -BlockingBuffer::getPacket() -{ - if (miss.pkt && !miss.inService) { - return miss.pkt; - } - return wb.pkt; -} - -void -BlockingBuffer::setBusCmd(PacketPtr &pkt, MemCmd cmd) -{ - MSHR *mshr = (MSHR*) pkt->senderState; - mshr->originalCmd = pkt->cmd; - if (pkt->isCacheFill()) - pkt->cmdOverride(cmd); -} - -void -BlockingBuffer::restoreOrigCmd(PacketPtr &pkt) -{ - pkt->cmdOverride(((MSHR*)(pkt->senderState))->originalCmd); -} - -void -BlockingBuffer::markInService(PacketPtr &pkt, MSHR* mshr) -{ - if (!pkt->isCacheFill() && pkt->isWrite()) { - // Forwarding a write/ writeback, don't need to change - // the command - assert(mshr == &wb); - cache->deassertMemSideBusRequest(Request_WB); - if (!pkt->needsResponse()) { - assert(wb.getNumTargets() == 0); - wb.deallocate(); - cache->clearBlocked(Blocked_NoWBBuffers); - } else { - wb.inService = true; - } - } else { - assert(mshr == &miss); - cache->deassertMemSideBusRequest(Request_MSHR); - if (!pkt->needsResponse()) { - assert(miss.getNumTargets() == 0); - miss.deallocate(); - cache->clearBlocked(Blocked_NoMSHRs); - } else { - //mark in service - miss.inService = true; - } - } -} - -void -BlockingBuffer::handleResponse(PacketPtr &pkt, Tick time) -{ - if (pkt->isCacheFill()) { - // targets were handled in the cache tags - assert((MSHR*)pkt->senderState == &miss); - miss.deallocate(); - cache->clearBlocked(Blocked_NoMSHRs); - } else { - if (((MSHR*)(pkt->senderState))->hasTargets()) { - // Should only have 1 target if we had any - assert(((MSHR*)(pkt->senderState))->getNumTargets() == 1); - PacketPtr target = ((MSHR*)(pkt->senderState))->getTarget(); - ((MSHR*)(pkt->senderState))->popTarget(); - if (pkt->isRead()) { - std::memcpy(target->getPtr(), pkt->getPtr(), target->getSize()); - } - cache->respond(target, time); - assert(!((MSHR*)(pkt->senderState))->hasTargets()); - } - - if (pkt->isWrite()) { - assert(((MSHR*)(pkt->senderState)) == &wb); - wb.deallocate(); - cache->clearBlocked(Blocked_NoWBBuffers); - } else { - miss.deallocate(); - cache->clearBlocked(Blocked_NoMSHRs); - } - } -} - -void -BlockingBuffer::squash(int threadNum) -{ - if (miss.threadNum == threadNum) { - PacketPtr target = miss.getTarget(); - miss.popTarget(); - assert(0/*target->req->getThreadNum()*/ == threadNum); - target = NULL; - assert(!miss.hasTargets()); - miss.ntargets=0; - if (!miss.inService) { - miss.deallocate(); - cache->clearBlocked(Blocked_NoMSHRs); - cache->deassertMemSideBusRequest(Request_MSHR); - } - } -} - -void -BlockingBuffer::doWriteback(Addr addr, - int size, uint8_t *data, bool compressed) -{ - // Generate request - Request * req = new Request(addr, size, 0); - PacketPtr pkt = new Packet(req, MemCmd::Writeback, -1); - pkt->allocate(); - if (data) { - std::memcpy(pkt->getPtr(), data, size); - } - - if (compressed) { - pkt->flags |= COMPRESSED; - } - - ///All writebacks charged to same thread @todo figure this out - writebacks[0/*pkt->req->getThreadNum()*/]++; - - wb.allocateAsBuffer(pkt); - cache->requestMemSideBus(Request_WB, curTick); - cache->setBlocked(Blocked_NoWBBuffers); -} - - - -void -BlockingBuffer::doWriteback(PacketPtr &pkt) -{ - writebacks[0/*pkt->req->getThreadNum()*/]++; - - wb.allocateAsBuffer(pkt); - - // Since allocate as buffer copies the request, - // need to copy data here. - std::memcpy(wb.pkt->getPtr(), pkt->getPtr(), pkt->getSize()); - - cache->setBlocked(Blocked_NoWBBuffers); - cache->requestMemSideBus(Request_WB, curTick); -} - - -MSHR * -BlockingBuffer::findMSHR(Addr addr) -{ - if (miss.addr == addr && miss.pkt) - return &miss; - return NULL; -} - - -bool -BlockingBuffer::findWrites(Addr addr, std::vector& writes) -{ - if (wb.addr == addr && wb.pkt) { - writes.push_back(&wb); - return true; - } - return false; -} diff --git a/src/mem/cache/miss/blocking_buffer.hh b/src/mem/cache/miss/blocking_buffer.hh deleted file mode 100644 index 86b24d539..000000000 --- a/src/mem/cache/miss/blocking_buffer.hh +++ /dev/null @@ -1,209 +0,0 @@ -/* - * Copyright (c) 2003-2005 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Authors: Erik Hallnor - */ - -/** - * @file - * Declaration of a simple buffer for a blocking cache. - */ - -#ifndef __BLOCKING_BUFFER_HH__ -#define __BLOCKING_BUFFER_HH__ - -#include - -#include "base/misc.hh" // for fatal() -#include "mem/cache/miss/miss_buffer.hh" -#include "mem/cache/miss/mshr.hh" - -/** - * Miss and writeback storage for a blocking cache. - */ -class BlockingBuffer : public MissBuffer -{ -protected: - /** Miss storage. */ - MSHR miss; - /** WB storage. */ - MSHR wb; - -public: - /** - * Builds and initializes this buffer. - * @param write_allocate If true, treat write misses the same as reads. - */ - BlockingBuffer(bool write_allocate) - : MissBuffer(write_allocate) - { - } - - /** - * Register statistics for this object. - * @param name The name of the parent cache. - */ - void regStats(const std::string &name); - - /** - * Handle a cache miss properly. Requests the bus and marks the cache as - * blocked. - * @param pkt The request that missed in the cache. - * @param blk_size The block size of the cache. - * @param time The time the miss is detected. - */ - void handleMiss(PacketPtr &pkt, int blk_size, Tick time); - - /** - * Fetch the block for the given address and buffer the given target. - * @param addr The address to fetch. - * @param asid The address space of the address. - * @param blk_size The block size of the cache. - * @param time The time the miss is detected. - * @param target The target for the fetch. - */ - MSHR* fetchBlock(Addr addr, int blk_size, Tick time, - PacketPtr &target) - { - fatal("Unimplemented"); - M5_DUMMY_RETURN - } - - /** - * Selects a outstanding request to service. - * @return The request to service, NULL if none found. - */ - PacketPtr getPacket(); - - /** - * Set the command to the given bus command. - * @param pkt The request to update. - * @param cmd The bus command to use. - */ - void setBusCmd(PacketPtr &pkt, MemCmd cmd); - - /** - * Restore the original command in case of a bus transmission error. - * @param pkt The request to reset. - */ - void restoreOrigCmd(PacketPtr &pkt); - - /** - * Marks a request as in service (sent on the bus). This can have side - * effect since storage for no response commands is deallocated once they - * are successfully sent. - * @param pkt The request that was sent on the bus. - */ - void markInService(PacketPtr &pkt, MSHR* mshr); - - /** - * Frees the resources of the request and unblock the cache. - * @param pkt The request that has been satisfied. - * @param time The time when the request is satisfied. - */ - void handleResponse(PacketPtr &pkt, Tick time); - - /** - * Removes all outstanding requests for a given thread number. If a request - * has been sent to the bus, this function removes all of its targets. - * @param threadNum The thread number of the requests to squash. - */ - void squash(int threadNum); - - /** - * Return the current number of outstanding misses. - * @return the number of outstanding misses. - */ - int getMisses() - { - return miss.getNumTargets(); - } - - /** - * Searches for the supplied address in the miss "queue". - * @param addr The address to look for. - * @param asid The address space id. - * @return A pointer to miss if it matches. - */ - MSHR* findMSHR(Addr addr); - - /** - * Searches for the supplied address in the write buffer. - * @param addr The address to look for. - * @param asid The address space id. - * @param writes List of pointers to the matching writes. - * @return True if there is a matching write. - */ - bool findWrites(Addr addr, std::vector& writes); - - /** - * Perform a writeback of dirty data to the given address. - * @param addr The address to write to. - * @param asid The address space id. - * @param size The number of bytes to write. - * @param data The data to write, can be NULL. - * @param compressed True if the data is compressed. - */ - void doWriteback(Addr addr, - int size, uint8_t *data, bool compressed); - - /** - * Perform a writeback request. - * @param pkt The writeback request. - */ - void doWriteback(PacketPtr &pkt); - - /** - * Returns true if there are outstanding requests. - * @return True if there are outstanding requests. - */ - bool havePending() - { - return !miss.inService || !wb.inService; - } - - /** - * Add a target to the given MSHR. This assumes it is in the miss queue. - * @param mshr The mshr to add a target to. - * @param pkt The target to add. - */ - void addTarget(MSHR *mshr, PacketPtr &pkt) - { - fatal("Shouldn't call this on a blocking buffer."); - } - - /** - * Dummy implmentation. - */ - MSHR* allocateTargetList(Addr addr) - { - fatal("Unimplemented"); - M5_DUMMY_RETURN - } -}; - -#endif // __BLOCKING_BUFFER_HH__ diff --git a/src/mem/cache/miss/miss_buffer.cc b/src/mem/cache/miss/miss_buffer.cc deleted file mode 100644 index 4d9cd0958..000000000 --- a/src/mem/cache/miss/miss_buffer.cc +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2003-2006 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Authors: Erik Hallnor - */ - -#include "cpu/smt.hh" //for maxThreadsPerCPU -#include "mem/cache/base_cache.hh" -#include "mem/cache/miss/miss_buffer.hh" -#include "mem/cache/prefetch/base_prefetcher.hh" - -/** - * @todo Move writebacks into shared BaseBuffer class. - */ -void -MissBuffer::regStats(const std::string &name) -{ - using namespace Stats; - writebacks - .init(maxThreadsPerCPU) - .name(name + ".writebacks") - .desc("number of writebacks") - .flags(total) - ; -} - -void -MissBuffer::setCache(BaseCache *_cache) -{ - cache = _cache; - blkSize = cache->getBlockSize(); -} - -void -MissBuffer::setPrefetcher(BasePrefetcher *_prefetcher) -{ - prefetcher = _prefetcher; -} diff --git a/src/mem/cache/miss/miss_buffer.hh b/src/mem/cache/miss/miss_buffer.hh deleted file mode 100644 index 9a86db304..000000000 --- a/src/mem/cache/miss/miss_buffer.hh +++ /dev/null @@ -1,223 +0,0 @@ -/* - * Copyright (c) 2003-2006 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Authors: Steve Reinhardt - */ - -/** - * @file - * MissBuffer declaration. - */ - -#ifndef __MISS_BUFFER_HH__ -#define __MISS_BUFFER_HH__ - -class BaseCache; -class BasePrefetcher; -class MSHR; - -/** - * Abstract base class for cache miss buffering. - */ -class MissBuffer -{ - protected: - /** True if the cache should allocate on a write miss. */ - const bool writeAllocate; - - /** Pointer to the parent cache. */ - BaseCache *cache; - - /** The Prefetcher */ - BasePrefetcher *prefetcher; - - /** Block size of the parent cache. */ - int blkSize; - - // Statistics - /** - * @addtogroup CacheStatistics - * @{ - */ - /** Number of blocks written back per thread. */ - Stats::Vector<> writebacks; - - /** - * @} - */ - - public: - MissBuffer(bool write_allocate) - : writeAllocate(write_allocate) - { - } - - virtual ~MissBuffer() {} - - /** - * Called by the parent cache to set the back pointer. - * @param _cache A pointer to the parent cache. - */ - void setCache(BaseCache *_cache); - - void setPrefetcher(BasePrefetcher *_prefetcher); - - /** - * Register statistics for this object. - * @param name The name of the parent cache. - */ - virtual void regStats(const std::string &name); - - /** - * Handle a cache miss properly. Either allocate an MSHR for the request, - * or forward it through the write buffer. - * @param pkt The request that missed in the cache. - * @param blk_size The block size of the cache. - * @param time The time the miss is detected. - */ - virtual void handleMiss(PacketPtr &pkt, int blk_size, Tick time) = 0; - - /** - * Fetch the block for the given address and buffer the given target. - * @param addr The address to fetch. - * @param asid The address space of the address. - * @param blk_size The block size of the cache. - * @param time The time the miss is detected. - * @param target The target for the fetch. - */ - virtual MSHR *fetchBlock(Addr addr, int blk_size, Tick time, - PacketPtr &target) = 0; - - /** - * Selects a outstanding request to service. - * @return The request to service, NULL if none found. - */ - virtual PacketPtr getPacket() = 0; - - /** - * Set the command to the given bus command. - * @param pkt The request to update. - * @param cmd The bus command to use. - */ - virtual void setBusCmd(PacketPtr &pkt, MemCmd cmd) = 0; - - /** - * Restore the original command in case of a bus transmission error. - * @param pkt The request to reset. - */ - virtual void restoreOrigCmd(PacketPtr &pkt) = 0; - - /** - * Marks a request as in service (sent on the bus). This can have side - * effect since storage for no response commands is deallocated once they - * are successfully sent. - * @param pkt The request that was sent on the bus. - */ - virtual void markInService(PacketPtr &pkt, MSHR* mshr) = 0; - - /** - * Collect statistics and free resources of a satisfied request. - * @param pkt The request that has been satisfied. - * @param time The time when the request is satisfied. - */ - virtual void handleResponse(PacketPtr &pkt, Tick time) = 0; - - /** - * Removes all outstanding requests for a given thread number. If a request - * has been sent to the bus, this function removes all of its targets. - * @param threadNum The thread number of the requests to squash. - */ - virtual void squash(int threadNum) = 0; - - /** - * Return the current number of outstanding misses. - * @return the number of outstanding misses. - */ - virtual int getMisses() = 0; - - /** - * Searches for the supplied address in the miss queue. - * @param addr The address to look for. - * @param asid The address space id. - * @return The MSHR that contains the address, NULL if not found. - * @warning Currently only searches the miss queue. If non write allocate - * might need to search the write buffer for coherence. - */ - virtual MSHR* findMSHR(Addr addr) = 0; - - /** - * Searches for the supplied address in the write buffer. - * @param addr The address to look for. - * @param asid The address space id. - * @param writes The list of writes that match the address. - * @return True if any writes are found - */ - virtual bool findWrites(Addr addr, std::vector& writes) = 0; - - /** - * Perform a writeback of dirty data to the given address. - * @param addr The address to write to. - * @param asid The address space id. - * @param xc The execution context of the address space. - * @param size The number of bytes to write. - * @param data The data to write, can be NULL. - * @param compressed True if the data is compressed. - */ - virtual void doWriteback(Addr addr, int size, uint8_t *data, - bool compressed) = 0; - - /** - * Perform the given writeback request. - * @param pkt The writeback request. - */ - virtual void doWriteback(PacketPtr &pkt) = 0; - - /** - * Returns true if there are outstanding requests. - * @return True if there are outstanding requests. - */ - virtual bool havePending() = 0; - - /** - * Add a target to the given MSHR. This assumes it is in the miss queue. - * @param mshr The mshr to add a target to. - * @param pkt The target to add. - */ - virtual void addTarget(MSHR *mshr, PacketPtr &pkt) = 0; - - /** - * Allocate a MSHR to hold a list of targets to a block involved in a copy. - * If the block is marked done then the MSHR already holds the data to - * fill the block. Otherwise the block needs to be fetched. - * @param addr The address to buffer. - * @param asid The address space ID. - * @return A pointer to the allocated MSHR. - */ - virtual MSHR* allocateTargetList(Addr addr) = 0; -}; - -#endif //__MISS_BUFFER_HH__ diff --git a/src/mem/cache/miss/miss_queue.cc b/src/mem/cache/miss/miss_queue.cc deleted file mode 100644 index 67036ed02..000000000 --- a/src/mem/cache/miss/miss_queue.cc +++ /dev/null @@ -1,752 +0,0 @@ -/* - * Copyright (c) 2003-2005 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Authors: Erik Hallnor - * Ron Dreslinski - */ - -/** - * @file - * Miss and writeback queue definitions. - */ - -#include "cpu/smt.hh" //for maxThreadsPerCPU -#include "mem/cache/base_cache.hh" -#include "mem/cache/miss/miss_queue.hh" -#include "mem/cache/prefetch/base_prefetcher.hh" - -using namespace std; - -// simple constructor -/** - * @todo Remove the +16 from the write buffer constructor once we handle - * stalling on writebacks do to compression writes. - */ -MissQueue::MissQueue(int numMSHRs, int numTargets, int write_buffers, - bool write_allocate, bool prefetch_miss) - : MissBuffer(write_allocate), - mq(numMSHRs, 4), wb(write_buffers,numMSHRs+1000), numMSHR(numMSHRs), - numTarget(numTargets), writeBuffers(write_buffers), - order(0), prefetchMiss(prefetch_miss) -{ - noTargetMSHR = NULL; -} - - -MissQueue::~MissQueue() -{ -} - - -void -MissQueue::regStats(const string &name) -{ - MissBuffer::regStats(name); - - using namespace Stats; - - // MSHR hit statistics - for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { - MemCmd cmd(access_idx); - const string &cstr = cmd.toString(); - - mshr_hits[access_idx] - .init(maxThreadsPerCPU) - .name(name + "." + cstr + "_mshr_hits") - .desc("number of " + cstr + " MSHR hits") - .flags(total | nozero | nonan) - ; - } - - demandMshrHits - .name(name + ".demand_mshr_hits") - .desc("number of demand (read+write) MSHR hits") - .flags(total) - ; - demandMshrHits = mshr_hits[MemCmd::ReadReq] + mshr_hits[MemCmd::WriteReq]; - - overallMshrHits - .name(name + ".overall_mshr_hits") - .desc("number of overall MSHR hits") - .flags(total) - ; - overallMshrHits = demandMshrHits + mshr_hits[MemCmd::SoftPFReq] + - mshr_hits[MemCmd::HardPFReq]; - - // MSHR miss statistics - for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { - MemCmd cmd(access_idx); - const string &cstr = cmd.toString(); - - mshr_misses[access_idx] - .init(maxThreadsPerCPU) - .name(name + "." + cstr + "_mshr_misses") - .desc("number of " + cstr + " MSHR misses") - .flags(total | nozero | nonan) - ; - } - - demandMshrMisses - .name(name + ".demand_mshr_misses") - .desc("number of demand (read+write) MSHR misses") - .flags(total) - ; - demandMshrMisses = mshr_misses[MemCmd::ReadReq] + mshr_misses[MemCmd::WriteReq]; - - overallMshrMisses - .name(name + ".overall_mshr_misses") - .desc("number of overall MSHR misses") - .flags(total) - ; - overallMshrMisses = demandMshrMisses + mshr_misses[MemCmd::SoftPFReq] + - mshr_misses[MemCmd::HardPFReq]; - - // MSHR miss latency statistics - for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { - MemCmd cmd(access_idx); - const string &cstr = cmd.toString(); - - mshr_miss_latency[access_idx] - .init(maxThreadsPerCPU) - .name(name + "." + cstr + "_mshr_miss_latency") - .desc("number of " + cstr + " MSHR miss cycles") - .flags(total | nozero | nonan) - ; - } - - demandMshrMissLatency - .name(name + ".demand_mshr_miss_latency") - .desc("number of demand (read+write) MSHR miss cycles") - .flags(total) - ; - demandMshrMissLatency = mshr_miss_latency[MemCmd::ReadReq] - + mshr_miss_latency[MemCmd::WriteReq]; - - overallMshrMissLatency - .name(name + ".overall_mshr_miss_latency") - .desc("number of overall MSHR miss cycles") - .flags(total) - ; - overallMshrMissLatency = demandMshrMissLatency + - mshr_miss_latency[MemCmd::SoftPFReq] + mshr_miss_latency[MemCmd::HardPFReq]; - - // MSHR uncacheable statistics - for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { - MemCmd cmd(access_idx); - const string &cstr = cmd.toString(); - - mshr_uncacheable[access_idx] - .init(maxThreadsPerCPU) - .name(name + "." + cstr + "_mshr_uncacheable") - .desc("number of " + cstr + " MSHR uncacheable") - .flags(total | nozero | nonan) - ; - } - - overallMshrUncacheable - .name(name + ".overall_mshr_uncacheable_misses") - .desc("number of overall MSHR uncacheable misses") - .flags(total) - ; - overallMshrUncacheable = mshr_uncacheable[MemCmd::ReadReq] - + mshr_uncacheable[MemCmd::WriteReq] + mshr_uncacheable[MemCmd::SoftPFReq] - + mshr_uncacheable[MemCmd::HardPFReq]; - - // MSHR miss latency statistics - for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { - MemCmd cmd(access_idx); - const string &cstr = cmd.toString(); - - mshr_uncacheable_lat[access_idx] - .init(maxThreadsPerCPU) - .name(name + "." + cstr + "_mshr_uncacheable_latency") - .desc("number of " + cstr + " MSHR uncacheable cycles") - .flags(total | nozero | nonan) - ; - } - - overallMshrUncacheableLatency - .name(name + ".overall_mshr_uncacheable_latency") - .desc("number of overall MSHR uncacheable cycles") - .flags(total) - ; - overallMshrUncacheableLatency = mshr_uncacheable_lat[MemCmd::ReadReq] - + mshr_uncacheable_lat[MemCmd::WriteReq] - + mshr_uncacheable_lat[MemCmd::SoftPFReq] - + mshr_uncacheable_lat[MemCmd::HardPFReq]; - -#if 0 - // MSHR access formulas - for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { - MemCmd cmd(access_idx); - const string &cstr = cmd.toString(); - - mshrAccesses[access_idx] - .name(name + "." + cstr + "_mshr_accesses") - .desc("number of " + cstr + " mshr accesses(hits+misses)") - .flags(total | nozero | nonan) - ; - mshrAccesses[access_idx] = - mshr_hits[access_idx] + mshr_misses[access_idx] - + mshr_uncacheable[access_idx]; - } - - demandMshrAccesses - .name(name + ".demand_mshr_accesses") - .desc("number of demand (read+write) mshr accesses") - .flags(total | nozero | nonan) - ; - demandMshrAccesses = demandMshrHits + demandMshrMisses; - - overallMshrAccesses - .name(name + ".overall_mshr_accesses") - .desc("number of overall (read+write) mshr accesses") - .flags(total | nozero | nonan) - ; - overallMshrAccesses = overallMshrHits + overallMshrMisses - + overallMshrUncacheable; -#endif - - // MSHR miss rate formulas - for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { - MemCmd cmd(access_idx); - const string &cstr = cmd.toString(); - - mshrMissRate[access_idx] - .name(name + "." + cstr + "_mshr_miss_rate") - .desc("mshr miss rate for " + cstr + " accesses") - .flags(total | nozero | nonan) - ; - - mshrMissRate[access_idx] = - mshr_misses[access_idx] / cache->accesses[access_idx]; - } - - demandMshrMissRate - .name(name + ".demand_mshr_miss_rate") - .desc("mshr miss rate for demand accesses") - .flags(total) - ; - demandMshrMissRate = demandMshrMisses / cache->demandAccesses; - - overallMshrMissRate - .name(name + ".overall_mshr_miss_rate") - .desc("mshr miss rate for overall accesses") - .flags(total) - ; - overallMshrMissRate = overallMshrMisses / cache->overallAccesses; - - // mshrMiss latency formulas - for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { - MemCmd cmd(access_idx); - const string &cstr = cmd.toString(); - - avgMshrMissLatency[access_idx] - .name(name + "." + cstr + "_avg_mshr_miss_latency") - .desc("average " + cstr + " mshr miss latency") - .flags(total | nozero | nonan) - ; - - avgMshrMissLatency[access_idx] = - mshr_miss_latency[access_idx] / mshr_misses[access_idx]; - } - - demandAvgMshrMissLatency - .name(name + ".demand_avg_mshr_miss_latency") - .desc("average overall mshr miss latency") - .flags(total) - ; - demandAvgMshrMissLatency = demandMshrMissLatency / demandMshrMisses; - - overallAvgMshrMissLatency - .name(name + ".overall_avg_mshr_miss_latency") - .desc("average overall mshr miss latency") - .flags(total) - ; - overallAvgMshrMissLatency = overallMshrMissLatency / overallMshrMisses; - - // mshrUncacheable latency formulas - for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { - MemCmd cmd(access_idx); - const string &cstr = cmd.toString(); - - avgMshrUncacheableLatency[access_idx] - .name(name + "." + cstr + "_avg_mshr_uncacheable_latency") - .desc("average " + cstr + " mshr uncacheable latency") - .flags(total | nozero | nonan) - ; - - avgMshrUncacheableLatency[access_idx] = - mshr_uncacheable_lat[access_idx] / mshr_uncacheable[access_idx]; - } - - overallAvgMshrUncacheableLatency - .name(name + ".overall_avg_mshr_uncacheable_latency") - .desc("average overall mshr uncacheable latency") - .flags(total) - ; - overallAvgMshrUncacheableLatency = overallMshrUncacheableLatency / overallMshrUncacheable; - - mshr_cap_events - .init(maxThreadsPerCPU) - .name(name + ".mshr_cap_events") - .desc("number of times MSHR cap was activated") - .flags(total) - ; - - //software prefetching stats - soft_prefetch_mshr_full - .init(maxThreadsPerCPU) - .name(name + ".soft_prefetch_mshr_full") - .desc("number of mshr full events for SW prefetching instrutions") - .flags(total) - ; - - mshr_no_allocate_misses - .name(name +".no_allocate_misses") - .desc("Number of misses that were no-allocate") - ; - -} - - -MSHR* -MissQueue::allocateMiss(PacketPtr &pkt, int size, Tick time) -{ - MSHR* mshr = mq.allocate(pkt, size); - mshr->order = order++; - if (!pkt->req->isUncacheable() ){//&& !pkt->isNoAllocate()) { - // Mark this as a cache line fill - mshr->pkt->flags |= CACHE_LINE_FILL; - } - if (mq.isFull()) { - cache->setBlocked(Blocked_NoMSHRs); - } - if (pkt->cmd != MemCmd::HardPFReq) { - //If we need to request the bus (not on HW prefetch), do so - cache->requestMemSideBus(Request_MSHR, time); - } - return mshr; -} - - -MSHR* -MissQueue::allocateWrite(PacketPtr &pkt, int size, Tick time) -{ - MSHR* mshr = wb.allocate(pkt,size); - mshr->order = order++; - -//REMOVING COMPRESSION FOR NOW -#if 0 - if (pkt->isCompressed()) { - mshr->pkt->deleteData(); - mshr->pkt->actualSize = pkt->actualSize; - mshr->pkt->data = new uint8_t[pkt->actualSize]; - memcpy(mshr->pkt->data, pkt->data, pkt->actualSize); - } else { -#endif - memcpy(mshr->pkt->getPtr(), pkt->getPtr(), pkt->getSize()); - //{ - - if (wb.isFull()) { - cache->setBlocked(Blocked_NoWBBuffers); - } - - cache->requestMemSideBus(Request_WB, time); - - return mshr; -} - - -/** - * @todo Remove SW prefetches on mshr hits. - */ -void -MissQueue::handleMiss(PacketPtr &pkt, int blkSize, Tick time) -{ -// if (!cache->isTopLevel()) - if (prefetchMiss) prefetcher->handleMiss(pkt, time); - - int size = blkSize; - Addr blkAddr = pkt->getAddr() & ~(Addr)(blkSize-1); - MSHR* mshr = NULL; - if (!pkt->req->isUncacheable()) { - mshr = mq.findMatch(blkAddr); - if (mshr) { - //@todo remove hw_pf here - mshr_hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++; - if (mshr->threadNum != 0/*pkt->req->getThreadNum()*/) { - mshr->threadNum = -1; - } - mq.allocateTarget(mshr, pkt); - if (mshr->pkt->isNoAllocate() && !pkt->isNoAllocate()) { - //We are adding an allocate after a no-allocate - mshr->pkt->flags &= ~NO_ALLOCATE; - } - if (mshr->getNumTargets() == numTarget) { - noTargetMSHR = mshr; - cache->setBlocked(Blocked_NoTargets); - mq.moveToFront(mshr); - } - return; - } - if (pkt->isNoAllocate()) { - //Count no-allocate requests differently - mshr_no_allocate_misses++; - } - else { - mshr_misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++; - } - } else { - //Count uncacheable accesses - mshr_uncacheable[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++; - size = pkt->getSize(); - } - if (pkt->isWrite() && (pkt->req->isUncacheable() || !writeAllocate || - !pkt->needsResponse())) { - /** - * @todo Add write merging here. - */ - mshr = allocateWrite(pkt, pkt->getSize(), time); - return; - } - - mshr = allocateMiss(pkt, blkSize, time); -} - -MSHR* -MissQueue::fetchBlock(Addr addr, int blk_size, Tick time, - PacketPtr &target) -{ - Addr blkAddr = addr & ~(Addr)(blk_size - 1); - assert(mq.findMatch(addr) == NULL); - MSHR *mshr = mq.allocateFetch(blkAddr, blk_size, target); - mshr->order = order++; - mshr->pkt->flags |= CACHE_LINE_FILL; - if (mq.isFull()) { - cache->setBlocked(Blocked_NoMSHRs); - } - cache->requestMemSideBus(Request_MSHR, time); - return mshr; -} - -PacketPtr -MissQueue::getPacket() -{ - PacketPtr pkt = mq.getReq(); - if (((wb.isFull() && wb.inServiceMSHRs == 0) || !pkt || - pkt->time > curTick) && wb.havePending()) { - pkt = wb.getReq(); - // Need to search for earlier miss. - MSHR *mshr = mq.findPending(pkt); - if (mshr && mshr->order < ((MSHR*)(pkt->senderState))->order) { - // Service misses in order until conflict is cleared. - return mq.getReq(); - } - } - if (pkt) { - MSHR* mshr = wb.findPending(pkt); - if (mshr /*&& mshr->order < pkt->senderState->order*/) { - // The only way this happens is if we are - // doing a write and we didn't have permissions - // then subsequently saw a writeback(owned got evicted) - // We need to make sure to perform the writeback first - // To preserve the dirty data, then we can issue the write - return wb.getReq(); - } - } - else if (!mq.isFull()){ - //If we have a miss queue slot, we can try a prefetch - pkt = prefetcher->getPacket(); - if (pkt) { - //Update statistic on number of prefetches issued (hwpf_mshr_misses) - mshr_misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++; - //It will request the bus for the future, but should clear that immedieatley - allocateMiss(pkt, pkt->getSize(), curTick); - pkt = mq.getReq(); - assert(pkt); //We should get back a req b/c we just put one in - } - } - return pkt; -} - -void -MissQueue::setBusCmd(PacketPtr &pkt, MemCmd cmd) -{ - assert(pkt->senderState != 0); - MSHR * mshr = (MSHR*)pkt->senderState; - mshr->originalCmd = pkt->cmd; - if (cmd == MemCmd::UpgradeReq || cmd == MemCmd::InvalidateReq) { - pkt->flags |= NO_ALLOCATE; - pkt->flags &= ~CACHE_LINE_FILL; - } - else if (!pkt->req->isUncacheable() && !pkt->isNoAllocate() && - cmd.needsResponse()) { - pkt->flags |= CACHE_LINE_FILL; - } - if (pkt->isCacheFill() || pkt->isNoAllocate()) - pkt->cmd = cmd; -} - -void -MissQueue::restoreOrigCmd(PacketPtr &pkt) -{ - pkt->cmd = ((MSHR*)(pkt->senderState))->originalCmd; -} - -void -MissQueue::markInService(PacketPtr &pkt, MSHR* mshr) -{ - bool unblock = false; - BlockedCause cause = NUM_BLOCKED_CAUSES; - - /** - * @todo Should include MSHRQueue pointer in MSHR to select the correct - * one. - */ - if ((!pkt->isCacheFill() && pkt->isWrite())) { - // Forwarding a write/ writeback, don't need to change - // the command - unblock = wb.isFull(); - wb.markInService(mshr); - if (!wb.havePending()){ - cache->deassertMemSideBusRequest(Request_WB); - } - if (unblock) { - // Do we really unblock? - unblock = !wb.isFull(); - cause = Blocked_NoWBBuffers; - } - } else { - unblock = mq.isFull(); - mq.markInService(mshr); - if (!mq.havePending()){ - cache->deassertMemSideBusRequest(Request_MSHR); - } - if (mshr->originalCmd == MemCmd::HardPFReq) { - DPRINTF(HWPrefetch, "%s:Marking a HW_PF in service\n", - cache->name()); - //Also clear pending if need be - if (!prefetcher->havePending()) - { - cache->deassertMemSideBusRequest(Request_PF); - } - } - if (unblock) { - unblock = !mq.isFull(); - cause = Blocked_NoMSHRs; - } - } - if (unblock) { - cache->clearBlocked(cause); - } -} - - -void -MissQueue::handleResponse(PacketPtr &pkt, Tick time) -{ - MSHR* mshr = (MSHR*)pkt->senderState; - if (((MSHR*)(pkt->senderState))->originalCmd == MemCmd::HardPFReq) { - DPRINTF(HWPrefetch, "%s:Handling the response to a HW_PF\n", - cache->name()); - } -#ifndef NDEBUG - int num_targets = mshr->getNumTargets(); -#endif - - bool unblock = false; - bool unblock_target = false; - BlockedCause cause = NUM_BLOCKED_CAUSES; - - if (pkt->isCacheFill() && !pkt->isNoAllocate()) { - mshr_miss_latency[mshr->originalCmd.toInt()][0/*pkt->req->getThreadNum()*/] += - curTick - pkt->time; - // targets were handled in the cache tags - if (mshr == noTargetMSHR) { - // we always clear at least one target - unblock_target = true; - cause = Blocked_NoTargets; - noTargetMSHR = NULL; - } - - if (mshr->hasTargets()) { - // Didn't satisfy all the targets, need to resend - MemCmd cmd = mshr->getTarget()->cmd; - mshr->pkt->setDest(Packet::Broadcast); - mshr->pkt->result = Packet::Unknown; - mshr->pkt->req = mshr->getTarget()->req; - mq.markPending(mshr, cmd); - mshr->order = order++; - cache->requestMemSideBus(Request_MSHR, time); - } - else { - unblock = mq.isFull(); - mq.deallocate(mshr); - if (unblock) { - unblock = !mq.isFull(); - cause = Blocked_NoMSHRs; - } - } - } else { - if (pkt->req->isUncacheable()) { - mshr_uncacheable_lat[pkt->cmd.toInt()][0/*pkt->req->getThreadNum()*/] += - curTick - pkt->time; - } - if (mshr->hasTargets() && pkt->req->isUncacheable()) { - // Should only have 1 target if we had any - assert(num_targets == 1); - PacketPtr target = mshr->getTarget(); - mshr->popTarget(); - if (pkt->isRead()) { - memcpy(target->getPtr(), pkt->getPtr(), - target->getSize()); - } - cache->respond(target, time); - assert(!mshr->hasTargets()); - } - else if (mshr->hasTargets()) { - //Must be a no_allocate with possibly more than one target - assert(mshr->pkt->isNoAllocate()); - while (mshr->hasTargets()) { - PacketPtr target = mshr->getTarget(); - mshr->popTarget(); - if (pkt->isRead()) { - memcpy(target->getPtr(), pkt->getPtr(), - target->getSize()); - } - cache->respond(target, time); - } - } - - if (pkt->isWrite()) { - // If the wrtie buffer is full, we might unblock now - unblock = wb.isFull(); - wb.deallocate(mshr); - if (unblock) { - // Did we really unblock? - unblock = !wb.isFull(); - cause = Blocked_NoWBBuffers; - } - } else { - unblock = mq.isFull(); - mq.deallocate(mshr); - if (unblock) { - unblock = !mq.isFull(); - cause = Blocked_NoMSHRs; - } - } - } - if (unblock || unblock_target) { - cache->clearBlocked(cause); - } -} - -void -MissQueue::squash(int threadNum) -{ - bool unblock = false; - BlockedCause cause = NUM_BLOCKED_CAUSES; - - if (noTargetMSHR && noTargetMSHR->threadNum == threadNum) { - noTargetMSHR = NULL; - unblock = true; - cause = Blocked_NoTargets; - } - if (mq.isFull()) { - unblock = true; - cause = Blocked_NoMSHRs; - } - mq.squash(threadNum); - if (!mq.havePending()) { - cache->deassertMemSideBusRequest(Request_MSHR); - } - if (unblock && !mq.isFull()) { - cache->clearBlocked(cause); - } - -} - -MSHR* -MissQueue::findMSHR(Addr addr) -{ - return mq.findMatch(addr); -} - -bool -MissQueue::findWrites(Addr addr, vector &writes) -{ - return wb.findMatches(addr,writes); -} - -void -MissQueue::doWriteback(Addr addr, - int size, uint8_t *data, bool compressed) -{ - // Generate request - Request * req = new Request(addr, size, 0); - PacketPtr pkt = new Packet(req, MemCmd::Writeback, -1); - pkt->allocate(); - if (data) { - memcpy(pkt->getPtr(), data, size); - } - - if (compressed) { - pkt->flags |= COMPRESSED; - } - - ///All writebacks charged to same thread @todo figure this out - writebacks[0/*pkt->req->getThreadNum()*/]++; - - allocateWrite(pkt, 0, curTick); -} - - -void -MissQueue::doWriteback(PacketPtr &pkt) -{ - writebacks[0/*pkt->req->getThreadNum()*/]++; - allocateWrite(pkt, 0, curTick); -} - - -MSHR* -MissQueue::allocateTargetList(Addr addr) -{ - MSHR* mshr = mq.allocateTargetList(addr, blkSize); - mshr->pkt->flags |= CACHE_LINE_FILL; - if (mq.isFull()) { - cache->setBlocked(Blocked_NoMSHRs); - } - return mshr; -} - -bool -MissQueue::havePending() -{ - return mq.havePending() || wb.havePending() || prefetcher->havePending(); -} diff --git a/src/mem/cache/miss/miss_queue.hh b/src/mem/cache/miss/miss_queue.hh deleted file mode 100644 index d3560ff36..000000000 --- a/src/mem/cache/miss/miss_queue.hh +++ /dev/null @@ -1,327 +0,0 @@ -/* - * Copyright (c) 2003-2005 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Authors: Erik Hallnor - */ - -/** - * @file - * Miss and writeback queue declarations. - */ - -#ifndef __MISS_QUEUE_HH__ -#define __MISS_QUEUE_HH__ - -#include - -#include "mem/cache/miss/miss_buffer.hh" -#include "mem/cache/miss/mshr.hh" -#include "mem/cache/miss/mshr_queue.hh" -#include "base/statistics.hh" - -/** - * Manages cache misses and writebacks. Contains MSHRs to store miss data - * and the writebuffer for writes/writebacks. - * @todo need to handle data on writes better (encapsulate). - * @todo need to make replacements/writebacks happen in Cache::access - */ -class MissQueue : public MissBuffer -{ - protected: - /** The MSHRs. */ - MSHRQueue mq; - /** Write Buffer. */ - MSHRQueue wb; - - // PARAMTERS - - /** The number of MSHRs in the miss queue. */ - const int numMSHR; - /** The number of targets for each MSHR. */ - const int numTarget; - /** The number of write buffers. */ - const int writeBuffers; - - /** Increasing order number assigned to each incoming request. */ - uint64_t order; - - bool prefetchMiss; - - // Statistics - /** - * @addtogroup CacheStatistics - * @{ - */ - /** Number of misses that hit in the MSHRs per command and thread. */ - Stats::Vector<> mshr_hits[MemCmd::NUM_MEM_CMDS]; - /** Demand misses that hit in the MSHRs. */ - Stats::Formula demandMshrHits; - /** Total number of misses that hit in the MSHRs. */ - Stats::Formula overallMshrHits; - - /** Number of misses that miss in the MSHRs, per command and thread. */ - Stats::Vector<> mshr_misses[MemCmd::NUM_MEM_CMDS]; - /** Demand misses that miss in the MSHRs. */ - Stats::Formula demandMshrMisses; - /** Total number of misses that miss in the MSHRs. */ - Stats::Formula overallMshrMisses; - - /** Number of misses that miss in the MSHRs, per command and thread. */ - Stats::Vector<> mshr_uncacheable[MemCmd::NUM_MEM_CMDS]; - /** Total number of misses that miss in the MSHRs. */ - Stats::Formula overallMshrUncacheable; - - /** Total cycle latency of each MSHR miss, per command and thread. */ - Stats::Vector<> mshr_miss_latency[MemCmd::NUM_MEM_CMDS]; - /** Total cycle latency of demand MSHR misses. */ - Stats::Formula demandMshrMissLatency; - /** Total cycle latency of overall MSHR misses. */ - Stats::Formula overallMshrMissLatency; - - /** Total cycle latency of each MSHR miss, per command and thread. */ - Stats::Vector<> mshr_uncacheable_lat[MemCmd::NUM_MEM_CMDS]; - /** Total cycle latency of overall MSHR misses. */ - Stats::Formula overallMshrUncacheableLatency; - - /** The total number of MSHR accesses per command and thread. */ - Stats::Formula mshrAccesses[MemCmd::NUM_MEM_CMDS]; - /** The total number of demand MSHR accesses. */ - Stats::Formula demandMshrAccesses; - /** The total number of MSHR accesses. */ - Stats::Formula overallMshrAccesses; - - /** The miss rate in the MSHRs pre command and thread. */ - Stats::Formula mshrMissRate[MemCmd::NUM_MEM_CMDS]; - /** The demand miss rate in the MSHRs. */ - Stats::Formula demandMshrMissRate; - /** The overall miss rate in the MSHRs. */ - Stats::Formula overallMshrMissRate; - - /** The average latency of an MSHR miss, per command and thread. */ - Stats::Formula avgMshrMissLatency[MemCmd::NUM_MEM_CMDS]; - /** The average latency of a demand MSHR miss. */ - Stats::Formula demandAvgMshrMissLatency; - /** The average overall latency of an MSHR miss. */ - Stats::Formula overallAvgMshrMissLatency; - - /** The average latency of an MSHR miss, per command and thread. */ - Stats::Formula avgMshrUncacheableLatency[MemCmd::NUM_MEM_CMDS]; - /** The average overall latency of an MSHR miss. */ - Stats::Formula overallAvgMshrUncacheableLatency; - - /** The number of times a thread hit its MSHR cap. */ - Stats::Vector<> mshr_cap_events; - /** The number of times software prefetches caused the MSHR to block. */ - Stats::Vector<> soft_prefetch_mshr_full; - - Stats::Scalar<> mshr_no_allocate_misses; - - /** - * @} - */ - - private: - /** Pointer to the MSHR that has no targets. */ - MSHR* noTargetMSHR; - - /** - * Allocate a new MSHR to handle the provided miss. - * @param pkt The miss to buffer. - * @param size The number of bytes to fetch. - * @param time The time the miss occurs. - * @return A pointer to the new MSHR. - */ - MSHR* allocateMiss(PacketPtr &pkt, int size, Tick time); - - /** - * Allocate a new WriteBuffer to handle the provided write. - * @param pkt The write to handle. - * @param size The number of bytes to write. - * @param time The time the write occurs. - * @return A pointer to the new write buffer. - */ - MSHR* allocateWrite(PacketPtr &pkt, int size, Tick time); - - public: - /** - * Simple Constructor. Initializes all needed internal storage and sets - * parameters. - * @param numMSHRs The number of outstanding misses to handle. - * @param numTargets The number of outstanding targets to each miss. - * @param write_buffers The number of outstanding writes to handle. - * @param write_allocate If true, treat write misses the same as reads. - */ - MissQueue(int numMSHRs, int numTargets, int write_buffers, - bool write_allocate, bool prefetch_miss); - - /** - * Deletes all allocated internal storage. - */ - ~MissQueue(); - - /** - * Register statistics for this object. - * @param name The name of the parent cache. - */ - void regStats(const std::string &name); - - /** - * Handle a cache miss properly. Either allocate an MSHR for the request, - * or forward it through the write buffer. - * @param pkt The request that missed in the cache. - * @param blk_size The block size of the cache. - * @param time The time the miss is detected. - */ - void handleMiss(PacketPtr &pkt, int blk_size, Tick time); - - /** - * Fetch the block for the given address and buffer the given target. - * @param addr The address to fetch. - * @param asid The address space of the address. - * @param blk_size The block size of the cache. - * @param time The time the miss is detected. - * @param target The target for the fetch. - */ - MSHR* fetchBlock(Addr addr, int blk_size, Tick time, - PacketPtr &target); - - /** - * Selects a outstanding request to service. - * @return The request to service, NULL if none found. - */ - PacketPtr getPacket(); - - /** - * Set the command to the given bus command. - * @param pkt The request to update. - * @param cmd The bus command to use. - */ - void setBusCmd(PacketPtr &pkt, MemCmd cmd); - - /** - * Restore the original command in case of a bus transmission error. - * @param pkt The request to reset. - */ - void restoreOrigCmd(PacketPtr &pkt); - - /** - * Marks a request as in service (sent on the bus). This can have side - * effect since storage for no response commands is deallocated once they - * are successfully sent. - * @param pkt The request that was sent on the bus. - */ - void markInService(PacketPtr &pkt, MSHR* mshr); - - /** - * Collect statistics and free resources of a satisfied request. - * @param pkt The request that has been satisfied. - * @param time The time when the request is satisfied. - */ - void handleResponse(PacketPtr &pkt, Tick time); - - /** - * Removes all outstanding requests for a given thread number. If a request - * has been sent to the bus, this function removes all of its targets. - * @param threadNum The thread number of the requests to squash. - */ - void squash(int threadNum); - - /** - * Return the current number of outstanding misses. - * @return the number of outstanding misses. - */ - int getMisses() - { - return mq.getAllocatedTargets(); - } - - /** - * Searches for the supplied address in the miss queue. - * @param addr The address to look for. - * @param asid The address space id. - * @return The MSHR that contains the address, NULL if not found. - * @warning Currently only searches the miss queue. If non write allocate - * might need to search the write buffer for coherence. - */ - MSHR* findMSHR(Addr addr); - - /** - * Searches for the supplied address in the write buffer. - * @param addr The address to look for. - * @param asid The address space id. - * @param writes The list of writes that match the address. - * @return True if any writes are found - */ - bool findWrites(Addr addr, std::vector& writes); - - /** - * Perform a writeback of dirty data to the given address. - * @param addr The address to write to. - * @param asid The address space id. - * @param xc The execution context of the address space. - * @param size The number of bytes to write. - * @param data The data to write, can be NULL. - * @param compressed True if the data is compressed. - */ - void doWriteback(Addr addr, - int size, uint8_t *data, bool compressed); - - /** - * Perform the given writeback request. - * @param pkt The writeback request. - */ - void doWriteback(PacketPtr &pkt); - - /** - * Returns true if there are outstanding requests. - * @return True if there are outstanding requests. - */ - bool havePending(); - - /** - * Add a target to the given MSHR. This assumes it is in the miss queue. - * @param mshr The mshr to add a target to. - * @param pkt The target to add. - */ - void addTarget(MSHR *mshr, PacketPtr &pkt) - { - mq.allocateTarget(mshr, pkt); - } - - /** - * Allocate a MSHR to hold a list of targets to a block involved in a copy. - * If the block is marked done then the MSHR already holds the data to - * fill the block. Otherwise the block needs to be fetched. - * @param addr The address to buffer. - * @param asid The address space ID. - * @return A pointer to the allocated MSHR. - */ - MSHR* allocateTargetList(Addr addr); - -}; - -#endif //__MISS_QUEUE_HH__ diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc index 74dad658b..218d42339 100644 --- a/src/mem/cache/miss/mshr.cc +++ b/src/mem/cache/miss/mshr.cc @@ -54,45 +54,20 @@ MSHR::MSHR() } void -MSHR::allocate(MemCmd cmd, Addr _addr, int size, - PacketPtr &target) +MSHR::allocate(Addr _addr, int _size, PacketPtr target, bool cacheFill) { addr = _addr; - if (target) - { - //Have a request, just use it - pkt = new Packet(target->req, cmd, Packet::Broadcast, size); - pkt->time = curTick; - pkt->allocate(); - pkt->senderState = (Packet::SenderState *)this; - allocateTarget(target); - } - else - { - //need a request first - Request * req = new Request(); - req->setPhys(addr, size, 0); - //Thread context?? - pkt = new Packet(req, cmd, Packet::Broadcast, size); - pkt->time = curTick; - pkt->allocate(); - pkt->senderState = (Packet::SenderState *)this; - } -} - -// Since we aren't sure if data is being used, don't copy here. -/** - * @todo When we have a "global" data flag, might want to copy data here. - */ -void -MSHR::allocateAsBuffer(PacketPtr &target) -{ - addr = target->getAddr(); - threadNum = 0/*target->req->getThreadNum()*/; - pkt = new Packet(target->req, target->cmd, -1); - pkt->allocate(); - pkt->senderState = (Packet::SenderState*)this; - pkt->time = curTick; + size = _size; + assert(target); + isCacheFill = cacheFill; + needsExclusive = target->needsExclusive(); + _isUncacheable = target->req->isUncacheable(); + inService = false; + threadNum = 0; + ntargets = 1; + // Don't know of a case where we would allocate a new MSHR for a + // snoop (mem0-side request), so set cpuSide to true here. + targets.push_back(Target(target, true)); } void @@ -100,8 +75,6 @@ MSHR::deallocate() { assert(targets.empty()); assert(ntargets == 0); - delete pkt; - pkt = NULL; inService = false; //allocIter = NULL; //readyIter = NULL; @@ -111,16 +84,17 @@ MSHR::deallocate() * Adds a target to an MSHR */ void -MSHR::allocateTarget(PacketPtr &target) +MSHR::allocateTarget(PacketPtr target, bool cpuSide) { //If we append an invalidate and we issued a read to the bus, //but now have some pending writes, we need to move //the invalidate to before the first non-read - if (inService && pkt->isRead() && target->isInvalidate()) { - std::list temp; + if (inService && !inServiceForExclusive && needsExclusive + && !cpuSide && target->isInvalidate()) { + std::list temp; while (!targets.empty()) { - if (!targets.front()->isRead()) break; + if (targets.front().pkt->needsExclusive()) break; //Place on top of temp stack temp.push_front(targets.front()); //Remove from targets @@ -129,7 +103,7 @@ MSHR::allocateTarget(PacketPtr &target) //Now that we have all the reads off until first non-read, we can //place the invalidate on - targets.push_front(target); + targets.push_front(Target(target, cpuSide)); //Now we pop off the temp_stack and put them back while (!temp.empty()) { @@ -138,22 +112,16 @@ MSHR::allocateTarget(PacketPtr &target) } } else { - targets.push_back(target); + targets.push_back(Target(target, cpuSide)); } ++ntargets; assert(targets.size() == ntargets); - /** - * @todo really prioritize the target commands. - */ - if (!inService && target->isWrite()) { - pkt->cmd = MemCmd::WriteReq; - } + needsExclusive = needsExclusive || target->needsExclusive(); } - void MSHR::dump() { @@ -167,8 +135,8 @@ MSHR::dump() for (int i = 0; i < ntargets; i++) { assert(tar_it != targets.end()); - ccprintf(cerr, "\t%d: Addr: %x cmd: %d\n", - i, (*tar_it)->getAddr(), (*tar_it)->cmdToIndex()); + ccprintf(cerr, "\t%d: Addr: %x cmd: %s\n", + i, tar_it->pkt->getAddr(), tar_it->pkt->cmdString()); tar_it++; } @@ -177,6 +145,4 @@ MSHR::dump() MSHR::~MSHR() { - if (pkt) - pkt = NULL; } diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh index d0410acda..b38b69c52 100644 --- a/src/mem/cache/miss/mshr.hh +++ b/src/mem/cache/miss/mshr.hh @@ -36,22 +36,39 @@ #ifndef __MSHR_HH__ #define __MSHR_HH__ -#include "mem/packet.hh" #include -#include -class MSHR; +#include "mem/packet.hh" + +class CacheBlk; +class MSHRQueue; /** * Miss Status and handling Register. This class keeps all the information * needed to handle a cache miss including a list of target requests. */ -class MSHR { +class MSHR : public Packet::SenderState +{ + public: + + class Target { + public: + Tick time; //!< Time when request was received (for stats) + PacketPtr pkt; //!< Pending request packet. + bool cpuSide; //!< Did request come from cpu side or mem side? + + bool isCpuSide() { return cpuSide; } + + Target(PacketPtr _pkt, bool _cpuSide, Tick _time = curTick) + : time(_time), pkt(_pkt), cpuSide(_cpuSide) + {} + }; + /** Defines the Data structure of the MSHR targetlist. */ - typedef std::list TargetList; + typedef std::list TargetList; /** Target list iterator. */ - typedef std::list::iterator TargetListIterator; + typedef std::list::iterator TargetListIterator; /** A list of MSHRs. */ typedef std::list List; /** MSHR list iterator. */ @@ -59,20 +76,35 @@ class MSHR { /** MSHR list const_iterator. */ typedef List::const_iterator ConstIterator; - /** Address of the miss. */ + /** Pointer to queue containing this MSHR. */ + MSHRQueue *queue; + + /** Address of the request. */ Addr addr; - /** Adress space id of the miss. */ - short asid; + + /** Size of the request. */ + int size; + + /** Data associated with the request (if a write). */ + uint8_t *writeData; + /** True if the request has been sent to the bus. */ bool inService; + + /** True if we will be putting the returned block in the cache */ + bool isCacheFill; + /** True if we need to get an exclusive copy of the block. */ + bool needsExclusive; + /** True if the request is uncacheable */ + bool _isUncacheable; + + /** True if the request that has been sent to the bus is for en + * exclusive copy. */ + bool inServiceForExclusive; /** Thread number of the miss. */ - int threadNum; - /** The request that is forwarded to the next level of the hierarchy. */ - PacketPtr pkt; + short threadNum; /** The number of currently allocated targets. */ short ntargets; - /** The original requesting command. */ - MemCmd originalCmd; /** Order number of assigned by the miss queue. */ uint64_t order; @@ -81,6 +113,7 @@ class MSHR { * @sa MissQueue, MSHRQueue::readyList */ Iterator readyIter; + /** * Pointer to this MSHR on the allocated list. * @sa MissQueue, MSHRQueue::allocatedList @@ -92,6 +125,9 @@ private: TargetList targets; public: + + bool isUncacheable() { return _isUncacheable; } + /** * Allocate a miss to this MSHR. * @param cmd The requesting command. @@ -100,14 +136,13 @@ public: * @param size The number of bytes to request. * @param pkt The original miss. */ - void allocate(MemCmd cmd, Addr addr, int size, - PacketPtr &pkt); + void allocate(Addr addr, int size, PacketPtr pkt, bool isFill); /** * Allocate this MSHR as a buffer for the given request. * @param target The memory request to buffer. */ - void allocateAsBuffer(PacketPtr &target); + void allocateAsBuffer(PacketPtr target); /** * Mark this MSHR as free. @@ -118,7 +153,7 @@ public: * Add a request to the list of targets. * @param target The target. */ - void allocateTarget(PacketPtr &target); + void allocateTarget(PacketPtr target, bool cpuSide); /** A simple constructor. */ MSHR(); @@ -131,7 +166,7 @@ public: */ int getNumTargets() { - return(ntargets); + return ntargets; } /** @@ -147,9 +182,9 @@ public: * Returns a reference to the first target. * @return A pointer to the first target. */ - PacketPtr getTarget() + Target *getTarget() { - return targets.front(); + return &targets.front(); } /** diff --git a/src/mem/cache/miss/mshr_queue.cc b/src/mem/cache/miss/mshr_queue.cc index e9aa89bf8..d58594798 100644 --- a/src/mem/cache/miss/mshr_queue.cc +++ b/src/mem/cache/miss/mshr_queue.cc @@ -29,22 +29,21 @@ */ /** @file - * Definition of the MSHRQueue. + * Definition of MSHRQueue class functions. */ #include "mem/cache/miss/mshr_queue.hh" -#include "sim/eventq.hh" using namespace std; -MSHRQueue::MSHRQueue(int num_mshrs, int reserve) - : numMSHRs(num_mshrs + reserve - 1), numReserve(reserve) +MSHRQueue::MSHRQueue(int num_entries, int reserve) + : numEntries(num_entries + reserve - 1), numReserve(reserve) { allocated = 0; - inServiceMSHRs = 0; - allocatedTargets = 0; - registers = new MSHR[numMSHRs]; - for (int i = 0; i < numMSHRs; ++i) { + inServiceEntries = 0; + registers = new MSHR[numEntries]; + for (int i = 0; i < numEntries; ++i) { + registers[i].queue = this; freeList.push_back(®isters[i]); } } @@ -54,7 +53,7 @@ MSHRQueue::~MSHRQueue() delete [] registers; } -MSHR* +MSHR * MSHRQueue::findMatch(Addr addr) const { MSHR::ConstIterator i = allocatedList.begin(); @@ -87,19 +86,19 @@ MSHRQueue::findMatches(Addr addr, vector& matches) const } -MSHR* -MSHRQueue::findPending(PacketPtr &pkt) const +MSHR * +MSHRQueue::findPending(Addr addr, int size) const { MSHR::ConstIterator i = pendingList.begin(); MSHR::ConstIterator end = pendingList.end(); for (; i != end; ++i) { MSHR *mshr = *i; - if (mshr->addr < pkt->getAddr()) { - if (mshr->addr + mshr->pkt->getSize() > pkt->getAddr()) { + if (mshr->addr < addr) { + if (mshr->addr + mshr->size > addr) { return mshr; } } else { - if (pkt->getAddr() + pkt->getSize() > mshr->addr) { + if (addr + size > mshr->addr) { return mshr; } } @@ -107,21 +106,15 @@ MSHRQueue::findPending(PacketPtr &pkt) const return NULL; } -MSHR* -MSHRQueue::allocate(PacketPtr &pkt, int size) +MSHR * +MSHRQueue::allocate(Addr addr, int size, PacketPtr &pkt, bool isFill) { - Addr aligned_addr = pkt->getAddr() & ~((Addr)size - 1); assert(!freeList.empty()); MSHR *mshr = freeList.front(); assert(mshr->getNumTargets() == 0); freeList.pop_front(); - if (!pkt->needsResponse()) { - mshr->allocateAsBuffer(pkt); - } else { - mshr->allocate(pkt->cmd, aligned_addr, size, pkt); - allocatedTargets += 1; - } + mshr->allocate(addr, size, pkt, isFill); mshr->allocIter = allocatedList.insert(allocatedList.end(), mshr); mshr->readyIter = pendingList.insert(pendingList.end(), mshr); @@ -129,51 +122,21 @@ MSHRQueue::allocate(PacketPtr &pkt, int size) return mshr; } -MSHR* -MSHRQueue::allocateFetch(Addr addr, int size, PacketPtr &target) -{ - MSHR *mshr = freeList.front(); - assert(mshr->getNumTargets() == 0); - freeList.pop_front(); - mshr->allocate(MemCmd::ReadReq, addr, size, target); - mshr->allocIter = allocatedList.insert(allocatedList.end(), mshr); - mshr->readyIter = pendingList.insert(pendingList.end(), mshr); - - allocated += 1; - return mshr; -} - -MSHR* -MSHRQueue::allocateTargetList(Addr addr, int size) -{ - MSHR *mshr = freeList.front(); - assert(mshr->getNumTargets() == 0); - freeList.pop_front(); - PacketPtr dummy; - mshr->allocate(MemCmd::ReadReq, addr, size, dummy); - mshr->allocIter = allocatedList.insert(allocatedList.end(), mshr); - mshr->inService = true; - ++inServiceMSHRs; - ++allocated; - return mshr; -} - void -MSHRQueue::deallocate(MSHR* mshr) +MSHRQueue::deallocate(MSHR *mshr) { deallocateOne(mshr); } MSHR::Iterator -MSHRQueue::deallocateOne(MSHR* mshr) +MSHRQueue::deallocateOne(MSHR *mshr) { MSHR::Iterator retval = allocatedList.erase(mshr->allocIter); freeList.push_front(mshr); allocated--; - allocatedTargets -= mshr->getNumTargets(); if (mshr->inService) { - inServiceMSHRs--; + inServiceEntries--; } else { pendingList.erase(mshr->readyIter); } @@ -192,29 +155,29 @@ MSHRQueue::moveToFront(MSHR *mshr) } void -MSHRQueue::markInService(MSHR* mshr) +MSHRQueue::markInService(MSHR *mshr) { //assert(mshr == pendingList.front()); +#if 0 if (!mshr->pkt->needsResponse() && !(mshr->pkt->cmd == MemCmd::UpgradeReq)) { assert(mshr->getNumTargets() == 0); deallocate(mshr); return; } +#endif mshr->inService = true; pendingList.erase(mshr->readyIter); //mshr->readyIter = NULL; - inServiceMSHRs += 1; + inServiceEntries += 1; //pendingList.pop_front(); } void -MSHRQueue::markPending(MSHR* mshr, MemCmd cmd) +MSHRQueue::markPending(MSHR *mshr) { //assert(mshr->readyIter == NULL); - mshr->pkt->cmd = cmd; - mshr->pkt->flags &= ~SATISFIED; mshr->inService = false; - --inServiceMSHRs; + --inServiceEntries; /** * @ todo might want to add rerequests to front of pending list for * performance. @@ -231,11 +194,8 @@ MSHRQueue::squash(int threadNum) MSHR *mshr = *i; if (mshr->threadNum == threadNum) { while (mshr->hasTargets()) { - PacketPtr target = mshr->getTarget(); mshr->popTarget(); - assert(0/*target->req->getThreadNum()*/ == threadNum); - target = NULL; } assert(!mshr->hasTargets()); assert(mshr->ntargets==0); diff --git a/src/mem/cache/miss/mshr_queue.hh b/src/mem/cache/miss/mshr_queue.hh index 5069db661..182dfd5b2 100644 --- a/src/mem/cache/miss/mshr_queue.hh +++ b/src/mem/cache/miss/mshr_queue.hh @@ -32,71 +32,71 @@ * Declaration of a structure to manage MSHRs. */ -#ifndef __MSHR_QUEUE_HH__ -#define __MSHR_QUEUE_HH__ +#ifndef __MEM__CACHE__MISS__MSHR_QUEUE_HH__ +#define __MEM__CACHE__MISS__MSHR_QUEUE_HH__ #include + +#include "mem/packet.hh" #include "mem/cache/miss/mshr.hh" /** * A Class for maintaining a list of pending and allocated memory requests. */ -class MSHRQueue { +class MSHRQueue +{ private: /** MSHR storage. */ - MSHR* registers; - /** Holds pointers to all allocated MSHRs. */ + MSHR *registers; + /** Holds pointers to all allocated entries. */ MSHR::List allocatedList; - /** Holds pointers to MSHRs that haven't been sent to the bus. */ + /** Holds pointers to entries that haven't been sent to the bus. */ MSHR::List pendingList; - /** Holds non allocated MSHRs. */ + /** Holds non allocated entries. */ MSHR::List freeList; // Parameters /** - * The total number of MSHRs in this queue. This number is set as the - * number of MSHRs requested plus (numReserve - 1). This allows for - * the same number of effective MSHRs while still maintaining the reserve. + * The total number of entries in this queue. This number is set as the + * number of entries requested plus (numReserve - 1). This allows for + * the same number of effective entries while still maintaining the reserve. */ - const int numMSHRs; + const int numEntries; /** - * The number of MSHRs to hold in reserve. This is needed because copy - * operations can allocate upto 4 MSHRs at one time. + * The number of entries to hold in reserve. This is needed because copy + * operations can allocate upto 4 entries at one time. */ const int numReserve; public: - /** The number of allocated MSHRs. */ + /** The number of allocated entries. */ int allocated; - /** The number of MSHRs that have been forwarded to the bus. */ - int inServiceMSHRs; - /** The number of targets waiting for response. */ - int allocatedTargets; + /** The number of entries that have been forwarded to the bus. */ + int inServiceEntries; /** - * Create a queue with a given number of MSHRs. - * @param num_mshrs The number of MSHRs in this queue. - * @param reserve The minimum number of MSHRs needed to satisfy any access. + * Create a queue with a given number of entries. + * @param num_entrys The number of entries in this queue. + * @param reserve The minimum number of entries needed to satisfy + * any access. */ - MSHRQueue(int num_mshrs, int reserve = 1); + MSHRQueue(int num_entries, int reserve = 1); /** Destructor */ ~MSHRQueue(); /** - * Find the first MSHR that matches the provide address and asid. + * Find the first MSHR that matches the provided address. * @param addr The address to find. - * @param asid The address space id. * @return Pointer to the matching MSHR, null if not found. */ - MSHR* findMatch(Addr addr) const; + MSHR *findMatch(Addr addr) const; /** - * Find and return all the matching MSHRs in the provided vector. + * Find and return all the matching entries in the provided vector. * @param addr The address to find. - * @param asid The address space ID. - * @param matches The vector to return pointers to the matching MSHRs. + * @param matches The vector to return pointers to the matching entries. * @return True if any matches are found, false otherwise. * @todo Typedef the vector?? */ @@ -107,7 +107,7 @@ class MSHRQueue { * @param pkt The request to find. * @return A pointer to the earliest matching MSHR. */ - MSHR* findPending(PacketPtr &pkt) const; + MSHR *findPending(Addr addr, int size) const; /** * Allocates a new MSHR for the request and size. This places the request @@ -116,60 +116,29 @@ class MSHRQueue { * @param size The number in bytes to fetch from memory. * @return The a pointer to the MSHR allocated. * - * @pre There are free MSHRs. + * @pre There are free entries. */ - MSHR* allocate(PacketPtr &pkt, int size = 0); - - /** - * Allocate a read request for the given address, and places the given - * target on the target list. - * @param addr The address to fetch. - * @param asid The address space for the fetch. - * @param size The number of bytes to request. - * @param target The first target for the request. - * @return Pointer to the new MSHR. - */ - MSHR* allocateFetch(Addr addr, int size, PacketPtr &target); - - /** - * Allocate a target list for the given address. - * @param addr The address to fetch. - * @param asid The address space for the fetch. - * @param size The number of bytes to request. - * @return Pointer to the new MSHR. - */ - MSHR* allocateTargetList(Addr addr, int size); + MSHR *allocate(Addr addr, int size, PacketPtr &pkt, bool isFill); /** * Removes the given MSHR from the queue. This places the MSHR on the * free list. * @param mshr */ - void deallocate(MSHR* mshr); - - /** - * Allocates a target to the given MSHR. Used to keep track of the number - * of outstanding targets. - * @param mshr The MSHR to allocate the target to. - * @param pkt The target request. - */ - void allocateTarget(MSHR* mshr, PacketPtr &pkt) - { - mshr->allocateTarget(pkt); - allocatedTargets += 1; - } + void deallocate(MSHR *mshr); /** - * Remove a MSHR from the queue. Returns an iterator into the allocatedList - * for faster squash implementation. + * Remove a MSHR from the queue. Returns an iterator into the + * allocatedList for faster squash implementation. * @param mshr The MSHR to remove. * @return An iterator to the next entry in the allocatedList. */ - MSHR::Iterator deallocateOne(MSHR* mshr); + MSHR::Iterator deallocateOne(MSHR *mshr); /** - * Moves the MSHR to the front of the pending list if it is not in service. - * @param mshr The mshr to move. + * Moves the MSHR to the front of the pending list if it is not + * in service. + * @param mshr The entry to move. */ void moveToFront(MSHR *mshr); @@ -178,14 +147,13 @@ class MSHRQueue { * pendingList. Deallocates the MSHR if it does not expect a response. * @param mshr The MSHR to mark in service. */ - void markInService(MSHR* mshr); + void markInService(MSHR *mshr); /** - * Mark an in service mshr as pending, used to resend a request. + * Mark an in service entry as pending, used to resend a request. * @param mshr The MSHR to resend. - * @param cmd The command to resend. */ - void markPending(MSHR* mshr, MemCmd cmd); + void markPending(MSHR *mshr); /** * Squash outstanding requests with the given thread number. If a request @@ -204,36 +172,25 @@ class MSHRQueue { } /** - * Returns true if there are no free MSHRs. + * Returns true if there are no free entries. * @return True if this queue is full. */ bool isFull() const { - return (allocated > numMSHRs - numReserve); + return (allocated > numEntries - numReserve); } /** - * Returns the request at the head of the pendingList. + * Returns the MSHR at the head of the pendingList. * @return The next request to service. */ - PacketPtr getReq() const + MSHR *getNextMSHR() const { if (pendingList.empty()) { return NULL; } - MSHR* mshr = pendingList.front(); - return mshr->pkt; + return pendingList.front(); } - - /** - * Returns the number of outstanding targets. - * @return the number of allocated targets. - */ - int getAllocatedTargets() const - { - return allocatedTargets; - } - }; -#endif //__MSHR_QUEUE_HH__ +#endif //__MEM__CACHE__MISS__MSHR_QUEUE_HH__ diff --git a/src/mem/cache/prefetch/base_prefetcher.cc b/src/mem/cache/prefetch/base_prefetcher.cc index 966f7d005..d03cfe3ae 100644 --- a/src/mem/cache/prefetch/base_prefetcher.cc +++ b/src/mem/cache/prefetch/base_prefetcher.cc @@ -241,7 +241,6 @@ BasePrefetcher::handleMiss(PacketPtr &pkt, Tick time) } pf.push_back(prefetch); - prefetch->flags |= CACHE_LINE_FILL; //Make sure to request the bus, with proper delay cache->requestMemSideBus(Request_PF, prefetch->time); diff --git a/src/mem/cache/tags/fa_lru.cc b/src/mem/cache/tags/fa_lru.cc index 42a1fe34f..607e89a75 100644 --- a/src/mem/cache/tags/fa_lru.cc +++ b/src/mem/cache/tags/fa_lru.cc @@ -215,14 +215,13 @@ FALRU::findBlock(Addr addr) const } FALRUBlk* -FALRU::findReplacement(PacketPtr &pkt, PacketList &writebacks, - BlkList &compress_blocks) +FALRU::findReplacement(Addr addr, PacketList &writebacks) { FALRUBlk * blk = tail; assert(blk->inCache == 0); moveToHead(blk); tagHash.erase(blk->tag); - tagHash[blkAlign(pkt->getAddr())] = blk; + tagHash[blkAlign(addr)] = blk; if (blk->isValid()) { replacements[0]++; } else { diff --git a/src/mem/cache/tags/fa_lru.hh b/src/mem/cache/tags/fa_lru.hh index dabbda740..8cbc79813 100644 --- a/src/mem/cache/tags/fa_lru.hh +++ b/src/mem/cache/tags/fa_lru.hh @@ -201,11 +201,9 @@ public: * Find a replacement block for the address provided. * @param pkt The request to a find a replacement candidate for. * @param writebacks List for any writebacks to be performed. - * @param compress_blocks List of blocks to compress, for adaptive comp. * @return The block to place the replacement in. */ - FALRUBlk* findReplacement(PacketPtr &pkt, PacketList & writebacks, - BlkList &compress_blocks); + FALRUBlk* findReplacement(Addr addr, PacketList & writebacks); /** * Return the hit latency of this cache. @@ -248,10 +246,9 @@ public: * Generate the tag from the addres. For fully associative this is just the * block address. * @param addr The address to get the tag from. - * @param blk ignored here * @return The tag. */ - Addr extractTag(Addr addr, FALRUBlk *blk) const + Addr extractTag(Addr addr) const { return blkAlign(addr); } diff --git a/src/mem/cache/tags/iic.cc b/src/mem/cache/tags/iic.cc index 9c802d0dc..2f95cdb0f 100644 --- a/src/mem/cache/tags/iic.cc +++ b/src/mem/cache/tags/iic.cc @@ -303,11 +303,10 @@ IIC::findBlock(Addr addr) const IICTag* -IIC::findReplacement(PacketPtr &pkt, PacketList &writebacks, - BlkList &compress_blocks) +IIC::findReplacement(Addr addr, PacketList &writebacks) { - DPRINTF(IIC, "Finding Replacement for %x\n", pkt->getAddr()); - unsigned set = hash(pkt->getAddr()); + DPRINTF(IIC, "Finding Replacement for %x\n", addr); + unsigned set = hash(addr); IICTag *tag_ptr; unsigned long *tmp_data = new unsigned long[numSub]; @@ -332,12 +331,14 @@ IIC::findReplacement(PacketPtr &pkt, PacketList &writebacks, list tag_indexes; repl->doAdvance(tag_indexes); +/* while (!tag_indexes.empty()) { if (!tagStore[tag_indexes.front()].isCompressed()) { compress_blocks.push_back(&tagStore[tag_indexes.front()]); } tag_indexes.pop_front(); } +*/ tag_ptr->re = (void*)repl->add(tag_ptr-tagStore); @@ -355,7 +356,7 @@ IIC::freeReplacementBlock(PacketList & writebacks) DPRINTF(Cache, "Replacing %x in IIC: %s\n", regenerateBlkAddr(tag_ptr->tag,0), - tag_ptr->isModified() ? "writeback" : "clean"); + tag_ptr->isDirty() ? "writeback" : "clean"); /* write back replaced block data */ if (tag_ptr && (tag_ptr->isValid())) { replacements[0]++; @@ -363,7 +364,7 @@ IIC::freeReplacementBlock(PacketList & writebacks) ++sampledRefs; tag_ptr->refCount = 0; - if (tag_ptr->isModified()) { + if (tag_ptr->isDirty()) { /* PacketPtr writeback = buildWritebackReq(regenerateBlkAddr(tag_ptr->tag, 0), tag_ptr->req->asid, tag_ptr->xc, blkSize, @@ -618,24 +619,6 @@ IIC::secondaryChain(Addr tag, unsigned long chain_ptr, return NULL; } -void -IIC::decompressBlock(unsigned long index) -{ - IICTag *tag_ptr = &tagStore[index]; - if (tag_ptr->isCompressed()) { - // decompress the data here. - } -} - -void -IIC::compressBlock(unsigned long index) -{ - IICTag *tag_ptr = &tagStore[index]; - if (!tag_ptr->isCompressed()) { - // Compress the data here. - } -} - void IIC::invalidateBlk(IIC::BlkType *tag_ptr) { @@ -672,7 +655,6 @@ void IIC::writeData(IICTag *blk, uint8_t *write_data, int size, PacketList & writebacks) { - assert(size < blkSize || !blk->isCompressed()); DPRINTF(IIC, "Writing %d bytes to %x\n", size, blk->tag<> tagShift); - } - /** * Generate the tag from the address. * @param addr The address to a get a tag for. @@ -422,18 +411,6 @@ class IIC : public BaseTags return tmp; } - /** - * Decompress a block if it is compressed. - * @param index The tag store index for the block to uncompress. - */ - void decompressBlock(unsigned long index); - - /** - * Try and compress a block if it is not already compressed. - * @param index The tag store index for the block to compress. - */ - void compressBlock(unsigned long index); - /** * Invalidate a block. * @param blk The block to invalidate. @@ -462,11 +439,9 @@ class IIC : public BaseTags * Find a replacement block for the address provided. * @param pkt The request to a find a replacement candidate for. * @param writebacks List for any writebacks to be performed. - * @param compress_blocks List of blocks to compress, for adaptive comp. * @return The block to place the replacement in. */ - IICTag* findReplacement(PacketPtr &pkt, PacketList &writebacks, - BlkList &compress_blocks); + IICTag* findReplacement(Addr addr, PacketList &writebacks); /** * Read the data from the internal storage of the given cache block. diff --git a/src/mem/cache/tags/lru.cc b/src/mem/cache/tags/lru.cc index 8e8779774..334312aaf 100644 --- a/src/mem/cache/tags/lru.cc +++ b/src/mem/cache/tags/lru.cc @@ -194,10 +194,9 @@ LRU::findBlock(Addr addr) const } LRUBlk* -LRU::findReplacement(PacketPtr &pkt, PacketList &writebacks, - BlkList &compress_blocks) +LRU::findReplacement(Addr addr, PacketList &writebacks) { - unsigned set = extractSet(pkt->getAddr()); + unsigned set = extractSet(addr); // grab a replacement candidate LRUBlk *blk = sets[set].blks[assoc-1]; sets[set].moveToHead(blk); diff --git a/src/mem/cache/tags/lru.hh b/src/mem/cache/tags/lru.hh index 75272544c..26038d709 100644 --- a/src/mem/cache/tags/lru.hh +++ b/src/mem/cache/tags/lru.hh @@ -189,11 +189,9 @@ public: * Find a replacement block for the address provided. * @param pkt The request to a find a replacement candidate for. * @param writebacks List for any writebacks to be performed. - * @param compress_blocks List of blocks to compress, for adaptive comp. * @return The block to place the replacement in. */ - LRUBlk* findReplacement(PacketPtr &pkt, PacketList &writebacks, - BlkList &compress_blocks); + LRUBlk* findReplacement(Addr addr, PacketList &writebacks); /** * Generate the tag from the given address. @@ -205,17 +203,6 @@ public: return (addr >> tagShift); } - /** - * Generate the tag from the given address. - * @param addr The address to get the tag from. - * @param blk Ignored. - * @return The tag of the address. - */ - Addr extractTag(Addr addr, LRUBlk *blk) const - { - return (addr >> tagShift); - } - /** * Calculate the set index from the address. * @param addr The address to get the set from. diff --git a/src/mem/cache/tags/split.cc b/src/mem/cache/tags/split.cc index 5ac87eaba..e22ccbb96 100644 --- a/src/mem/cache/tags/split.cc +++ b/src/mem/cache/tags/split.cc @@ -298,27 +298,25 @@ Split::findBlock(Addr addr) const } SplitBlk* -Split::findReplacement(PacketPtr &pkt, PacketList &writebacks, - BlkList &compress_blocks) +Split::findReplacement(Addr addr, PacketList &writebacks) { SplitBlk *blk; + assert(0); +#if 0 if (pkt->nic_pkt()) { DPRINTF(Split, "finding a replacement for nic_req\n"); nic_repl++; if (lifo && lifo_net) - blk = lifo_net->findReplacement(pkt, writebacks, - compress_blocks); + blk = lifo_net->findReplacement(addr, writebacks); else if (lru_net) - blk = lru_net->findReplacement(pkt, writebacks, - compress_blocks); + blk = lru_net->findReplacement(addr, writebacks); // in this case, this is an LRU only cache, it's non partitioned else - blk = lru->findReplacement(pkt, writebacks, compress_blocks); + blk = lru->findReplacement(addr, writebacks); } else { DPRINTF(Split, "finding replacement for cpu_req\n"); - blk = lru->findReplacement(pkt, writebacks, - compress_blocks); + blk = lru->findReplacement(addr, writebacks); cpu_repl++; } @@ -346,6 +344,7 @@ Split::findReplacement(PacketPtr &pkt, PacketList &writebacks, // blk attributes for the new blk coming IN blk->ts = curTick; blk->isNIC = (pkt->nic_pkt()) ? true : false; +#endif return blk; } @@ -400,8 +399,13 @@ Split::regenerateBlkAddr(Addr tag, int set) const } Addr -Split::extractTag(Addr addr, SplitBlk *blk) const +Split::extractTag(Addr addr) const { + // need to fix this if we want to use it... old interface of + // passing in blk was too weird + assert(0); + return 0; +/* if (blk->part == 2) { if (lifo_net) return lifo_net->extractTag(addr); @@ -411,5 +415,6 @@ Split::extractTag(Addr addr, SplitBlk *blk) const panic("this shouldn't happen"); } else return lru->extractTag(addr); +*/ } diff --git a/src/mem/cache/tags/split.hh b/src/mem/cache/tags/split.hh index 840b68940..ab48ce769 100644 --- a/src/mem/cache/tags/split.hh +++ b/src/mem/cache/tags/split.hh @@ -212,20 +212,17 @@ class Split : public BaseTags * Find a replacement block for the address provided. * @param pkt The request to a find a replacement candidate for. * @param writebacks List for any writebacks to be performed. - * @param compress_blocks List of blocks to compress, for adaptive comp. * @return The block to place the replacement in. */ - SplitBlk* findReplacement(PacketPtr &pkt, PacketList &writebacks, - BlkList &compress_blocks); + SplitBlk* findReplacement(Addr addr, PacketList &writebacks); /** * Generate the tag from the given address. * @param addr The address to get the tag from. - * @param blk The block to find the partition it's in * @return The tag of the address. */ - Addr extractTag(Addr addr, SplitBlk *blk) const; + Addr extractTag(Addr addr) const; /** * Calculate the set index from the address. diff --git a/src/mem/cache/tags/split_lifo.cc b/src/mem/cache/tags/split_lifo.cc index d71d1a3ef..4ee2473a4 100644 --- a/src/mem/cache/tags/split_lifo.cc +++ b/src/mem/cache/tags/split_lifo.cc @@ -266,10 +266,9 @@ SplitLIFO::findBlock(Addr addr) const } SplitBlk* -SplitLIFO::findReplacement(PacketPtr &pkt, PacketList &writebacks, - BlkList &compress_blocks) +SplitLIFO::findReplacement(Addr addr, PacketList &writebacks) { - unsigned set = extractSet(pkt->getAddr()); + unsigned set = extractSet(addr); SplitBlk *firstIn = sets[set].firstIn; SplitBlk *lastIn = sets[set].lastIn; @@ -289,7 +288,7 @@ SplitLIFO::findReplacement(PacketPtr &pkt, PacketList &writebacks, } DPRINTF(Split, "just assigned %#x addr into LIFO, replacing %#x status %#x\n", - pkt->getAddr(), regenerateBlkAddr(blk->tag, set), blk->status); + addr, regenerateBlkAddr(blk->tag, set), blk->status); if (blk->isValid()) { replacements[0]++; totalRefs += blk->refCount; diff --git a/src/mem/cache/tags/split_lifo.hh b/src/mem/cache/tags/split_lifo.hh index 0f8adf18d..13ccf7ef4 100644 --- a/src/mem/cache/tags/split_lifo.hh +++ b/src/mem/cache/tags/split_lifo.hh @@ -212,11 +212,9 @@ public: * Find a replacement block for the address provided. * @param pkt The request to a find a replacement candidate for. * @param writebacks List for any writebacks to be performed. - * @param compress_blocks List of blocks to compress, for adaptive comp. * @return The block to place the replacement in. */ - SplitBlk* findReplacement(PacketPtr &pkt, PacketList &writebacks, - BlkList &compress_blocks); + SplitBlk* findReplacement(Addr addr, PacketList &writebacks); /** * Generate the tag from the given address. @@ -228,17 +226,6 @@ public: return (addr >> tagShift); } - /** - * Generate the tag from the given address. - * @param addr The address to get the tag from. - * @param blk Ignored - * @return The tag of the address. - */ - Addr extractTag(Addr addr, SplitBlk *blk) const - { - return (addr >> tagShift); - } - /** * Calculate the set index from the address. * @param addr The address to get the set from. diff --git a/src/mem/cache/tags/split_lru.cc b/src/mem/cache/tags/split_lru.cc index 7227fb5c1..4d271a92a 100644 --- a/src/mem/cache/tags/split_lru.cc +++ b/src/mem/cache/tags/split_lru.cc @@ -213,10 +213,9 @@ SplitLRU::findBlock(Addr addr) const } SplitBlk* -SplitLRU::findReplacement(PacketPtr &pkt, PacketList &writebacks, - BlkList &compress_blocks) +SplitLRU::findReplacement(Addr addr, PacketList &writebacks) { - unsigned set = extractSet(pkt->getAddr()); + unsigned set = extractSet(addr); // grab a replacement candidate SplitBlk *blk = sets[set].blks[assoc-1]; sets[set].moveToHead(blk); diff --git a/src/mem/cache/tags/split_lru.hh b/src/mem/cache/tags/split_lru.hh index eb65445ea..a708ef740 100644 --- a/src/mem/cache/tags/split_lru.hh +++ b/src/mem/cache/tags/split_lru.hh @@ -195,11 +195,9 @@ public: * Find a replacement block for the address provided. * @param pkt The request to a find a replacement candidate for. * @param writebacks List for any writebacks to be performed. - * @param compress_blocks List of blocks to compress, for adaptive comp. * @return The block to place the replacement in. */ - SplitBlk* findReplacement(PacketPtr &pkt, PacketList &writebacks, - BlkList &compress_blocks); + SplitBlk* findReplacement(Addr addr, PacketList &writebacks); /** * Generate the tag from the given address. @@ -211,17 +209,6 @@ public: return (addr >> tagShift); } - /** - * Generate the tag from the given address. - * @param addr The address to get the tag from. - * @param blk Ignored. - * @return The tag of the address. - */ - Addr extractTag(Addr addr, SplitBlk *blk) const - { - return (addr >> tagShift); - } - /** * Calculate the set index from the address. * @param addr The address to get the set from. diff --git a/src/mem/packet.cc b/src/mem/packet.cc index a257e16ab..57c6a6381 100644 --- a/src/mem/packet.cc +++ b/src/mem/packet.cc @@ -59,15 +59,15 @@ MemCmd::commandInfo[] = /* ReadResp */ { SET3(IsRead, IsResponse, HasData), InvalidCmd, "ReadResp" }, /* WriteReq */ - { SET4(IsWrite, IsRequest, NeedsResponse, HasData), + { SET5(IsWrite, NeedsExclusive, IsRequest, NeedsResponse, HasData), WriteResp, "WriteReq" }, /* WriteResp */ - { SET2(IsWrite, IsResponse), InvalidCmd, "WriteResp" }, + { SET3(IsWrite, NeedsExclusive, IsResponse), InvalidCmd, "WriteResp" }, /* Writeback */ - { SET4(IsWrite, IsRequest, HasData, NeedsResponse), + { SET5(IsWrite, NeedsExclusive, IsRequest, HasData, NeedsResponse), WritebackAck, "Writeback" }, /* WritebackAck */ - { SET2(IsWrite, IsResponse), InvalidCmd, "WritebackAck" }, + { SET3(IsWrite, NeedsExclusive, IsResponse), InvalidCmd, "WritebackAck" }, /* SoftPFReq */ { SET4(IsRead, IsRequest, IsSWPrefetch, NeedsResponse), SoftPFResp, "SoftPFReq" }, @@ -80,27 +80,39 @@ MemCmd::commandInfo[] = /* HardPFResp */ { SET4(IsRead, IsResponse, IsHWPrefetch, HasData), InvalidCmd, "HardPFResp" }, - /* InvalidateReq */ - { SET2(IsInvalidate, IsRequest), InvalidCmd, "InvalidateReq" }, /* WriteInvalidateReq */ - { SET5(IsWrite, IsInvalidate, IsRequest, HasData, NeedsResponse), + { SET6(IsWrite, NeedsExclusive, IsInvalidate, + IsRequest, HasData, NeedsResponse), WriteInvalidateResp, "WriteInvalidateReq" }, /* WriteInvalidateResp */ - { SET3(IsWrite, IsInvalidate, IsResponse), + { SET4(IsWrite, NeedsExclusive, IsInvalidate, IsResponse), InvalidCmd, "WriteInvalidateResp" }, /* UpgradeReq */ { SET3(IsInvalidate, IsRequest, IsUpgrade), InvalidCmd, "UpgradeReq" }, /* ReadExReq */ - { SET4(IsRead, IsInvalidate, IsRequest, NeedsResponse), + { SET5(IsRead, NeedsExclusive, IsInvalidate, IsRequest, NeedsResponse), ReadExResp, "ReadExReq" }, /* ReadExResp */ - { SET4(IsRead, IsInvalidate, IsResponse, HasData), + { SET5(IsRead, NeedsExclusive, IsInvalidate, IsResponse, HasData), InvalidCmd, "ReadExResp" }, + /* LoadLockedReq */ + { SET4(IsRead, IsLocked, IsRequest, NeedsResponse), + ReadResp, "LoadLockedReq" }, + /* LoadLockedResp */ + { SET4(IsRead, IsLocked, IsResponse, HasData), + InvalidCmd, "LoadLockedResp" }, + /* StoreCondReq */ + { SET6(IsWrite, NeedsExclusive, IsLocked, + IsRequest, NeedsResponse, HasData), + StoreCondResp, "StoreCondReq" }, + /* StoreCondResp */ + { SET4(IsWrite, NeedsExclusive, IsLocked, IsResponse), + InvalidCmd, "StoreCondResp" }, /* SwapReq -- for Swap ldstub type operations */ - { SET4(IsReadWrite, IsRequest, HasData, NeedsResponse), + { SET6(IsRead, IsWrite, NeedsExclusive, IsRequest, HasData, NeedsResponse), SwapResp, "SwapReq" }, /* SwapResp -- for Swap ldstub type operations */ - { SET3(IsReadWrite, IsResponse, HasData), + { SET5(IsRead, IsWrite, NeedsExclusive, IsResponse, HasData), InvalidCmd, "SwapResp" } }; @@ -171,27 +183,28 @@ fixDelayedResponsePacket(PacketPtr func, PacketPtr timing) } bool -fixPacket(PacketPtr func, PacketPtr timing) +Packet::checkFunctional(Addr addr, int size, uint8_t *data) { - Addr funcStart = func->getAddr(); - Addr funcEnd = func->getAddr() + func->getSize() - 1; - Addr timingStart = timing->getAddr(); - Addr timingEnd = timing->getAddr() + timing->getSize() - 1; + Addr func_start = getAddr(); + Addr func_end = getAddr() + getSize() - 1; + Addr val_start = addr; + Addr val_end = val_start + size - 1; - assert(!(funcStart > timingEnd || timingStart > funcEnd)); + if (func_start > val_end || val_start > func_end) { + // no intersection + return false; + } - // this packet can't solve our problem, continue on - if (!timing->hasData()) - return true; + // offset of functional request into supplied value (could be + // negative if partial overlap) + int offset = func_start - val_start; - if (func->isRead()) { - if (funcStart >= timingStart && funcEnd <= timingEnd) { - func->allocate(); - std::memcpy(func->getPtr(), timing->getPtr() + - funcStart - timingStart, func->getSize()); - func->result = Packet::Success; - func->flags |= SATISFIED; - return false; + if (isRead()) { + if (func_start >= val_start && func_end <= val_end) { + allocate(); + std::memcpy(getPtr(), data + offset, getSize()); + result = Packet::Success; + return true; } else { // In this case the timing packet only partially satisfies // the request, so we would need more information to make @@ -199,25 +212,21 @@ fixPacket(PacketPtr func, PacketPtr timing) // something, so the request could continue and get this // bit of possibly newer data along with the older data // not written to yet. - panic("Timing packet only partially satisfies the functional" - "request. Now what?"); + panic("Memory value only partially satisfies the functional " + "request. Now what?"); } - } else if (func->isWrite()) { - if (funcStart >= timingStart) { - std::memcpy(timing->getPtr() + (funcStart - timingStart), - func->getPtr(), - (std::min(funcEnd, timingEnd) - funcStart) + 1); - } else { // timingStart > funcStart - std::memcpy(timing->getPtr(), - func->getPtr() + (timingStart - funcStart), - (std::min(funcEnd, timingEnd) - timingStart) + 1); + } else if (isWrite()) { + if (offset >= 0) { + std::memcpy(data + offset, getPtr(), + (std::min(func_end, val_end) - func_start) + 1); + } else { // val_start > func_start + std::memcpy(data, getPtr() - offset, + (std::min(func_end, val_end) - val_start) + 1); } // we always want to keep going with a write - return true; + return false; } else - panic("Don't know how to handle command type %#x\n", - func->cmdToIndex()); - + panic("Don't know how to handle command %s\n", cmdString()); } @@ -247,8 +256,6 @@ operator<<(std::ostream &o, const Packet &p) o << "Read "; if (p.isWrite()) o << "Write "; - if (p.isReadWrite()) - o << "Read/Write "; if (p.isInvalidate()) o << "Invalidate "; if (p.isRequest()) diff --git a/src/mem/packet.hh b/src/mem/packet.hh index e2349e42f..ca186d875 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -54,16 +54,6 @@ typedef Packet *PacketPtr; typedef uint8_t* PacketDataPtr; typedef std::list PacketList; -//Coherence Flags -#define NACKED_LINE (1 << 0) -#define SATISFIED (1 << 1) -#define SHARED_LINE (1 << 2) -#define CACHE_LINE_FILL (1 << 3) -#define COMPRESSED (1 << 4) -#define NO_ALLOCATE (1 << 5) - -#define EXPRESS_SNOOP (1 << 7) - class MemCmd { public: @@ -82,12 +72,15 @@ class MemCmd HardPFReq, SoftPFResp, HardPFResp, - InvalidateReq, WriteInvalidateReq, WriteInvalidateResp, UpgradeReq, ReadExReq, ReadExResp, + LoadLockedReq, + LoadLockedResp, + StoreCondReq, + StoreCondResp, SwapReq, SwapResp, NUM_MEM_CMDS @@ -97,18 +90,19 @@ class MemCmd /** List of command attributes. */ enum Attribute { - IsRead, - IsWrite, - IsPrefetch, + IsRead, //!< Data flows from responder to requester + IsWrite, //!< Data flows from requester to responder + IsPrefetch, //!< Not a demand access IsInvalidate, - IsRequest, - IsResponse, - NeedsResponse, + NeedsExclusive, //!< Requires exclusive copy to complete in-cache + IsRequest, //!< Issued by requester + IsResponse, //!< Issue by responder + NeedsResponse, //!< Requester needs response from target IsSWPrefetch, IsHWPrefetch, IsUpgrade, - HasData, - IsReadWrite, + IsLocked, //!< Alpha/MIPS LL or SC access + HasData, //!< There is an associated payload NUM_COMMAND_ATTRIBUTES }; @@ -141,10 +135,12 @@ class MemCmd bool isWrite() const { return testCmdAttrib(IsWrite); } bool isRequest() const { return testCmdAttrib(IsRequest); } bool isResponse() const { return testCmdAttrib(IsResponse); } + bool needsExclusive() const { return testCmdAttrib(NeedsExclusive); } bool needsResponse() const { return testCmdAttrib(NeedsResponse); } bool isInvalidate() const { return testCmdAttrib(IsInvalidate); } bool hasData() const { return testCmdAttrib(HasData); } - bool isReadWrite() const { return testCmdAttrib(IsReadWrite); } + bool isReadWrite() const { return isRead() && isWrite(); } + bool isLocked() const { return testCmdAttrib(IsLocked); } const Command responseCommand() const { return commandInfo[cmd].response; @@ -188,9 +184,6 @@ class Packet typedef MemCmd::Command Command; - /** Temporary FLAGS field until cache gets working, this should be in coherence/sender state. */ - uint64_t flags; - private: /** A pointer to the data being transfered. It can be differnt * sizes at each level of the heirarchy so it belongs in the @@ -235,6 +228,14 @@ class Packet /** Is the 'src' field valid? */ bool srcValid; + enum SnoopFlag { + MemInhibit, + Shared, + NUM_SNOOP_FLAGS + }; + + /** Coherence snoopFlags for snooping */ + std::bitset snoopFlags; public: @@ -301,14 +302,17 @@ class Packet bool isWrite() const { return cmd.isWrite(); } bool isRequest() const { return cmd.isRequest(); } bool isResponse() const { return cmd.isResponse(); } + bool needsExclusive() const { return cmd.needsExclusive(); } bool needsResponse() const { return cmd.needsResponse(); } bool isInvalidate() const { return cmd.isInvalidate(); } bool hasData() const { return cmd.hasData(); } bool isReadWrite() const { return cmd.isReadWrite(); } + bool isLocked() const { return cmd.isLocked(); } - bool isCacheFill() const { return (flags & CACHE_LINE_FILL) != 0; } - bool isNoAllocate() const { return (flags & NO_ALLOCATE) != 0; } - bool isCompressed() const { return (flags & COMPRESSED) != 0; } + void assertMemInhibit() { snoopFlags[MemInhibit] = true; } + void assertShared() { snoopFlags[Shared] = true; } + bool memInhibitAsserted() { return snoopFlags[MemInhibit]; } + bool sharedAsserted() { return snoopFlags[Shared]; } bool nic_pkt() { panic("Unimplemented"); M5_DUMMY_RETURN } @@ -327,6 +331,8 @@ class Packet /** Accessor function that returns the source index of the packet. */ short getSrc() const { assert(srcValid); return src; } void setSrc(short _src) { src = _src; srcValid = true; } + /** Reset source field, e.g. to retransmit packet on different bus. */ + void clearSrc() { srcValid = false; } /** Accessor function that returns the destination index of the packet. */ @@ -347,13 +353,12 @@ class Packet Packet(Request *_req, MemCmd _cmd, short _dest) : data(NULL), staticData(false), dynamicData(false), arrayData(false), addr(_req->paddr), size(_req->size), dest(_dest), - addrSizeValid(_req->validPaddr), - srcValid(false), + addrSizeValid(_req->validPaddr), srcValid(false), + snoopFlags(0), + time(curTick), req(_req), coherence(NULL), senderState(NULL), cmd(_cmd), result(Unknown) { - flags = 0; - time = curTick; } /** Alternate constructor if you are trying to create a packet with @@ -361,14 +366,32 @@ class Packet * this allows for overriding the size/addr of the req.*/ Packet(Request *_req, MemCmd _cmd, short _dest, int _blkSize) : data(NULL), staticData(false), dynamicData(false), arrayData(false), - addr(_req->paddr & ~(_blkSize - 1)), size(_blkSize), - dest(_dest), + addr(_req->paddr & ~(_blkSize - 1)), size(_blkSize), dest(_dest), addrSizeValid(_req->validPaddr), srcValid(false), + snoopFlags(0), + time(curTick), req(_req), coherence(NULL), senderState(NULL), cmd(_cmd), result(Unknown) { - flags = 0; - time = curTick; + } + + /** Alternate constructor for copying a packet. Copy all fields + * *except* set data allocation as static... even if the original + * packet's data was dynamic, we don't want to free it when the + * new packet is deallocated. Note that if original packet used + * dynamic data, user must guarantee that the new packet's + * lifetime is less than that of the original packet. */ + Packet(Packet *origPkt) + : data(NULL), staticData(false), dynamicData(false), arrayData(false), + addr(origPkt->addr), size(origPkt->size), + dest(origPkt->dest), + addrSizeValid(origPkt->addrSizeValid), srcValid(origPkt->srcValid), + snoopFlags(origPkt->snoopFlags), + time(curTick), + req(origPkt->req), coherence(origPkt->coherence), + senderState(origPkt->senderState), cmd(origPkt->cmd), + result(origPkt->result) + { } /** Destructor. */ @@ -382,7 +405,7 @@ class Packet * multiple transactions. */ void reinitFromRequest() { assert(req->validPaddr); - flags = 0; + snoopFlags = 0; addr = req->paddr; size = req->size; time = req->time; @@ -395,29 +418,40 @@ class Packet } } - /** Take a request packet and modify it in place to be suitable - * for returning as a response to that request. Used for timing - * accesses only. For atomic and functional accesses, the - * request packet is always implicitly passed back *without* - * modifying the destination fields, so this function - * should not be called. */ - void makeTimingResponse() { + /** + * Take a request packet and modify it in place to be suitable for + * returning as a response to that request. The source and + * destination fields are *not* modified, as is appropriate for + * atomic accesses. + */ + void makeAtomicResponse() + { assert(needsResponse()); assert(isRequest()); + assert(result == Unknown); cmd = cmd.responseCommand(); + result = Success; + } + + /** + * Perform the additional work required for timing responses above + * and beyond atomic responses; i.e., change the destination to + * point back to the requester and clear the source field. + */ + void convertAtomicToTimingResponse() + { dest = src; srcValid = false; } /** * Take a request packet and modify it in place to be suitable for - * returning as a response to that request. + * returning as a response to a timing request. */ - void makeAtomicResponse() + void makeTimingResponse() { - assert(needsResponse()); - assert(isRequest()); - cmd = cmd.responseCommand(); + makeAtomicResponse(); + convertAtomicToTimingResponse(); } /** @@ -493,6 +527,40 @@ class Packet template void set(T v); + /** + * Copy data into the packet from the provided pointer. + */ + void setData(uint8_t *p) + { + std::memcpy(getPtr(), p, getSize()); + } + + /** + * Copy data into the packet from the provided block pointer, + * which is aligned to the given block size. + */ + void setDataFromBlock(uint8_t *blk_data, int blkSize) + { + setData(blk_data + getOffset(blkSize)); + } + + /** + * Copy data from the packet to the provided block pointer, which + * is aligned to the given block size. + */ + void writeData(uint8_t *p) + { + std::memcpy(p, getPtr(), getSize()); + } + + /** + * Copy data from the packet to the memory at the provided pointer. + */ + void writeDataToBlock(uint8_t *blk_data, int blkSize) + { + writeData(blk_data + getOffset(blkSize)); + } + /** * delete the data pointed to in the data pointer. Ok to call to * matter how data was allocted. @@ -504,15 +572,35 @@ class Packet /** Do the packet modify the same addresses. */ bool intersect(PacketPtr p); + + /** + * Check a functional request against a memory value represented + * by a base/size pair and an associated data array. If the + * functional request is a read, it may be satisfied by the memory + * value. If the functional request is a write, it may update the + * memory value. + */ + bool checkFunctional(Addr base, int size, uint8_t *data); + + /** + * Check a functional request against a memory value stored in + * another packet (i.e. an in-transit request or response). + */ + bool checkFunctional(PacketPtr otherPkt) { + return (otherPkt->hasData() && + checkFunctional(otherPkt->getAddr(), otherPkt->getSize(), + otherPkt->getPtr())); + } }; -/** This function given a functional packet and a timing packet either - * satisfies the timing packet, or updates the timing packet to - * reflect the updated state in the timing packet. It returns if the - * functional packet should continue to traverse the memory hierarchy - * or not. + + +/** Temporary for backwards compatibility. */ -bool fixPacket(PacketPtr func, PacketPtr timing); +inline +bool fixPacket(PacketPtr func, PacketPtr timing) { + return !func->checkFunctional(timing); +} /** This function is a wrapper for the fixPacket field that toggles * the hasData bit it is used when a response is waiting in the diff --git a/src/mem/physical.cc b/src/mem/physical.cc index 9d840fe69..93cba96c4 100644 --- a/src/mem/physical.cc +++ b/src/mem/physical.cc @@ -58,8 +58,9 @@ PhysicalMemory::PhysicalMemory(Params *p) panic("Memory Size not divisible by page size\n"); int map_flags = MAP_ANON | MAP_PRIVATE; - pmemAddr = (uint8_t *)mmap(NULL, params()->addrRange.size(), PROT_READ | PROT_WRITE, - map_flags, -1, 0); + pmemAddr = + (uint8_t *)mmap(NULL, params()->addrRange.size(), + PROT_READ | PROT_WRITE, map_flags, -1, 0); if (pmemAddr == (void *)MAP_FAILED) { perror("mmap"); @@ -121,8 +122,9 @@ PhysicalMemory::calculateLatency(PacketPtr pkt) // Add load-locked to tracking list. Should only be called if the // operation is a load and the LOCKED flag is set. void -PhysicalMemory::trackLoadLocked(Request *req) +PhysicalMemory::trackLoadLocked(PacketPtr pkt) { + Request *req = pkt->req; Addr paddr = LockedAddr::mask(req->getPaddr()); // first we check if we already have a locked addr for this @@ -151,10 +153,11 @@ PhysicalMemory::trackLoadLocked(Request *req) // conflict with locked addresses, and for success/failure of store // conditionals. bool -PhysicalMemory::checkLockedAddrList(Request *req) +PhysicalMemory::checkLockedAddrList(PacketPtr pkt) { + Request *req = pkt->req; Addr paddr = LockedAddr::mask(req->getPaddr()); - bool isLocked = req->isLocked(); + bool isLocked = pkt->isLocked(); // Initialize return value. Non-conditional stores always // succeed. Assume conditional stores will fail until proven @@ -198,74 +201,50 @@ PhysicalMemory::checkLockedAddrList(Request *req) return success; } -void -PhysicalMemory::doFunctionalAccess(PacketPtr pkt) + +#if TRACING_ON + +#define CASE(A, T) \ + case sizeof(T): \ + DPRINTF(MemoryAccess, A " of size %i on address 0x%x data 0x%x\n", \ + pkt->getSize(), pkt->getAddr(), pkt->get()); \ + break + + +#define TRACE_PACKET(A) \ + do { \ + switch (pkt->getSize()) { \ + CASE(A, uint64_t); \ + CASE(A, uint32_t); \ + CASE(A, uint16_t); \ + CASE(A, uint8_t); \ + default: \ + DPRINTF(MemoryAccess, A " of size %i on address 0x%x\n", \ + pkt->getSize(), pkt->getAddr()); \ + } \ + } while (0) + +#else + +#define TRACE_PACKET(A) + +#endif + +Tick +PhysicalMemory::doAtomicAccess(PacketPtr pkt) { assert(pkt->getAddr() >= start() && pkt->getAddr() + pkt->getSize() <= start() + size()); - if (pkt->isRead()) { - if (pkt->req->isLocked()) { - trackLoadLocked(pkt->req); - } - memcpy(pkt->getPtr(), pmemAddr + pkt->getAddr() - start(), - pkt->getSize()); -#if TRACING_ON - switch (pkt->getSize()) { - case sizeof(uint64_t): - DPRINTF(MemoryAccess, "Read of size %i on address 0x%x data 0x%x\n", - pkt->getSize(), pkt->getAddr(),pkt->get()); - break; - case sizeof(uint32_t): - DPRINTF(MemoryAccess, "Read of size %i on address 0x%x data 0x%x\n", - pkt->getSize(), pkt->getAddr(),pkt->get()); - break; - case sizeof(uint16_t): - DPRINTF(MemoryAccess, "Read of size %i on address 0x%x data 0x%x\n", - pkt->getSize(), pkt->getAddr(),pkt->get()); - break; - case sizeof(uint8_t): - DPRINTF(MemoryAccess, "Read of size %i on address 0x%x data 0x%x\n", - pkt->getSize(), pkt->getAddr(),pkt->get()); - break; - default: - DPRINTF(MemoryAccess, "Read of size %i on address 0x%x\n", - pkt->getSize(), pkt->getAddr()); - } -#endif + if (pkt->memInhibitAsserted()) { + DPRINTF(MemoryAccess, "mem inhibited on 0x%x: not responding\n", + pkt->getAddr()); + return 0; } - else if (pkt->isWrite()) { - if (writeOK(pkt->req)) { - memcpy(pmemAddr + pkt->getAddr() - start(), pkt->getPtr(), - pkt->getSize()); -#if TRACING_ON - switch (pkt->getSize()) { - case sizeof(uint64_t): - DPRINTF(MemoryAccess, "Write of size %i on address 0x%x data 0x%x\n", - pkt->getSize(), pkt->getAddr(),pkt->get()); - break; - case sizeof(uint32_t): - DPRINTF(MemoryAccess, "Write of size %i on address 0x%x data 0x%x\n", - pkt->getSize(), pkt->getAddr(),pkt->get()); - break; - case sizeof(uint16_t): - DPRINTF(MemoryAccess, "Write of size %i on address 0x%x data 0x%x\n", - pkt->getSize(), pkt->getAddr(),pkt->get()); - break; - case sizeof(uint8_t): - DPRINTF(MemoryAccess, "Write of size %i on address 0x%x data 0x%x\n", - pkt->getSize(), pkt->getAddr(),pkt->get()); - break; - default: - DPRINTF(MemoryAccess, "Write of size %i on address 0x%x\n", - pkt->getSize(), pkt->getAddr()); - } -#endif - } - } else if (pkt->isInvalidate()) { - //upgrade or invalidate - pkt->flags |= SATISFIED; - } else if (pkt->isReadWrite()) { + + uint8_t *hostAddr = pmemAddr + pkt->getAddr() - start(); + + if (pkt->cmd == MemCmd::SwapReq) { IntReg overwrite_val; bool overwrite_mem; uint64_t condition_val64; @@ -277,66 +256,76 @@ PhysicalMemory::doFunctionalAccess(PacketPtr pkt) // keep a copy of our possible write value, and copy what is at the // memory address into the packet std::memcpy(&overwrite_val, pkt->getPtr(), pkt->getSize()); - std::memcpy(pkt->getPtr(), pmemAddr + pkt->getAddr() - start(), - pkt->getSize()); + std::memcpy(pkt->getPtr(), hostAddr, pkt->getSize()); if (pkt->req->isCondSwap()) { if (pkt->getSize() == sizeof(uint64_t)) { condition_val64 = pkt->req->getExtraData(); - overwrite_mem = !std::memcmp(&condition_val64, pmemAddr + - pkt->getAddr() - start(), sizeof(uint64_t)); + overwrite_mem = !std::memcmp(&condition_val64, hostAddr, + sizeof(uint64_t)); } else if (pkt->getSize() == sizeof(uint32_t)) { condition_val32 = (uint32_t)pkt->req->getExtraData(); - overwrite_mem = !std::memcmp(&condition_val32, pmemAddr + - pkt->getAddr() - start(), sizeof(uint32_t)); + overwrite_mem = !std::memcmp(&condition_val32, hostAddr, + sizeof(uint32_t)); } else panic("Invalid size for conditional read/write\n"); } if (overwrite_mem) - std::memcpy(pmemAddr + pkt->getAddr() - start(), - &overwrite_val, pkt->getSize()); + std::memcpy(hostAddr, &overwrite_val, pkt->getSize()); -#if TRACING_ON - switch (pkt->getSize()) { - case sizeof(uint64_t): - DPRINTF(MemoryAccess, "Read/Write of size %i on address 0x%x old data 0x%x\n", - pkt->getSize(), pkt->getAddr(),pkt->get()); - DPRINTF(MemoryAccess, "New Data 0x%x %s conditional (0x%x) and %s \n", - overwrite_mem, pkt->req->isCondSwap() ? "was" : "wasn't", - condition_val64, overwrite_mem ? "happened" : "didn't happen"); - break; - case sizeof(uint32_t): - DPRINTF(MemoryAccess, "Read/Write of size %i on address 0x%x old data 0x%x\n", - pkt->getSize(), pkt->getAddr(),pkt->get()); - DPRINTF(MemoryAccess, "New Data 0x%x %s conditional (0x%x) and %s \n", - overwrite_mem, pkt->req->isCondSwap() ? "was" : "wasn't", - condition_val32, overwrite_mem ? "happened" : "didn't happen"); - break; - case sizeof(uint16_t): - DPRINTF(MemoryAccess, "Read/Write of size %i on address 0x%x old data 0x%x\n", - pkt->getSize(), pkt->getAddr(),pkt->get()); - DPRINTF(MemoryAccess, "New Data 0x%x wasn't conditional and happned\n", - overwrite_mem); - break; - case sizeof(uint8_t): - DPRINTF(MemoryAccess, "Read/Write of size %i on address 0x%x old data 0x%x\n", - pkt->getSize(), pkt->getAddr(),pkt->get()); - DPRINTF(MemoryAccess, "New Data 0x%x wasn't conditional and happned\n", - overwrite_mem); - break; - default: - DPRINTF(MemoryAccess, "Read/Write of size %i on address 0x%x\n", - pkt->getSize(), pkt->getAddr()); + TRACE_PACKET("Read/Write"); + } else if (pkt->isRead()) { + assert(!pkt->isWrite()); + if (pkt->isLocked()) { + trackLoadLocked(pkt); + } + memcpy(pkt->getPtr(), hostAddr, pkt->getSize()); + TRACE_PACKET("Read"); + } else if (pkt->isWrite()) { + if (writeOK(pkt)) { + memcpy(hostAddr, pkt->getPtr(), pkt->getSize()); + TRACE_PACKET("Write"); + } + } else if (pkt->isInvalidate()) { + //upgrade or invalidate + if (pkt->needsResponse()) { + pkt->makeAtomicResponse(); } -#endif } else { panic("unimplemented"); } + if (pkt->needsResponse()) { + pkt->makeAtomicResponse(); + } + return calculateLatency(pkt); +} + + +void +PhysicalMemory::doFunctionalAccess(PacketPtr pkt) +{ + assert(pkt->getAddr() >= start() && + pkt->getAddr() + pkt->getSize() <= start() + size()); + + uint8_t *hostAddr = pmemAddr + pkt->getAddr() - start(); + + if (pkt->cmd == MemCmd::ReadReq) { + memcpy(pkt->getPtr(), hostAddr, pkt->getSize()); + TRACE_PACKET("Read"); + } else if (pkt->cmd == MemCmd::WriteReq) { + memcpy(hostAddr, pkt->getPtr(), pkt->getSize()); + TRACE_PACKET("Write"); + } else { + panic("PhysicalMemory: unimplemented functional command %s", + pkt->cmdString()); + } + pkt->result = Packet::Success; } + Port * PhysicalMemory::getPort(const std::string &if_name, int idx) { @@ -407,8 +396,7 @@ PhysicalMemory::MemoryPort::deviceBlockSize() Tick PhysicalMemory::MemoryPort::recvAtomic(PacketPtr pkt) { - memory->doFunctionalAccess(pkt); - return memory->calculateLatency(pkt); + return memory->doAtomicAccess(pkt); } void diff --git a/src/mem/physical.hh b/src/mem/physical.hh index b9af5d334..8b13d32c1 100644 --- a/src/mem/physical.hh +++ b/src/mem/physical.hh @@ -112,12 +112,12 @@ class PhysicalMemory : public MemObject // inline a quick check for an empty locked addr list (hopefully // the common case), and do the full list search (if necessary) in // this out-of-line function - bool checkLockedAddrList(Request *req); + bool checkLockedAddrList(PacketPtr pkt); // Record the address of a load-locked operation so that we can // clear the execution context's lock flag if a matching store is // performed - void trackLoadLocked(Request *req); + void trackLoadLocked(PacketPtr pkt); // Compare a store address with any locked addresses so we can // clear the lock flag appropriately. Return value set to 'false' @@ -126,17 +126,18 @@ class PhysicalMemory : public MemObject // requesting execution context), 'true' otherwise. Note that // this method must be called on *all* stores since even // non-conditional stores must clear any matching lock addresses. - bool writeOK(Request *req) { + bool writeOK(PacketPtr pkt) { + Request *req = pkt->req; if (lockedAddrList.empty()) { // no locked addrs: nothing to check, store_conditional fails - bool isLocked = req->isLocked(); + bool isLocked = pkt->isLocked(); if (isLocked) { req->setExtraData(0); } return !isLocked; // only do write if not an sc } else { // iterate over list... - return checkLockedAddrList(req); + return checkLockedAddrList(pkt); } } @@ -175,6 +176,7 @@ class PhysicalMemory : public MemObject unsigned int drain(Event *de); protected: + Tick doAtomicAccess(PacketPtr pkt); void doFunctionalAccess(PacketPtr pkt); virtual Tick calculateLatency(PacketPtr pkt); void recvStatusChange(Port::Status status); diff --git a/src/mem/tport.cc b/src/mem/tport.cc index ed4c0c172..2644a504c 100644 --- a/src/mem/tport.cc +++ b/src/mem/tport.cc @@ -67,14 +67,17 @@ SimpleTimingPort::recvTiming(PacketPtr pkt) // code to hanldle nacks here, but I'm pretty sure it didn't work // correctly with the drain code, so that would need to be fixed // if we ever added it back. - assert(pkt->result != Packet::Nacked); + assert(pkt->isRequest()); + assert(pkt->result == Packet::Unknown); + bool needsResponse = pkt->needsResponse(); Tick latency = recvAtomic(pkt); // turn packet around to go back to requester if response expected - if (pkt->needsResponse()) { - pkt->makeTimingResponse(); + if (needsResponse) { + // recvAtomic() should already have turned packet into atomic response + assert(pkt->isResponse()); + pkt->convertAtomicToTimingResponse(); schedSendTiming(pkt, curTick + latency); - } - else if (pkt->cmd != MemCmd::UpgradeReq) { + } else { delete pkt->req; delete pkt; } -- cgit v1.2.3 From 83af0fdcf57175adf8077c51e9ba872dd2c04b76 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Thu, 21 Jun 2007 11:59:17 -0700 Subject: Getting closer... configs/example/memtest.py: Add progress interval option. src/base/traceflags.py: Add MemTest flag. src/cpu/memtest/memtest.cc: Clean up tracing. src/cpu/memtest/memtest.hh: Get rid of unused code. --HG-- extra : convert_revision : 92bd8241a6c90bfb6d908e5a5132cbdb500cbb87 --- configs/example/memtest.py | 7 +- src/base/traceflags.py | 1 + src/cpu/memtest/memtest.cc | 140 ++----- src/cpu/memtest/memtest.hh | 10 - src/mem/cache/base_cache.cc | 6 +- src/mem/cache/base_cache.hh | 109 ++++-- src/mem/cache/cache.hh | 18 +- src/mem/cache/cache_impl.hh | 544 ++++++++++++-------------- src/mem/cache/coherence/coherence_protocol.cc | 3 +- src/mem/cache/miss/mshr.cc | 4 +- src/mem/cache/miss/mshr.hh | 2 +- src/mem/cache/miss/mshr_queue.cc | 9 +- src/mem/cache/miss/mshr_queue.hh | 7 +- src/mem/cache/prefetch/base_prefetcher.cc | 8 +- src/mem/packet.cc | 12 +- src/mem/packet.hh | 3 +- 16 files changed, 410 insertions(+), 473 deletions(-) diff --git a/configs/example/memtest.py b/configs/example/memtest.py index 9027a9866..0bc12e7bd 100644 --- a/configs/example/memtest.py +++ b/configs/example/memtest.py @@ -60,6 +60,11 @@ parser.add_option("-u", "--uncacheable", type="int", default=0, help="Target percentage of uncacheable accesses " "[default: %default]") +parser.add_option("--progress", type="int", default=1000, + metavar="NLOADS", + help="Progress message interval " + "[default: %default]") + (options, args) = parser.parse_args() if args: @@ -112,7 +117,7 @@ if options.numtesters > block_size: cpus = [ MemTest(atomic=options.atomic, max_loads=options.maxloads, percent_functional=options.functional, percent_uncacheable=options.uncacheable, - progress_interval=1000) + progress_interval=options.progress) for i in xrange(options.numtesters) ] # system simulated diff --git a/src/base/traceflags.py b/src/base/traceflags.py index 6b241c410..f4cf7dfd7 100644 --- a/src/base/traceflags.py +++ b/src/base/traceflags.py @@ -128,6 +128,7 @@ baseFlags = [ 'Mbox', 'MemDepUnit', 'MemoryAccess', + 'MemTest', 'O3CPU', 'OzoneCPU', 'OzoneLSQ', diff --git a/src/cpu/memtest/memtest.cc b/src/cpu/memtest/memtest.cc index 5d89f1b82..6e8c5d0bf 100644 --- a/src/cpu/memtest/memtest.cc +++ b/src/cpu/memtest/memtest.cc @@ -191,29 +191,25 @@ MemTest::init() // memory should be 0; no need to initialize them. } -static void -printData(ostream &os, uint8_t *data, int nbytes) -{ - os << hex << setfill('0'); - // assume little-endian: print bytes from highest address to lowest - for (uint8_t *dp = data + nbytes - 1; dp >= data; --dp) { - os << setw(2) << (unsigned)*dp; - } - os << dec; -} void MemTest::completeRequest(PacketPtr pkt) { + Request *req = pkt->req; + + DPRINTF(MemTest, "completing %s at address %x (blk %x)\n", + pkt->isWrite() ? "write" : "read", + req->getPaddr(), blockAddr(req->getPaddr())); + MemTestSenderState *state = dynamic_cast(pkt->senderState); uint8_t *data = state->data; uint8_t *pkt_data = pkt->getPtr(); - Request *req = pkt->req; //Remove the address from the list of outstanding - std::set::iterator removeAddr = outstandingAddrs.find(req->getPaddr()); + std::set::iterator removeAddr = + outstandingAddrs.find(req->getPaddr()); assert(removeAddr != outstandingAddrs.end()); outstandingAddrs.erase(removeAddr); @@ -237,39 +233,17 @@ MemTest::completeRequest(PacketPtr pkt) } if (numReads >= maxLoads) - exitSimLoop("Maximum number of loads reached!"); + exitSimLoop("maximum number of loads reached"); break; case MemCmd::WriteResp: numWritesStat++; break; -/* - case Copy: - //Also remove dest from outstanding list - removeAddr = outstandingAddrs.find(req->dest); - assert(removeAddr != outstandingAddrs.end()); - outstandingAddrs.erase(removeAddr); - numCopiesStat++; - break; -*/ + default: panic("invalid command %s (%d)", pkt->cmdString(), pkt->cmd.toInt()); } - if (blockAddr(req->getPaddr()) == traceBlockAddr) { - cerr << name() << ": completed " - << (pkt->isWrite() ? "write" : "read") - << " access of " - << dec << pkt->getSize() << " bytes at address 0x" - << hex << req->getPaddr() - << " (0x" << hex << blockAddr(req->getPaddr()) << ")" - << ", value = 0x"; - printData(cerr, pkt_data, pkt->getSize()); - cerr << " @ cycle " << dec << curTick; - - cerr << endl; - } - noResponseCycles = 0; delete state; delete [] data; @@ -325,7 +299,7 @@ MemTest::tick() //mem tester //We can eliminate the lower bits of the offset, and then use the id //to offset within the blks - offset &= ~63; //Not the low order bits + offset = blockAddr(offset); offset += id; access_size = 0; @@ -351,29 +325,23 @@ MemTest::tick() if (cmd < percentReads) { // read - //For now we only allow one outstanding request per addreess per tester - //This means we assume CPU does write forwarding to reads that alias something - //in the cpu store buffer. + // For now we only allow one outstanding request per address + // per tester This means we assume CPU does write forwarding + // to reads that alias something in the cpu store buffer. if (outstandingAddrs.find(paddr) != outstandingAddrs.end()) { delete [] result; delete req; return; } - else outstandingAddrs.insert(paddr); + + outstandingAddrs.insert(paddr); // ***** NOTE FOR RON: I'm not sure how to access checkMem. - Kevin funcPort.readBlob(req->getPaddr(), result, req->getSize()); - if (blockAddr(paddr) == traceBlockAddr) { - cerr << name() - << ": initiating read " - << ((probe) ? "probe of " : "access of ") - << dec << req->getSize() << " bytes from addr 0x" - << hex << paddr - << " (0x" << hex << blockAddr(paddr) << ")" - << " at cycle " - << dec << curTick << endl; - } + DPRINTF(MemTest, + "initiating read at address %x (blk %x) expecting %x\n", + req->getPaddr(), blockAddr(req->getPaddr()), *result); PacketPtr pkt = new Packet(req, MemCmd::ReadReq, Packet::Broadcast); pkt->dataDynamicArray(new uint8_t[req->getSize()]); @@ -385,36 +353,25 @@ MemTest::tick() pkt->makeAtomicResponse(); completeRequest(pkt); } else { -// req->completionEvent = new MemCompleteEvent(req, result, this); sendPkt(pkt); } } else { // write - //For now we only allow one outstanding request per addreess per tester - //This means we assume CPU does write forwarding to reads that alias something - //in the cpu store buffer. + // For now we only allow one outstanding request per addreess + // per tester. This means we assume CPU does write forwarding + // to reads that alias something in the cpu store buffer. if (outstandingAddrs.find(paddr) != outstandingAddrs.end()) { delete [] result; delete req; return; } - else outstandingAddrs.insert(paddr); + outstandingAddrs.insert(paddr); + + DPRINTF(MemTest, "initiating write at address %x (blk %x) value %x\n", + req->getPaddr(), blockAddr(req->getPaddr()), data & 0xff); -/* - if (blockAddr(req->getPaddr()) == traceBlockAddr) { - cerr << name() << ": initiating write " - << ((probe)?"probe of ":"access of ") - << dec << req->getSize() << " bytes (value = 0x"; - printData(cerr, data_pkt->getPtr(), req->getSize()); - cerr << ") to addr 0x" - << hex << req->getPaddr() - << " (0x" << hex << blockAddr(req->getPaddr()) << ")" - << " at cycle " - << dec << curTick << endl; - } -*/ PacketPtr pkt = new Packet(req, MemCmd::WriteReq, Packet::Broadcast); uint8_t *pkt_data = new uint8_t[req->getSize()]; pkt->dataDynamicArray(pkt_data); @@ -429,54 +386,9 @@ MemTest::tick() pkt->makeAtomicResponse(); completeRequest(pkt); } else { -// req->completionEvent = new MemCompleteEvent(req, NULL, this); sendPkt(pkt); } } -/* else { - // copy - unsigned source_align = random() % 100; - unsigned dest_align = random() % 100; - unsigned offset2 = random() % size; - - Addr source = ((base) ? baseAddr1 : baseAddr2) + offset; - Addr dest = ((base) ? baseAddr2 : baseAddr1) + offset2; - if (outstandingAddrs.find(source) != outstandingAddrs.end()) return; - else outstandingAddrs.insert(source); - if (outstandingAddrs.find(dest) != outstandingAddrs.end()) return; - else outstandingAddrs.insert(dest); - - if (source_align >= percentSourceUnaligned) { - source = blockAddr(source); - } - if (dest_align >= percentDestUnaligned) { - dest = blockAddr(dest); - } - req->cmd = Copy; - req->flags &= ~UNCACHEABLE; - req->paddr = source; - req->dest = dest; - delete [] req->data; - req->data = new uint8_t[blockSize]; - req->size = blockSize; - if (source == traceBlockAddr || dest == traceBlockAddr) { - cerr << name() - << ": initiating copy of " - << dec << req->size << " bytes from addr 0x" - << hex << source - << " (0x" << hex << blockAddr(source) << ")" - << " to addr 0x" - << hex << dest - << " (0x" << hex << blockAddr(dest) << ")" - << " at cycle " - << dec << curTick << endl; - }* - cacheInterface->access(req); - uint8_t result[blockSize]; - checkMem->access(Read, source, &result, blockSize); - checkMem->access(Write, dest, &result, blockSize); - } -*/ } void diff --git a/src/cpu/memtest/memtest.hh b/src/cpu/memtest/memtest.hh index 565fafb77..f4713709a 100644 --- a/src/cpu/memtest/memtest.hh +++ b/src/cpu/memtest/memtest.hh @@ -35,8 +35,6 @@ #include #include "base/statistics.hh" -//#include "mem/functional/functional.hh" -//#include "mem/mem_interface.hh" #include "sim/eventq.hh" #include "sim/sim_exit.hh" #include "sim/sim_object.hh" @@ -50,9 +48,6 @@ class MemTest : public MemObject public: MemTest(const std::string &name, -// MemInterface *_cache_interface, -// PhysicalMemory *main_mem, -// PhysicalMemory *check_mem, unsigned _memorySize, unsigned _percentReads, unsigned _percentFunctional, @@ -136,12 +131,7 @@ class MemTest : public MemObject uint8_t *data; }; -// Request *dataReq; PacketPtr retryPkt; -// MemInterface *cacheInterface; -// PhysicalMemory *mainMem; -// PhysicalMemory *checkMem; -// SimpleThread *thread; bool accessRetry; diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc index c7006550b..8b476e100 100644 --- a/src/mem/cache/base_cache.cc +++ b/src/mem/cache/base_cache.cc @@ -50,8 +50,9 @@ BaseCache::CachePort::CachePort(const std::string &_name, BaseCache *_cache) BaseCache::BaseCache(const std::string &name, Params ¶ms) : MemObject(name), - mshrQueue(params.numMSHRs, 4), - writeBuffer(params.numWriteBuffers, params.numMSHRs+1000), + mshrQueue(params.numMSHRs, 4, MSHRQueue_MSHRs), + writeBuffer(params.numWriteBuffers, params.numMSHRs+1000, + MSHRQueue_WriteBuffer), blkSize(params.blkSize), numTarget(params.numTargets), blocked(0), @@ -128,6 +129,7 @@ BaseCache::init() cpuSidePort->sendStatusChange(Port::RangeChange); } + void BaseCache::regStats() { diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh index 5969b4b3f..10fd3289c 100644 --- a/src/mem/cache/base_cache.hh +++ b/src/mem/cache/base_cache.hh @@ -54,41 +54,49 @@ #include "sim/eventq.hh" #include "sim/sim_exit.hh" -/** - * Reasons for Caches to be Blocked. - */ -enum BlockedCause{ - Blocked_NoMSHRs, - Blocked_NoTargets, - Blocked_NoWBBuffers, - Blocked_Coherence, - NUM_BLOCKED_CAUSES -}; - -/** - * Reasons for cache to request a bus. - */ -enum RequestCause{ - Request_MSHR, - Request_WB, - Request_Coherence, - Request_PF -}; - class MSHR; /** * A basic cache interface. Implements some common functions for speed. */ class BaseCache : public MemObject { + /** + * Indexes to enumerate the MSHR queues. + */ + enum MSHRQueueIndex { + MSHRQueue_MSHRs, + MSHRQueue_WriteBuffer + }; + + /** + * Reasons for caches to be blocked. + */ + enum BlockedCause { + Blocked_NoMSHRs = MSHRQueue_MSHRs, + Blocked_NoWBBuffers = MSHRQueue_WriteBuffer, + Blocked_NoTargets, + NUM_BLOCKED_CAUSES + }; + + public: + /** + * Reasons for cache to request a bus. + */ + enum RequestCause { + Request_MSHR = MSHRQueue_MSHRs, + Request_WB = MSHRQueue_WriteBuffer, + Request_PF, + NUM_REQUEST_CAUSES + }; + + private: + class CachePort : public SimpleTimingPort { public: BaseCache *cache; protected: - Event *responseEvent; - CachePort(const std::string &_name, BaseCache *_cache); virtual void recvStatusChange(Status status); @@ -154,6 +162,36 @@ class BaseCache : public MemObject /** Write/writeback buffer */ MSHRQueue writeBuffer; + MSHR *allocateBufferInternal(MSHRQueue *mq, Addr addr, int size, + PacketPtr pkt, Tick time, bool requestBus) + { + MSHR *mshr = mq->allocate(addr, size, pkt); + mshr->order = order++; + + if (mq->isFull()) { + setBlocked((BlockedCause)mq->index); + } + + if (requestBus) { + requestMemSideBus((RequestCause)mq->index, time); + } + + return mshr; + } + + void markInServiceInternal(MSHR *mshr) + { + MSHRQueue *mq = mshr->queue; + bool wasFull = mq->isFull(); + mq->markInService(mshr); + if (!mq->havePending()) { + deassertMemSideBusRequest((RequestCause)mq->index); + } + if (wasFull && !mq->isFull()) { + clearBlocked((BlockedCause)mq->index); + } + } + /** Block size of this cache */ const int blkSize; @@ -382,6 +420,31 @@ class BaseCache : public MemObject Addr blockAlign(Addr addr) const { return (addr & ~(blkSize - 1)); } + MSHR *allocateMissBuffer(PacketPtr pkt, Tick time, bool requestBus) + { + return allocateBufferInternal(&mshrQueue, + blockAlign(pkt->getAddr()), blkSize, + pkt, time, requestBus); + } + + MSHR *allocateBuffer(PacketPtr pkt, Tick time, bool requestBus) + { + MSHRQueue *mq = NULL; + + if (pkt->isWrite() && !pkt->isRead()) { + /** + * @todo Add write merging here. + */ + mq = &writeBuffer; + } else { + mq = &mshrQueue; + } + + return allocateBufferInternal(mq, pkt->getAddr(), pkt->getSize(), + pkt, time, requestBus); + } + + /** * Returns true if the cache is blocked for accesses. */ diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh index 16d15cf86..06fce1a71 100644 --- a/src/mem/cache/cache.hh +++ b/src/mem/cache/cache.hh @@ -179,7 +179,7 @@ class Cache : public BaseCache * @return Pointer to the cache block touched by the request. NULL if it * was a miss. */ - bool access(PacketPtr pkt, BlkType *blk, int & lat); + bool access(PacketPtr pkt, BlkType *&blk, int &lat); /** *Handle doing the Compare and Swap function for SPARC. @@ -201,7 +201,7 @@ class Cache : public BaseCache bool satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk); bool satisfyTarget(MSHR::Target *target, BlkType *blk); - void satisfyMSHR(MSHR *mshr, PacketPtr pkt, BlkType *blk); + bool satisfyMSHR(MSHR *mshr, PacketPtr pkt, BlkType *blk); void doTimingSupplyResponse(PacketPtr req_pkt, uint8_t *blk_data); @@ -310,15 +310,16 @@ class Cache : public BaseCache * @param isFill Whether to fetch & allocate a block * or just forward the request. */ - MSHR *allocateBuffer(PacketPtr pkt, Tick time, bool isFill, - bool requestBus); + MSHR *allocateBuffer(PacketPtr pkt, Tick time, bool requestBus); /** * Selects a outstanding request to service. * @return The request to service, NULL if none found. */ + PacketPtr getBusPacket(PacketPtr cpu_pkt, BlkType *blk, + bool needsExclusive); MSHR *getNextMSHR(); - PacketPtr getPacket(); + PacketPtr getTimingPacket(); /** * Marks a request as in service (sent on the bus). This can have side @@ -328,13 +329,6 @@ class Cache : public BaseCache */ void markInService(MSHR *mshr); - /** - * Collect statistics and free resources of a satisfied request. - * @param pkt The request that has been satisfied. - * @param time The time when the request is satisfied. - */ - void handleResponse(PacketPtr pkt, Tick time); - /** * Perform the given writeback request. * @param pkt The writeback request. diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index 0f66e613c..81fcb4158 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -152,40 +152,21 @@ Cache::cmpAndSwap(BlkType *blk, PacketPtr pkt) template MSHR * Cache::allocateBuffer(PacketPtr pkt, Tick time, - bool isFill, bool requestBus) + bool requestBus) { - int size = isFill ? blkSize : pkt->getSize(); - Addr addr = isFill ? tags->blkAlign(pkt->getAddr()) : pkt->getAddr(); + MSHRQueue *mq = NULL; - MSHR *mshr = NULL; - - if (pkt->isWrite()) { + if (pkt->isWrite() && !pkt->isRead()) { /** * @todo Add write merging here. */ - mshr = writeBuffer.allocate(addr, size, pkt, isFill); - mshr->order = order++; - - if (writeBuffer.isFull()) { - setBlocked(Blocked_NoWBBuffers); - } - - if (requestBus) { - requestMemSideBus(Request_WB, time); - } + mq = &writeBuffer; } else { - mshr = mshrQueue.allocate(addr, size, pkt, isFill); - mshr->order = order++; - if (mshrQueue.isFull()) { - setBlocked(Blocked_NoMSHRs); - } - if (requestBus) { - requestMemSideBus(Request_MSHR, time); - } + mq = &mshrQueue; } - assert(mshr != NULL); - return mshr; + return allocateBufferInternal(mq, pkt->getAddr(), pkt->getSize(), + pkt, time, requestBus); } @@ -193,33 +174,7 @@ template void Cache::markInService(MSHR *mshr) { - bool unblock = false; - BlockedCause cause = NUM_BLOCKED_CAUSES; - - /** - * @todo Should include MSHRQueue pointer in MSHR to select the correct - * one. - */ - if (mshr->queue == &writeBuffer) { - // Forwarding a write/ writeback, don't need to change - // the command - unblock = writeBuffer.isFull(); - writeBuffer.markInService(mshr); - if (!writeBuffer.havePending()){ - deassertMemSideBusRequest(Request_WB); - } - if (unblock) { - // Do we really unblock? - unblock = !writeBuffer.isFull(); - cause = Blocked_NoWBBuffers; - } - } else { - assert(mshr->queue == &mshrQueue); - unblock = mshrQueue.isFull(); - mshrQueue.markInService(mshr); - if (!mshrQueue.havePending()){ - deassertMemSideBusRequest(Request_MSHR); - } + markInServiceInternal(mshr); #if 0 if (mshr->originalCmd == MemCmd::HardPFReq) { DPRINTF(HWPrefetch, "%s:Marking a HW_PF in service\n", @@ -231,14 +186,6 @@ Cache::markInService(MSHR *mshr) } } #endif - if (unblock) { - unblock = !mshrQueue.isFull(); - cause = Blocked_NoMSHRs; - } - } - if (unblock) { - clearBlocked(cause); - } } @@ -275,9 +222,16 @@ Cache::squash(int threadNum) template bool -Cache::access(PacketPtr pkt, BlkType *blk, int &lat) +Cache::access(PacketPtr pkt, BlkType *&blk, int &lat) { + if (pkt->req->isUncacheable()) { + blk = NULL; + lat = hitLatency; + return false; + } + bool satisfied = false; // assume the worst + blk = tags->findBlock(pkt->getAddr(), lat); if (prefetchAccess) { //We are determining prefetches on access stream, call prefetcher @@ -307,6 +261,8 @@ Cache::access(PacketPtr pkt, BlkType *blk, int &lat) hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++; satisfied = true; + // Check RMW operations first since both isRead() and + // isWrite() will be true for them if (pkt->cmd == MemCmd::SwapReq) { cmpAndSwap(blk, pkt); } else if (pkt->isWrite()) { @@ -314,12 +270,16 @@ Cache::access(PacketPtr pkt, BlkType *blk, int &lat) blk->status |= BlkDirty; pkt->writeDataToBlock(blk->data, blkSize); } - } else { - assert(pkt->isRead()); + } else if (pkt->isRead()) { if (pkt->isLocked()) { blk->trackLoadLocked(pkt); } pkt->setDataFromBlock(blk->data, blkSize); + } else { + // Not a read or write... must be an upgrade. it's OK + // to just ack those as long as we have an exclusive + // copy at this level. + assert(pkt->cmd == MemCmd::UpgradeReq); } } else { // permission violation... nothing to do here, leave unsatisfied @@ -351,19 +311,24 @@ Cache::timingAccess(PacketPtr pkt) // we charge hitLatency for doing just about anything here Tick time = curTick + hitLatency; + if (pkt->memInhibitAsserted()) { + DPRINTF(Cache, "mem inhibited on 0x%x: not responding\n", + pkt->getAddr()); + assert(!pkt->req->isUncacheable()); + return true; + } + if (pkt->req->isUncacheable()) { - allocateBuffer(pkt, time, false, true); + allocateBuffer(pkt, time, true); assert(pkt->needsResponse()); // else we should delete it here?? return true; } PacketList writebacks; int lat = hitLatency; - BlkType *blk = tags->findBlock(pkt->getAddr(), lat); bool satisfied = false; Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1)); - MSHR *mshr = mshrQueue.findMatch(blk_addr); if (!mshr) { @@ -373,6 +338,7 @@ Cache::timingAccess(PacketPtr pkt) // cache block... a more aggressive system could detect the // overlap (if any) and forward data out of the MSHRs, but we // don't do that yet) + BlkType *blk = NULL; satisfied = access(pkt, blk, lat); } @@ -401,7 +367,7 @@ Cache::timingAccess(PacketPtr pkt) // copy writebacks to write buffer while (!writebacks.empty()) { PacketPtr wbPkt = writebacks.front(); - allocateBuffer(wbPkt, time, false, true); + allocateBuffer(wbPkt, time, true); writebacks.pop_front(); } @@ -435,7 +401,7 @@ Cache::timingAccess(PacketPtr pkt) // always mark as cache fill for now... if we implement // no-write-allocate or bypass accesses this will have to // be changed. - allocateBuffer(pkt, time, true, true); + allocateMissBuffer(pkt, time, true); } } @@ -449,54 +415,109 @@ Cache::timingAccess(PacketPtr pkt) } +template +PacketPtr +Cache::getBusPacket(PacketPtr cpu_pkt, BlkType *blk, + bool needsExclusive) +{ + bool blkValid = blk && blk->isValid(); + + if (cpu_pkt->req->isUncacheable()) { + assert(blk == NULL); + return NULL; + } + + if (!blkValid && + (cpu_pkt->cmd == MemCmd::Writeback || + cpu_pkt->cmd == MemCmd::UpgradeReq)) { + // For now, writebacks from upper-level caches that + // completely miss in the cache just go through. If we had + // "fast write" support (where we could write the whole + // block w/o fetching new data) we might want to allocate + // on writeback misses instead. + return NULL; + } + + MemCmd cmd; + const bool useUpgrades = true; + if (blkValid && useUpgrades) { + // only reason to be here is that blk is shared + // (read-only) and we need exclusive + assert(needsExclusive && !blk->isWritable()); + cmd = MemCmd::UpgradeReq; + } else { + // block is invalid + cmd = needsExclusive ? MemCmd::ReadExReq : MemCmd::ReadReq; + } + PacketPtr pkt = new Packet(cpu_pkt->req, cmd, Packet::Broadcast, blkSize); + + pkt->allocate(); + return pkt; +} + + template Tick Cache::atomicAccess(PacketPtr pkt) { + int lat = hitLatency; + + if (pkt->memInhibitAsserted()) { + DPRINTF(Cache, "mem inhibited on 0x%x: not responding\n", + pkt->getAddr()); + assert(!pkt->req->isUncacheable()); + return lat; + } + // should assert here that there are no outstanding MSHRs or // writebacks... that would mean that someone used an atomic // access in timing mode - if (pkt->req->isUncacheable()) { - // Uncacheables just go through - return memSidePort->sendAtomic(pkt); - } - - PacketList writebacks; - int lat = hitLatency; - BlkType *blk = tags->findBlock(pkt->getAddr(), lat); - bool satisfied = access(pkt, blk, lat); + BlkType *blk = NULL; - if (!satisfied) { + if (!access(pkt, blk, lat)) { // MISS - CacheBlk::State old_state = (blk) ? blk->status : 0; - MemCmd cmd = coherence->getBusCmd(pkt->cmd, old_state); - Packet busPkt = Packet(pkt->req, cmd, Packet::Broadcast, blkSize); - busPkt.allocate(); + PacketPtr busPkt = getBusPacket(pkt, blk, pkt->needsExclusive()); - DPRINTF(Cache, "Sending a atomic %s for %x\n", - busPkt.cmdString(), busPkt.getAddr()); + bool isCacheFill = (busPkt != NULL); - lat += memSidePort->sendAtomic(&busPkt); + if (busPkt == NULL) { + // just forwarding the same request to the next level + // no local cache operation involved + busPkt = pkt; + } - DPRINTF(Cache, "Receive response: %s for addr %x in state %i\n", - busPkt.cmdString(), busPkt.getAddr(), old_state); + DPRINTF(Cache, "Sending an atomic %s for %x\n", + busPkt->cmdString(), busPkt->getAddr()); - blk = handleFill(&busPkt, blk, writebacks); - bool status = satisfyCpuSideRequest(pkt, blk); - assert(status); - } +#if TRACING_ON + CacheBlk::State old_state = blk ? blk->status : 0; +#endif - // We now have the block one way or another (hit or completed miss) + lat += memSidePort->sendAtomic(busPkt); - // Handle writebacks if needed - while (!writebacks.empty()){ - PacketPtr wbPkt = writebacks.front(); - memSidePort->sendAtomic(wbPkt); - writebacks.pop_front(); - delete wbPkt; + DPRINTF(Cache, "Receive response: %s for addr %x in state %i\n", + busPkt->cmdString(), busPkt->getAddr(), old_state); + + if (isCacheFill) { + PacketList writebacks; + blk = handleFill(busPkt, blk, writebacks); + bool status = satisfyCpuSideRequest(pkt, blk); + assert(status); + delete busPkt; + + // Handle writebacks if needed + while (!writebacks.empty()){ + PacketPtr wbPkt = writebacks.front(); + memSidePort->sendAtomic(wbPkt); + writebacks.pop_front(); + delete wbPkt; + } + } } + // We now have the block one way or another (hit or completed miss) + if (pkt->needsResponse()) { pkt->makeAtomicResponse(); pkt->result = Packet::Success; @@ -553,98 +574,94 @@ Cache::functionalAccess(PacketPtr pkt, // ///////////////////////////////////////////////////// + template -void -Cache::handleResponse(PacketPtr pkt, Tick time) +bool +Cache::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk) { - MSHR *mshr = dynamic_cast(pkt->senderState); -#ifndef NDEBUG - int num_targets = mshr->getNumTargets(); -#endif - - bool unblock = false; - bool unblock_target = false; - BlockedCause cause = NUM_BLOCKED_CAUSES; - - if (mshr->isCacheFill) { -#if 0 - mshr_miss_latency[mshr->originalCmd.toInt()][0/*pkt->req->getThreadNum()*/] += - curTick - pkt->time; -#endif - // targets were handled in the cache tags - if (mshr == noTargetMSHR) { - // we always clear at least one target - unblock_target = true; - cause = Blocked_NoTargets; - noTargetMSHR = NULL; - } + if (blk && (pkt->needsExclusive() ? blk->isWritable() : blk->isValid())) { + assert(pkt->isWrite() || pkt->isReadWrite() || pkt->isRead()); + assert(pkt->getOffset(blkSize) + pkt->getSize() <= blkSize); - if (mshr->hasTargets()) { - // Didn't satisfy all the targets, need to resend - mshrQueue.markPending(mshr); - mshr->order = order++; - requestMemSideBus(Request_MSHR, time); - } - else { - unblock = mshrQueue.isFull(); - mshrQueue.deallocate(mshr); - if (unblock) { - unblock = !mshrQueue.isFull(); - cause = Blocked_NoMSHRs; + if (pkt->isWrite()) { + if (blk->checkWrite(pkt)) { + blk->status |= BlkDirty; + pkt->writeDataToBlock(blk->data, blkSize); } + } else if (pkt->isReadWrite()) { + cmpAndSwap(blk, pkt); + } else { + if (pkt->isLocked()) { + blk->trackLoadLocked(pkt); + } + pkt->setDataFromBlock(blk->data, blkSize); } + + return true; } else { - if (pkt->req->isUncacheable()) { - mshr_uncacheable_lat[pkt->cmd.toInt()][0/*pkt->req->getThreadNum()*/] += - curTick - pkt->time; - } - if (mshr->hasTargets() && pkt->req->isUncacheable()) { - // Should only have 1 target if we had any - assert(num_targets == 1); - MSHR::Target *target = mshr->getTarget(); - assert(target->cpuSide); - mshr->popTarget(); - if (pkt->isRead()) { - target->pkt->setData(pkt->getPtr()); - } - cpuSidePort->respond(target->pkt, time); - assert(!mshr->hasTargets()); + return false; + } +} + + +template +bool +Cache::satisfyTarget(MSHR::Target *target, BlkType *blk) +{ + assert(target != NULL); + assert(target->isCpuSide()); + return satisfyCpuSideRequest(target->pkt, blk); +} + +template +bool +Cache::satisfyMSHR(MSHR *mshr, PacketPtr pkt, + BlkType *blk) +{ + // respond to MSHR targets, if any + + // First offset for critical word first calculations + int initial_offset = 0; + + if (mshr->hasTargets()) { + initial_offset = mshr->getTarget()->pkt->getOffset(blkSize); + } + + while (mshr->hasTargets()) { + MSHR::Target *target = mshr->getTarget(); + + if (!satisfyTarget(target, blk)) { + // Invalid access, need to do another request + // can occur if block is invalidated, or not correct + // permissions + MSHRQueue *mq = mshr->queue; + mq->markPending(mshr); + mshr->order = order++; + requestMemSideBus((RequestCause)mq->index, pkt->finishTime); + return false; } - else if (mshr->hasTargets()) { - //Must be a no_allocate with possibly more than one target - assert(!mshr->isCacheFill); - while (mshr->hasTargets()) { - MSHR::Target *target = mshr->getTarget(); - assert(target->isCpuSide()); - mshr->popTarget(); - if (pkt->isRead()) { - target->pkt->setData(pkt->getPtr()); - } - cpuSidePort->respond(target->pkt, time); - } + + + // How many bytes pass the first request is this one + int transfer_offset = target->pkt->getOffset(blkSize) - initial_offset; + if (transfer_offset < 0) { + transfer_offset += blkSize; } - if (pkt->isWrite()) { - // If the wrtie buffer is full, we might unblock now - unblock = writeBuffer.isFull(); - writeBuffer.deallocate(mshr); - if (unblock) { - // Did we really unblock? - unblock = !writeBuffer.isFull(); - cause = Blocked_NoWBBuffers; - } - } else { - unblock = mshrQueue.isFull(); - mshrQueue.deallocate(mshr); - if (unblock) { - unblock = !mshrQueue.isFull(); - cause = Blocked_NoMSHRs; - } + // If critical word (no offset) return first word time + Tick completion_time = tags->getHitLatency() + + transfer_offset ? pkt->finishTime : pkt->firstWordTime; + + if (!target->pkt->req->isUncacheable()) { + missLatency[target->pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] += + completion_time - target->time; } + target->pkt->makeTimingResponse(); + cpuSidePort->respond(target->pkt, completion_time); + mshr->popTarget(); } - if (unblock || unblock_target) { - clearBlocked(cause); - } + + return true; } @@ -665,21 +682,60 @@ Cache::handleResponse(PacketPtr pkt) assert(pkt->result == Packet::Success); DPRINTF(Cache, "Handling reponse to %x\n", pkt->getAddr()); + MSHRQueue *mq = mshr->queue; + bool wasFull = mq->isFull(); + + if (mshr == noTargetMSHR) { + // we always clear at least one target + clearBlocked(Blocked_NoTargets); + noTargetMSHR = NULL; + } + + // Can we deallocate MSHR when done? + bool deallocate = false; + if (mshr->isCacheFill) { +#if 0 + mshr_miss_latency[mshr->originalCmd.toInt()][0/*pkt->req->getThreadNum()*/] += + curTick - pkt->time; +#endif DPRINTF(Cache, "Block for addr %x being updated in Cache\n", pkt->getAddr()); BlkType *blk = tags->findBlock(pkt->getAddr()); PacketList writebacks; blk = handleFill(pkt, blk, writebacks); - satisfyMSHR(mshr, pkt, blk); + deallocate = satisfyMSHR(mshr, pkt, blk); // copy writebacks to write buffer while (!writebacks.empty()) { PacketPtr wbPkt = writebacks.front(); - allocateBuffer(wbPkt, time, false, true); + allocateBuffer(wbPkt, time, true); writebacks.pop_front(); } + } else { + if (pkt->req->isUncacheable()) { + mshr_uncacheable_lat[pkt->cmd.toInt()][0/*pkt->req->getThreadNum()*/] += + curTick - pkt->time; + } + + while (mshr->hasTargets()) { + MSHR::Target *target = mshr->getTarget(); + assert(target->isCpuSide()); + mshr->popTarget(); + if (pkt->isRead()) { + target->pkt->setData(pkt->getPtr()); + } + cpuSidePort->respond(target->pkt, time); + } + assert(!mshr->hasTargets()); + deallocate = true; + } + + if (deallocate) { + mq->deallocate(mshr); + if (wasFull && !mq->isFull()) { + clearBlocked((BlockedCause)mq->index); + } } - handleResponse(pkt, time); } @@ -717,6 +773,8 @@ Cache::handleFill(PacketPtr pkt, BlkType *blk, Addr addr = pkt->getAddr(); if (blk == NULL) { + // better have read new data + assert(pkt->isRead()); // need to do a replacement blk = tags->findReplacement(addr, writebacks); @@ -733,7 +791,6 @@ Cache::handleFill(PacketPtr pkt, BlkType *blk, blk->tag = tags->extractTag(addr); blk->status = coherence->getNewState(pkt); - assert(pkt->isRead()); } else { // existing block... probably an upgrade assert(blk->tag == tags->extractTag(addr)); @@ -759,90 +816,6 @@ Cache::handleFill(PacketPtr pkt, BlkType *blk, } -template -bool -Cache::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk) -{ - if (blk && (pkt->needsExclusive() ? blk->isWritable() : blk->isValid())) { - assert(pkt->isWrite() || pkt->isReadWrite() || pkt->isRead()); - assert(pkt->getOffset(blkSize) + pkt->getSize() <= blkSize); - - if (pkt->isWrite()) { - if (blk->checkWrite(pkt)) { - blk->status |= BlkDirty; - pkt->writeDataToBlock(blk->data, blkSize); - } - } else if (pkt->isReadWrite()) { - cmpAndSwap(blk, pkt); - } else { - if (pkt->isLocked()) { - blk->trackLoadLocked(pkt); - } - pkt->setDataFromBlock(blk->data, blkSize); - } - - return true; - } else { - return false; - } -} - - -template -bool -Cache::satisfyTarget(MSHR::Target *target, BlkType *blk) -{ - assert(target != NULL); - assert(target->isCpuSide()); - return satisfyCpuSideRequest(target->pkt, blk); -} - -template -void -Cache::satisfyMSHR(MSHR *mshr, PacketPtr pkt, - BlkType *blk) -{ - // respond to MSHR targets, if any - - // First offset for critical word first calculations - int initial_offset = 0; - - if (mshr->hasTargets()) { - initial_offset = mshr->getTarget()->pkt->getOffset(blkSize); - } - - while (mshr->hasTargets()) { - MSHR::Target *target = mshr->getTarget(); - - if (!satisfyTarget(target, blk)) { - // Invalid access, need to do another request - // can occur if block is invalidated, or not correct - // permissions - break; - } - - - // How many bytes pass the first request is this one - int transfer_offset = target->pkt->getOffset(blkSize) - initial_offset; - if (transfer_offset < 0) { - transfer_offset += blkSize; - } - - // If critical word (no offset) return first word time - Tick completion_time = tags->getHitLatency() + - transfer_offset ? pkt->finishTime : pkt->firstWordTime; - - if (!target->pkt->req->isUncacheable()) { - missLatency[target->pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] += - completion_time - target->time; - } - target->pkt->makeTimingResponse(); - cpuSidePort->respond(target->pkt, completion_time); - mshr->popTarget(); - } -} - - ///////////////////////////////////////////////////// // // Snoop path: requests coming in from the memory side @@ -1052,7 +1025,7 @@ Cache::getNextMSHR() // (hwpf_mshr_misses) mshr_misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++; // Don't request bus, since we already have it - return allocateBuffer(pkt, curTick, true, false); + return allocateMissBuffer(pkt, curTick, false); } } @@ -1062,7 +1035,7 @@ Cache::getNextMSHR() template PacketPtr -Cache::getPacket() +Cache::getTimingPacket() { MSHR *mshr = getNextMSHR(); @@ -1073,30 +1046,21 @@ Cache::getPacket() BlkType *blk = tags->findBlock(mshr->addr); // use request from 1st target - MSHR::Target *tgt1 = mshr->getTarget(); - PacketPtr tgt1_pkt = tgt1->pkt; - PacketPtr pkt; + PacketPtr tgt_pkt = mshr->getTarget()->pkt; + PacketPtr pkt = getBusPacket(tgt_pkt, blk, mshr->needsExclusive); - if (mshr->isCacheFill) { - MemCmd cmd; - if (blk && blk->isValid()) { - // only reason to be here is that blk is shared - // (read-only) and we need exclusive - assert(mshr->needsExclusive && !blk->isWritable()); - cmd = MemCmd::UpgradeReq; - } else { - // block is invalid - cmd = mshr->needsExclusive ? MemCmd::ReadExReq : MemCmd::ReadReq; + mshr->isCacheFill = (pkt != NULL); + + if (pkt == NULL) { + // make copy of current packet to forward + pkt = new Packet(tgt_pkt); + pkt->allocate(); + if (pkt->isWrite()) { + pkt->setData(tgt_pkt->getPtr()); } - pkt = new Packet(tgt1_pkt->req, cmd, Packet::Broadcast); - } else { - assert(blk == NULL); - assert(mshr->getNumTargets() == 1); - pkt = new Packet(tgt1_pkt->req, tgt1_pkt->cmd, Packet::Broadcast); } pkt->senderState = mshr; - pkt->allocate(); return pkt; } @@ -1243,7 +1207,7 @@ Cache::MemSidePort::sendPacket() waitingOnRetry = !success; } else { // check for non-response packets (requests & writebacks) - PacketPtr pkt = myCache()->getPacket(); + PacketPtr pkt = myCache()->getTimingPacket(); MSHR *mshr = dynamic_cast(pkt->senderState); bool success = sendTiming(pkt); diff --git a/src/mem/cache/coherence/coherence_protocol.cc b/src/mem/cache/coherence/coherence_protocol.cc index 3fd17c8c7..47d2b469f 100644 --- a/src/mem/cache/coherence/coherence_protocol.cc +++ b/src/mem/cache/coherence/coherence_protocol.cc @@ -259,7 +259,7 @@ CoherenceProtocol::CoherenceProtocol(const string &name, MC::Command writeToSharedCmd = doUpgrades ? MC::UpgradeReq : MC::ReadExReq; MC::Command writeToSharedResp = - doUpgrades ? MC::UpgradeReq : MC::ReadExResp; + doUpgrades ? MC::UpgradeResp : MC::ReadExResp; // Note that all transitions by default cause a panic. // Override the valid transitions with the appropriate actions here. @@ -272,6 +272,7 @@ CoherenceProtocol::CoherenceProtocol(const string &name, tt[Invalid][MC::WriteReq].onRequest(MC::ReadExReq); tt[Invalid][MC::ReadExReq].onRequest(MC::ReadExReq); tt[Invalid][MC::SwapReq].onRequest(MC::ReadExReq); + tt[Invalid][MC::UpgradeReq].onRequest(MC::UpgradeReq); tt[Shared][MC::WriteReq].onRequest(writeToSharedCmd); tt[Shared][MC::ReadExReq].onRequest(MC::ReadExReq); tt[Shared][MC::SwapReq].onRequest(writeToSharedCmd); diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc index 218d42339..1f2c05a6e 100644 --- a/src/mem/cache/miss/mshr.cc +++ b/src/mem/cache/miss/mshr.cc @@ -54,12 +54,12 @@ MSHR::MSHR() } void -MSHR::allocate(Addr _addr, int _size, PacketPtr target, bool cacheFill) +MSHR::allocate(Addr _addr, int _size, PacketPtr target) { addr = _addr; size = _size; assert(target); - isCacheFill = cacheFill; + isCacheFill = false; needsExclusive = target->needsExclusive(); _isUncacheable = target->req->isUncacheable(); inService = false; diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh index b38b69c52..47f6a819b 100644 --- a/src/mem/cache/miss/mshr.hh +++ b/src/mem/cache/miss/mshr.hh @@ -136,7 +136,7 @@ public: * @param size The number of bytes to request. * @param pkt The original miss. */ - void allocate(Addr addr, int size, PacketPtr pkt, bool isFill); + void allocate(Addr addr, int size, PacketPtr pkt); /** * Allocate this MSHR as a buffer for the given request. diff --git a/src/mem/cache/miss/mshr_queue.cc b/src/mem/cache/miss/mshr_queue.cc index d58594798..6b030a865 100644 --- a/src/mem/cache/miss/mshr_queue.cc +++ b/src/mem/cache/miss/mshr_queue.cc @@ -36,8 +36,9 @@ using namespace std; -MSHRQueue::MSHRQueue(int num_entries, int reserve) - : numEntries(num_entries + reserve - 1), numReserve(reserve) +MSHRQueue::MSHRQueue(int num_entries, int reserve, int _index) + : numEntries(num_entries + reserve - 1), numReserve(reserve), + index(_index) { allocated = 0; inServiceEntries = 0; @@ -107,14 +108,14 @@ MSHRQueue::findPending(Addr addr, int size) const } MSHR * -MSHRQueue::allocate(Addr addr, int size, PacketPtr &pkt, bool isFill) +MSHRQueue::allocate(Addr addr, int size, PacketPtr &pkt) { assert(!freeList.empty()); MSHR *mshr = freeList.front(); assert(mshr->getNumTargets() == 0); freeList.pop_front(); - mshr->allocate(addr, size, pkt, isFill); + mshr->allocate(addr, size, pkt); mshr->allocIter = allocatedList.insert(allocatedList.end(), mshr); mshr->readyIter = pendingList.insert(pendingList.end(), mshr); diff --git a/src/mem/cache/miss/mshr_queue.hh b/src/mem/cache/miss/mshr_queue.hh index 182dfd5b2..806aa9c64 100644 --- a/src/mem/cache/miss/mshr_queue.hh +++ b/src/mem/cache/miss/mshr_queue.hh @@ -74,6 +74,9 @@ class MSHRQueue int allocated; /** The number of entries that have been forwarded to the bus. */ int inServiceEntries; + /** The index of this queue within the cache (MSHR queue vs. write + * buffer). */ + const int index; /** * Create a queue with a given number of entries. @@ -81,7 +84,7 @@ class MSHRQueue * @param reserve The minimum number of entries needed to satisfy * any access. */ - MSHRQueue(int num_entries, int reserve = 1); + MSHRQueue(int num_entries, int reserve, int index); /** Destructor */ ~MSHRQueue(); @@ -118,7 +121,7 @@ class MSHRQueue * * @pre There are free entries. */ - MSHR *allocate(Addr addr, int size, PacketPtr &pkt, bool isFill); + MSHR *allocate(Addr addr, int size, PacketPtr &pkt); /** * Removes the given MSHR from the queue. This places the MSHR on the diff --git a/src/mem/cache/prefetch/base_prefetcher.cc b/src/mem/cache/prefetch/base_prefetcher.cc index d03cfe3ae..378363665 100644 --- a/src/mem/cache/prefetch/base_prefetcher.cc +++ b/src/mem/cache/prefetch/base_prefetcher.cc @@ -141,7 +141,7 @@ BasePrefetcher::getPacket() keepTrying = cache->inCache(pkt->getAddr()); } if (pf.empty()) { - cache->deassertMemSideBusRequest(Request_PF); + cache->deassertMemSideBusRequest(BaseCache::Request_PF); if (keepTrying) return NULL; //None left, all were in cache } } while (keepTrying); @@ -165,7 +165,7 @@ BasePrefetcher::handleMiss(PacketPtr &pkt, Tick time) pfRemovedMSHR++; pf.erase(iter); if (pf.empty()) - cache->deassertMemSideBusRequest(Request_PF); + cache->deassertMemSideBusRequest(BaseCache::Request_PF); } //Remove anything in queue with delay older than time @@ -182,7 +182,7 @@ BasePrefetcher::handleMiss(PacketPtr &pkt, Tick time) iter--; } if (pf.empty()) - cache->deassertMemSideBusRequest(Request_PF); + cache->deassertMemSideBusRequest(BaseCache::Request_PF); } @@ -243,7 +243,7 @@ BasePrefetcher::handleMiss(PacketPtr &pkt, Tick time) pf.push_back(prefetch); //Make sure to request the bus, with proper delay - cache->requestMemSideBus(Request_PF, prefetch->time); + cache->requestMemSideBus(BaseCache::Request_PF, prefetch->time); //Increment through the list addr++; diff --git a/src/mem/packet.cc b/src/mem/packet.cc index 57c6a6381..cd0ed8a2e 100644 --- a/src/mem/packet.cc +++ b/src/mem/packet.cc @@ -64,10 +64,8 @@ MemCmd::commandInfo[] = /* WriteResp */ { SET3(IsWrite, NeedsExclusive, IsResponse), InvalidCmd, "WriteResp" }, /* Writeback */ - { SET5(IsWrite, NeedsExclusive, IsRequest, HasData, NeedsResponse), - WritebackAck, "Writeback" }, - /* WritebackAck */ - { SET3(IsWrite, NeedsExclusive, IsResponse), InvalidCmd, "WritebackAck" }, + { SET4(IsWrite, NeedsExclusive, IsRequest, HasData), + InvalidCmd, "Writeback" }, /* SoftPFReq */ { SET4(IsRead, IsRequest, IsSWPrefetch, NeedsResponse), SoftPFResp, "SoftPFReq" }, @@ -88,7 +86,11 @@ MemCmd::commandInfo[] = { SET4(IsWrite, NeedsExclusive, IsInvalidate, IsResponse), InvalidCmd, "WriteInvalidateResp" }, /* UpgradeReq */ - { SET3(IsInvalidate, IsRequest, IsUpgrade), InvalidCmd, "UpgradeReq" }, + { SET4(IsInvalidate, NeedsExclusive, IsRequest, NeedsResponse), + UpgradeResp, "UpgradeReq" }, + /* UpgradeResp */ + { SET3(IsInvalidate, NeedsExclusive, IsResponse), + InvalidCmd, "UpgradeResp" }, /* ReadExReq */ { SET5(IsRead, NeedsExclusive, IsInvalidate, IsRequest, NeedsResponse), ReadExResp, "ReadExReq" }, diff --git a/src/mem/packet.hh b/src/mem/packet.hh index ca186d875..6291b7c1d 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -67,7 +67,6 @@ class MemCmd WriteReq, WriteResp, Writeback, - WritebackAck, SoftPFReq, HardPFReq, SoftPFResp, @@ -75,6 +74,7 @@ class MemCmd WriteInvalidateReq, WriteInvalidateResp, UpgradeReq, + UpgradeResp, ReadExReq, ReadExResp, LoadLockedReq, @@ -100,7 +100,6 @@ class MemCmd NeedsResponse, //!< Requester needs response from target IsSWPrefetch, IsHWPrefetch, - IsUpgrade, IsLocked, //!< Alpha/MIPS LL or SC access HasData, //!< There is an associated payload NUM_COMMAND_ATTRIBUTES -- cgit v1.2.3 From bdd5fd20fb19eb52ef812cd284094e5513646e36 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Fri, 22 Jun 2007 09:24:07 -0700 Subject: Fixes to hitLatency, blocking, buffer allocation. Single-cpu timing mode seems to work now. --HG-- extra : convert_revision : 720f6172df18a1c941e5bd0e8fdfbd686c13c7ad --- src/mem/cache/base_cache.cc | 1 + src/mem/cache/base_cache.hh | 31 ++++++++++------------ src/mem/cache/cache.hh | 26 ------------------- src/mem/cache/cache_impl.hh | 56 +++++++++++++++++----------------------- src/mem/cache/miss/mshr.hh | 24 +++++++---------- src/mem/cache/miss/mshr_queue.cc | 10 +++---- 6 files changed, 53 insertions(+), 95 deletions(-) diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc index 8b476e100..1f5182574 100644 --- a/src/mem/cache/base_cache.cc +++ b/src/mem/cache/base_cache.cc @@ -54,6 +54,7 @@ BaseCache::BaseCache(const std::string &name, Params ¶ms) writeBuffer(params.numWriteBuffers, params.numMSHRs+1000, MSHRQueue_WriteBuffer), blkSize(params.blkSize), + hitLatency(params.hitLatency), numTarget(params.numTargets), blocked(0), noTargetMSHR(NULL), diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh index 10fd3289c..27134b2ad 100644 --- a/src/mem/cache/base_cache.hh +++ b/src/mem/cache/base_cache.hh @@ -195,6 +195,11 @@ class BaseCache : public MemObject /** Block size of this cache */ const int blkSize; + /** + * The latency of a hit in this device. + */ + int hitLatency; + /** The number of targets for each MSHR. */ const int numTarget; @@ -464,15 +469,10 @@ class BaseCache : public MemObject if (blocked == 0) { blocked_causes[cause]++; blockedCycle = curTick; + cpuSidePort->setBlocked(); } - int old_state = blocked; - if (!(blocked & flag)) { - //Wasn't already blocked for this cause - blocked |= flag; - DPRINTF(Cache,"Blocking for cause %s\n", cause); - if (!old_state) - cpuSidePort->setBlocked(); - } + blocked |= flag; + DPRINTF(Cache,"Blocking for cause %d, mask=%d\n", cause, blocked); } /** @@ -485,16 +485,11 @@ class BaseCache : public MemObject void clearBlocked(BlockedCause cause) { uint8_t flag = 1 << cause; - DPRINTF(Cache,"Unblocking for cause %s, causes left=%i\n", - cause, blocked); - if (blocked & flag) - { - blocked &= ~flag; - if (!isBlocked()) { - blocked_cycles[cause] += curTick - blockedCycle; - DPRINTF(Cache,"Unblocking from all causes\n"); - cpuSidePort->clearBlocked(); - } + blocked &= ~flag; + DPRINTF(Cache,"Unblocking for cause %d, mask=%d\n", cause, blocked); + if (blocked == 0) { + blocked_cycles[cause] += curTick - blockedCycle; + cpuSidePort->clearBlocked(); } } diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh index 06fce1a71..a93b761ec 100644 --- a/src/mem/cache/cache.hh +++ b/src/mem/cache/cache.hh @@ -136,23 +136,6 @@ class Cache : public BaseCache /** Prefetcher */ BasePrefetcher *prefetcher; - /** - * The clock ratio of the outgoing bus. - * Used for calculating critical word first. - */ - int busRatio; - - /** - * The bus width in bytes of the outgoing bus. - * Used for calculating critical word first. - */ - int busWidth; - - /** - * The latency of a hit in this device. - */ - int hitLatency; - /** * Can this cache should allocate a block on a line-sized write miss. */ @@ -303,15 +286,6 @@ class Cache : public BaseCache */ void squash(int threadNum); - /** - * Allocate a new MSHR or write buffer to handle a miss. - * @param pkt The access that missed. - * @param time The time to continue processing the miss. - * @param isFill Whether to fetch & allocate a block - * or just forward the request. - */ - MSHR *allocateBuffer(PacketPtr pkt, Tick time, bool requestBus); - /** * Selects a outstanding request to service. * @return The request to service, NULL if none found. diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index 81fcb4158..0649b5061 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -149,27 +149,6 @@ Cache::cmpAndSwap(BlkType *blk, PacketPtr pkt) ///////////////////////////////////////////////////// -template -MSHR * -Cache::allocateBuffer(PacketPtr pkt, Tick time, - bool requestBus) -{ - MSHRQueue *mq = NULL; - - if (pkt->isWrite() && !pkt->isRead()) { - /** - * @todo Add write merging here. - */ - mq = &writeBuffer; - } else { - mq = &mshrQueue; - } - - return allocateBufferInternal(mq, pkt->getAddr(), pkt->getSize(), - pkt, time, requestBus); -} - - template void Cache::markInService(MSHR *mshr) @@ -438,6 +417,8 @@ Cache::getBusPacket(PacketPtr cpu_pkt, BlkType *blk, return NULL; } + assert(cpu_pkt->needsResponse()); + MemCmd cmd; const bool useUpgrades = true; if (blkValid && useUpgrades) { @@ -1043,23 +1024,34 @@ Cache::getTimingPacket() return NULL; } - BlkType *blk = tags->findBlock(mshr->addr); - // use request from 1st target PacketPtr tgt_pkt = mshr->getTarget()->pkt; - PacketPtr pkt = getBusPacket(tgt_pkt, blk, mshr->needsExclusive); + PacketPtr pkt = NULL; - mshr->isCacheFill = (pkt != NULL); - - if (pkt == NULL) { - // make copy of current packet to forward - pkt = new Packet(tgt_pkt); - pkt->allocate(); - if (pkt->isWrite()) { - pkt->setData(tgt_pkt->getPtr()); + if (mshr->isSimpleForward()) { + // no response expected, just forward packet as it is + assert(tags->findBlock(mshr->addr) == NULL); + pkt = tgt_pkt; + } else { + BlkType *blk = tags->findBlock(mshr->addr); + pkt = getBusPacket(tgt_pkt, blk, mshr->needsExclusive); + + mshr->isCacheFill = (pkt != NULL); + + if (pkt == NULL) { + // not a cache block request, but a response is expected + assert(!mshr->isSimpleForward()); + // make copy of current packet to forward, keep current + // copy for response handling + pkt = new Packet(tgt_pkt); + pkt->allocate(); + if (pkt->isWrite()) { + pkt->setData(tgt_pkt->getPtr()); + } } } + assert(pkt != NULL); pkt->senderState = mshr; return pkt; } diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh index 47f6a819b..195438e46 100644 --- a/src/mem/cache/miss/mshr.hh +++ b/src/mem/cache/miss/mshr.hh @@ -164,28 +164,19 @@ public: * Returns the current number of allocated targets. * @return The current number of allocated targets. */ - int getNumTargets() - { - return ntargets; - } + int getNumTargets() { return ntargets; } /** * Returns a pointer to the target list. * @return a pointer to the target list. */ - TargetList* getTargetList() - { - return &targets; - } + TargetList* getTargetList() { return &targets; } /** * Returns a reference to the first target. * @return A pointer to the first target. */ - Target *getTarget() - { - return &targets.front(); - } + Target *getTarget() { return &targets.front(); } /** * Pop first target. @@ -200,9 +191,14 @@ public: * Returns true if there are targets left. * @return true if there are targets */ - bool hasTargets() + bool hasTargets() { return !targets.empty(); } + + bool isSimpleForward() { - return !targets.empty(); + if (getNumTargets() != 1) + return false; + Target *tgt = getTarget(); + return tgt->isCpuSide() && !tgt->pkt->needsResponse(); } /** diff --git a/src/mem/cache/miss/mshr_queue.cc b/src/mem/cache/miss/mshr_queue.cc index 6b030a865..3407e2588 100644 --- a/src/mem/cache/miss/mshr_queue.cc +++ b/src/mem/cache/miss/mshr_queue.cc @@ -158,14 +158,14 @@ MSHRQueue::moveToFront(MSHR *mshr) void MSHRQueue::markInService(MSHR *mshr) { - //assert(mshr == pendingList.front()); -#if 0 - if (!mshr->pkt->needsResponse() && !(mshr->pkt->cmd == MemCmd::UpgradeReq)) { - assert(mshr->getNumTargets() == 0); + if (mshr->isSimpleForward()) { + // we just forwarded the request packet & don't expect a + // response, so get rid of it + assert(mshr->getNumTargets() == 1); + mshr->popTarget(); deallocate(mshr); return; } -#endif mshr->inService = true; pendingList.erase(mshr->readyIter); //mshr->readyIter = NULL; -- cgit v1.2.3 From 57ff2604e59647c6afe988767186f13c80c1aa16 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Sat, 23 Jun 2007 13:24:33 -0700 Subject: Minor fix plus new assertion to catch similar bugs. src/cpu/memtest/memtest.cc: Need to set packet source field so that response from cache doesn't run into assertion failure when copying source to dest. src/mem/packet.hh: Copy source field when copying packets. Assert that source is valid before copying it to dest when turning packets around. --HG-- extra : convert_revision : 09e3cfda424aa89fe170e21e955b295746832bf8 --- src/cpu/memtest/memtest.cc | 2 ++ src/mem/packet.hh | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/cpu/memtest/memtest.cc b/src/cpu/memtest/memtest.cc index 6e8c5d0bf..019b4328c 100644 --- a/src/cpu/memtest/memtest.cc +++ b/src/cpu/memtest/memtest.cc @@ -344,6 +344,7 @@ MemTest::tick() req->getPaddr(), blockAddr(req->getPaddr()), *result); PacketPtr pkt = new Packet(req, MemCmd::ReadReq, Packet::Broadcast); + pkt->setSrc(0); pkt->dataDynamicArray(new uint8_t[req->getSize()]); MemTestSenderState *state = new MemTestSenderState(result); pkt->senderState = state; @@ -373,6 +374,7 @@ MemTest::tick() req->getPaddr(), blockAddr(req->getPaddr()), data & 0xff); PacketPtr pkt = new Packet(req, MemCmd::WriteReq, Packet::Broadcast); + pkt->setSrc(0); uint8_t *pkt_data = new uint8_t[req->getSize()]; pkt->dataDynamicArray(pkt_data); memcpy(pkt_data, &data, req->getSize()); diff --git a/src/mem/packet.hh b/src/mem/packet.hh index 80da045ef..fc1c283ed 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -384,7 +384,7 @@ class Packet : public FastAlloc Packet(Packet *origPkt) : data(NULL), staticData(false), dynamicData(false), arrayData(false), addr(origPkt->addr), size(origPkt->size), - dest(origPkt->dest), + src(origPkt->src), dest(origPkt->dest), addrSizeValid(origPkt->addrSizeValid), srcValid(origPkt->srcValid), snoopFlags(origPkt->snoopFlags), time(curTick), @@ -440,7 +440,7 @@ class Packet : public FastAlloc */ void convertAtomicToTimingResponse() { - dest = src; + dest = getSrc(); srcValid = false; } -- cgit v1.2.3 From 47bce8ef7875420b2e26ebd834ed0d4146b65d5b Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Sun, 24 Jun 2007 17:32:31 -0700 Subject: Better handling of deferred targets. --HG-- extra : convert_revision : 0fbc28c32c1eeb3dd672df14c1d53bd516f81d0f --- src/mem/cache/base_cache.cc | 3 +- src/mem/cache/base_cache.hh | 2 - src/mem/cache/cache.hh | 3 +- src/mem/cache/cache_impl.hh | 127 ++++++++++++++++++++++---------------------- src/mem/cache/miss/mshr.cc | 90 ++++++++++++++++++++++--------- src/mem/cache/miss/mshr.hh | 17 +++--- 6 files changed, 141 insertions(+), 101 deletions(-) diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc index 1f5182574..ac577f5a2 100644 --- a/src/mem/cache/base_cache.cc +++ b/src/mem/cache/base_cache.cc @@ -42,8 +42,7 @@ using namespace std; BaseCache::CachePort::CachePort(const std::string &_name, BaseCache *_cache) : SimpleTimingPort(_name, _cache), cache(_cache), otherPort(NULL), - blocked(false), waitingOnRetry(false), mustSendRetry(false), - requestCauses(0) + blocked(false), mustSendRetry(false), requestCauses(0) { } diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh index 27134b2ad..b35fc0811 100644 --- a/src/mem/cache/base_cache.hh +++ b/src/mem/cache/base_cache.hh @@ -118,8 +118,6 @@ class BaseCache : public MemObject bool blocked; - bool waitingOnRetry; - bool mustSendRetry; /** diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh index a93b761ec..2a95dc53c 100644 --- a/src/mem/cache/cache.hh +++ b/src/mem/cache/cache.hh @@ -182,8 +182,7 @@ class Cache : public BaseCache BlkType *handleFill(PacketPtr pkt, BlkType *blk, PacketList &writebacks); - bool satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk); - bool satisfyTarget(MSHR::Target *target, BlkType *blk); + void satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk); bool satisfyMSHR(MSHR *mshr, PacketPtr pkt, BlkType *blk); void doTimingSupplyResponse(PacketPtr req_pkt, uint8_t *blk_data); diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index 0649b5061..b4d334249 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -368,7 +368,7 @@ Cache::timingAccess(PacketPtr pkt) if (mshr->threadNum != 0/*pkt->req->getThreadNum()*/) { mshr->threadNum = -1; } - mshr->allocateTarget(pkt, true); + mshr->allocateTarget(pkt); if (mshr->getNumTargets() == numTarget) { noTargetMSHR = mshr; setBlocked(Blocked_NoTargets); @@ -483,8 +483,7 @@ Cache::atomicAccess(PacketPtr pkt) if (isCacheFill) { PacketList writebacks; blk = handleFill(busPkt, blk, writebacks); - bool status = satisfyCpuSideRequest(pkt, blk); - assert(status); + satisfyCpuSideRequest(pkt, blk); delete busPkt; // Handle writebacks if needed @@ -538,12 +537,14 @@ Cache::functionalAccess(PacketPtr pkt, // There can be many matching outstanding writes. std::vector writes; - writeBuffer.findMatches(blk_addr, writes); + assert(!writeBuffer.findMatches(blk_addr, writes)); +/* Need to change this to iterate through targets in mshr?? for (int i = 0; i < writes.size(); ++i) { MSHR *mshr = writes[i]; if (pkt->checkFunctional(mshr->addr, mshr->size, mshr->writeData)) return; } +*/ otherSidePort->checkAndSendFunctional(pkt); } @@ -557,43 +558,30 @@ Cache::functionalAccess(PacketPtr pkt, template -bool +void Cache::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk) { - if (blk && (pkt->needsExclusive() ? blk->isWritable() : blk->isValid())) { - assert(pkt->isWrite() || pkt->isReadWrite() || pkt->isRead()); - assert(pkt->getOffset(blkSize) + pkt->getSize() <= blkSize); - - if (pkt->isWrite()) { - if (blk->checkWrite(pkt)) { - blk->status |= BlkDirty; - pkt->writeDataToBlock(blk->data, blkSize); - } - } else if (pkt->isReadWrite()) { - cmpAndSwap(blk, pkt); - } else { - if (pkt->isLocked()) { - blk->trackLoadLocked(pkt); - } - pkt->setDataFromBlock(blk->data, blkSize); + assert(blk); + assert(pkt->needsExclusive() ? blk->isWritable() : blk->isValid()); + assert(pkt->isWrite() || pkt->isReadWrite() || pkt->isRead()); + assert(pkt->getOffset(blkSize) + pkt->getSize() <= blkSize); + + if (pkt->isWrite()) { + if (blk->checkWrite(pkt)) { + blk->status |= BlkDirty; + pkt->writeDataToBlock(blk->data, blkSize); } - - return true; + } else if (pkt->isReadWrite()) { + cmpAndSwap(blk, pkt); } else { - return false; + if (pkt->isLocked()) { + blk->trackLoadLocked(pkt); + } + pkt->setDataFromBlock(blk->data, blkSize); } } -template -bool -Cache::satisfyTarget(MSHR::Target *target, BlkType *blk) -{ - assert(target != NULL); - assert(target->isCpuSide()); - return satisfyCpuSideRequest(target->pkt, blk); -} - template bool Cache::satisfyMSHR(MSHR *mshr, PacketPtr pkt, @@ -611,37 +599,42 @@ Cache::satisfyMSHR(MSHR *mshr, PacketPtr pkt, while (mshr->hasTargets()) { MSHR::Target *target = mshr->getTarget(); - if (!satisfyTarget(target, blk)) { - // Invalid access, need to do another request - // can occur if block is invalidated, or not correct - // permissions - MSHRQueue *mq = mshr->queue; - mq->markPending(mshr); - mshr->order = order++; - requestMemSideBus((RequestCause)mq->index, pkt->finishTime); - return false; - } + if (target->isCpuSide()) { + satisfyCpuSideRequest(target->pkt, blk); + // How many bytes pass the first request is this one + int transfer_offset = + target->pkt->getOffset(blkSize) - initial_offset; + if (transfer_offset < 0) { + transfer_offset += blkSize; + } + // If critical word (no offset) return first word time + Tick completion_time = tags->getHitLatency() + + transfer_offset ? pkt->finishTime : pkt->firstWordTime; - // How many bytes pass the first request is this one - int transfer_offset = target->pkt->getOffset(blkSize) - initial_offset; - if (transfer_offset < 0) { - transfer_offset += blkSize; + if (!target->pkt->req->isUncacheable()) { + missLatency[target->pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] += + completion_time - target->time; + } + target->pkt->makeTimingResponse(); + cpuSidePort->respond(target->pkt, completion_time); + } else { + // response to snoop request + DPRINTF(Cache, "processing deferred snoop...\n"); + handleSnoop(target->pkt, blk, true); } - // If critical word (no offset) return first word time - Tick completion_time = tags->getHitLatency() + - transfer_offset ? pkt->finishTime : pkt->firstWordTime; - - if (!target->pkt->req->isUncacheable()) { - missLatency[target->pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] += - completion_time - target->time; - } - target->pkt->makeTimingResponse(); - cpuSidePort->respond(target->pkt, completion_time); mshr->popTarget(); } + if (mshr->promoteDeferredTargets()) { + MSHRQueue *mq = mshr->queue; + mq->markPending(mshr); + mshr->order = order++; + requestMemSideBus((RequestCause)mq->index, pkt->finishTime); + return false; + } + return true; } @@ -653,6 +646,7 @@ Cache::handleResponse(PacketPtr pkt) Tick time = curTick + hitLatency; MSHR *mshr = dynamic_cast(pkt->senderState); assert(mshr); + if (pkt->result == Packet::Nacked) { //pkt->reinitFromRequest(); warn("NACKs from devices not connected to the same bus " @@ -661,7 +655,7 @@ Cache::handleResponse(PacketPtr pkt) } assert(pkt->result != Packet::BadAddress); assert(pkt->result == Packet::Success); - DPRINTF(Cache, "Handling reponse to %x\n", pkt->getAddr()); + DPRINTF(Cache, "Handling response to %x\n", pkt->getAddr()); MSHRQueue *mq = mshr->queue; bool wasFull = mq->isFull(); @@ -883,7 +877,12 @@ Cache::snoopTiming(PacketPtr pkt) MSHR *mshr = mshrQueue.findMatch(blk_addr); // better not be snooping a request that conflicts with something // we have outstanding... - assert(!mshr || !mshr->inService); + if (mshr && mshr->inService) { + assert(mshr->getNumTargets() < numTarget); //handle later + mshr->allocateSnoopTarget(pkt); + assert(mshr->getNumTargets() < numTarget); //handle later + return; + } //We also need to check the writeback buffers and handle those std::vector writebacks; @@ -895,6 +894,9 @@ Cache::snoopTiming(PacketPtr pkt) for (int i=0; iisUncacheable()); + assert(mshr->getNumTargets() == 1); + PacketPtr wb_pkt = mshr->getTarget()->pkt; + assert(wb_pkt->cmd == MemCmd::Writeback); if (pkt->isRead()) { pkt->assertMemInhibit(); @@ -906,7 +908,7 @@ Cache::snoopTiming(PacketPtr pkt) // the packet's invalidate flag is set... assert(pkt->isInvalidate()); } - doTimingSupplyResponse(pkt, mshr->writeData); + doTimingSupplyResponse(pkt, wb_pkt->getPtr()); } if (pkt->isInvalidate()) { @@ -1208,7 +1210,7 @@ Cache::MemSidePort::sendPacket() waitingOnRetry = !success; if (waitingOnRetry) { - DPRINTF(CachePort, "%s now waiting on a retry\n", name()); + DPRINTF(CachePort, "now waiting on a retry\n"); } else { myCache()->markInService(mshr); } @@ -1220,8 +1222,7 @@ Cache::MemSidePort::sendPacket() if (!waitingOnRetry) { if (isBusRequested()) { // more requests/writebacks: rerequest ASAP - DPRINTF(CachePort, "%s still more MSHR requests to send\n", - name()); + DPRINTF(CachePort, "still more MSHR requests to send\n"); sendEvent->schedule(curTick+1); } else if (!transmitList.empty()) { // deferred packets: rerequest bus, but possibly not until later diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc index 1f2c05a6e..24ff3b33c 100644 --- a/src/mem/cache/miss/mshr.cc +++ b/src/mem/cache/miss/mshr.cc @@ -68,12 +68,16 @@ MSHR::allocate(Addr _addr, int _size, PacketPtr target) // Don't know of a case where we would allocate a new MSHR for a // snoop (mem0-side request), so set cpuSide to true here. targets.push_back(Target(target, true)); + assert(deferredTargets.empty()); + deferredNeedsExclusive = false; + pendingInvalidate = false; } void MSHR::deallocate() { assert(targets.empty()); + assert(deferredTargets.empty()); assert(ntargets == 0); inService = false; //allocIter = NULL; @@ -84,41 +88,77 @@ MSHR::deallocate() * Adds a target to an MSHR */ void -MSHR::allocateTarget(PacketPtr target, bool cpuSide) +MSHR::allocateTarget(PacketPtr target) { - //If we append an invalidate and we issued a read to the bus, - //but now have some pending writes, we need to move - //the invalidate to before the first non-read - if (inService && !inServiceForExclusive && needsExclusive - && !cpuSide && target->isInvalidate()) { - std::list temp; - - while (!targets.empty()) { - if (targets.front().pkt->needsExclusive()) break; - //Place on top of temp stack - temp.push_front(targets.front()); - //Remove from targets - targets.pop_front(); + if (inService) { + if (!deferredTargets.empty() || pendingInvalidate || + (!needsExclusive && target->needsExclusive())) { + // need to put on deferred list + deferredTargets.push_back(Target(target, true)); + if (target->needsExclusive()) { + deferredNeedsExclusive = true; + } + } else { + // still OK to append to outstanding request + targets.push_back(Target(target, true)); + } + } else { + if (target->needsExclusive()) { + needsExclusive = true; } - //Now that we have all the reads off until first non-read, we can - //place the invalidate on - targets.push_front(Target(target, cpuSide)); + targets.push_back(Target(target, true)); + } - //Now we pop off the temp_stack and put them back - while (!temp.empty()) { - targets.push_front(temp.front()); - temp.pop_front(); - } + ++ntargets; +} + +void +MSHR::allocateSnoopTarget(PacketPtr target) +{ + assert(inService); // don't bother to call otherwise + + if (pendingInvalidate) { + // a prior snoop has already appended an invalidation, so + // logically we don't have the block anymore... + return; } - else { - targets.push_back(Target(target, cpuSide)); + + if (needsExclusive) { + // We're awaiting an exclusive copy, so ownership is pending. + // It's up to us to respond once the data arrives. + target->assertMemInhibit(); + } else if (target->needsExclusive()) { + // This transaction will take away our pending copy + pendingInvalidate = true; + } else { + // If we're not going to supply data or perform an + // invalidation, we don't need to save this. + return; } + targets.push_back(Target(target, false)); ++ntargets; +} + + +bool +MSHR::promoteDeferredTargets() +{ + if (deferredTargets.empty()) { + return false; + } + + assert(targets.empty()); + targets = deferredTargets; + deferredTargets.clear(); assert(targets.size() == ntargets); - needsExclusive = needsExclusive || target->needsExclusive(); + needsExclusive = deferredNeedsExclusive; + pendingInvalidate = false; + deferredNeedsExclusive = false; + + return true; } diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh index 195438e46..f4e090a12 100644 --- a/src/mem/cache/miss/mshr.hh +++ b/src/mem/cache/miss/mshr.hh @@ -85,9 +85,6 @@ class MSHR : public Packet::SenderState /** Size of the request. */ int size; - /** Data associated with the request (if a write). */ - uint8_t *writeData; - /** True if the request has been sent to the bus. */ bool inService; @@ -95,12 +92,13 @@ class MSHR : public Packet::SenderState bool isCacheFill; /** True if we need to get an exclusive copy of the block. */ bool needsExclusive; + /** True if the request is uncacheable */ bool _isUncacheable; - /** True if the request that has been sent to the bus is for en - * exclusive copy. */ - bool inServiceForExclusive; + bool deferredNeedsExclusive; + bool pendingInvalidate; + /** Thread number of the miss. */ short threadNum; /** The number of currently allocated targets. */ @@ -124,6 +122,8 @@ private: /** List of all requests that match the address */ TargetList targets; + TargetList deferredTargets; + public: bool isUncacheable() { return _isUncacheable; } @@ -153,7 +153,8 @@ public: * Add a request to the list of targets. * @param target The target. */ - void allocateTarget(PacketPtr target, bool cpuSide); + void allocateTarget(PacketPtr target); + void allocateSnoopTarget(PacketPtr target); /** A simple constructor. */ MSHR(); @@ -201,6 +202,8 @@ public: return tgt->isCpuSide() && !tgt->pkt->needsResponse(); } + bool promoteDeferredTargets(); + /** * Prints the contents of this MSHR to stderr. */ -- cgit v1.2.3 From 529f12a531c331e4bdcf595a3aaf65ee5ef6b72d Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Mon, 25 Jun 2007 06:47:05 -0700 Subject: Get rid of requestCauses. Use timestamped queue to make sure we don't re-request bus prematurely. Use callback to avoid calling sendRetry() recursively within recvTiming. --HG-- extra : convert_revision : a907a2781b4b00aa8eb1ea7147afc81d6b424140 --- src/mem/cache/base_cache.cc | 6 ++++-- src/mem/cache/base_cache.hh | 42 ++++++++++++------------------------- src/mem/cache/cache_impl.hh | 28 ++++++++++++------------- src/mem/cache/miss/mshr.cc | 24 +++++++++++++-------- src/mem/cache/miss/mshr.hh | 26 +++++++++++------------ src/mem/cache/miss/mshr_queue.cc | 45 +++++++++++++++++++++++++++++----------- src/mem/cache/miss/mshr_queue.hh | 23 +++++++++++++------- src/mem/tport.cc | 22 +++++++++++--------- src/mem/tport.hh | 18 ++++++++++++++++ 9 files changed, 137 insertions(+), 97 deletions(-) diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc index ac577f5a2..5062d6e87 100644 --- a/src/mem/cache/base_cache.cc +++ b/src/mem/cache/base_cache.cc @@ -42,7 +42,7 @@ using namespace std; BaseCache::CachePort::CachePort(const std::string &_name, BaseCache *_cache) : SimpleTimingPort(_name, _cache), cache(_cache), otherPort(NULL), - blocked(false), mustSendRetry(false), requestCauses(0) + blocked(false), mustSendRetry(false) { } @@ -116,7 +116,9 @@ BaseCache::CachePort::clearBlocked() { DPRINTF(Cache, "Cache Sending Retry\n"); mustSendRetry = false; - sendRetry(); + SendRetryEvent *ev = new SendRetryEvent(this, true); + // @TODO: need to find a better time (next bus cycle?) + ev->schedule(curTick + 1); } } diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh index b35fc0811..09484a14a 100644 --- a/src/mem/cache/base_cache.hh +++ b/src/mem/cache/base_cache.hh @@ -41,6 +41,7 @@ #include #include #include +#include #include #include "base/misc.hh" @@ -105,6 +106,9 @@ class BaseCache : public MemObject bool recvRetryCommon(); + typedef EventWrapper + SendRetryEvent; + public: void setOtherPort(CachePort *_otherPort) { otherPort = _otherPort; } @@ -120,27 +124,12 @@ class BaseCache : public MemObject bool mustSendRetry; - /** - * Bit vector for the outstanding requests for the master interface. - */ - uint8_t requestCauses; - - bool isBusRequested() { return requestCauses != 0; } - void requestBus(RequestCause cause, Tick time) { DPRINTF(Cache, "Asserting bus request for cause %d\n", cause); - if (!isBusRequested() && !waitingOnRetry) { - assert(!sendEvent->scheduled()); - sendEvent->schedule(time); + if (!waitingOnRetry) { + schedSendEvent(time); } - requestCauses |= (1 << cause); - } - - void deassertBusRequest(RequestCause cause) - { - DPRINTF(Cache, "Deasserting bus request for cause %d\n", cause); - requestCauses &= ~(1 << cause); } void respond(PacketPtr pkt, Tick time) { @@ -163,8 +152,7 @@ class BaseCache : public MemObject MSHR *allocateBufferInternal(MSHRQueue *mq, Addr addr, int size, PacketPtr pkt, Tick time, bool requestBus) { - MSHR *mshr = mq->allocate(addr, size, pkt); - mshr->order = order++; + MSHR *mshr = mq->allocate(addr, size, pkt, time, order++); if (mq->isFull()) { setBlocked((BlockedCause)mq->index); @@ -182,9 +170,6 @@ class BaseCache : public MemObject MSHRQueue *mq = mshr->queue; bool wasFull = mq->isFull(); mq->markInService(mshr); - if (!mq->havePending()) { - deassertMemSideBusRequest((RequestCause)mq->index); - } if (wasFull && !mq->isFull()) { clearBlocked((BlockedCause)mq->index); } @@ -491,13 +476,10 @@ class BaseCache : public MemObject } } - /** - * True if the memory-side bus should be requested. - * @return True if there are outstanding requests for the master bus. - */ - bool isMemSideBusRequested() + Tick nextMSHRReadyTick() { - return memSidePort->isBusRequested(); + return std::min(mshrQueue.nextMSHRReadyTick(), + writeBuffer.nextMSHRReadyTick()); } /** @@ -516,7 +498,9 @@ class BaseCache : public MemObject */ void deassertMemSideBusRequest(RequestCause cause) { - memSidePort->deassertBusRequest(cause); + // obsolete!! + assert(false); + // memSidePort->deassertBusRequest(cause); // checkDrain(); } diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index b4d334249..7610b5a41 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -185,9 +185,6 @@ Cache::squash(int threadNum) cause = Blocked_NoMSHRs; } mshrQueue.squash(threadNum); - if (!mshrQueue.havePending()) { - deassertMemSideBusRequest(Request_MSHR); - } if (unblock && !mshrQueue.isFull()) { clearBlocked(cause); } @@ -368,11 +365,14 @@ Cache::timingAccess(PacketPtr pkt) if (mshr->threadNum != 0/*pkt->req->getThreadNum()*/) { mshr->threadNum = -1; } - mshr->allocateTarget(pkt); + mshr->allocateTarget(pkt, time, order++); if (mshr->getNumTargets() == numTarget) { noTargetMSHR = mshr; setBlocked(Blocked_NoTargets); - mshrQueue.moveToFront(mshr); + // need to be careful with this... if this mshr isn't + // ready yet (i.e. time > curTick_, we don't want to + // move it ahead of mshrs that are ready + // mshrQueue.moveToFront(mshr); } } else { // no MSHR @@ -630,7 +630,6 @@ Cache::satisfyMSHR(MSHR *mshr, PacketPtr pkt, if (mshr->promoteDeferredTargets()) { MSHRQueue *mq = mshr->queue; mq->markPending(mshr); - mshr->order = order++; requestMemSideBus((RequestCause)mq->index, pkt->finishTime); return false; } @@ -879,7 +878,7 @@ Cache::snoopTiming(PacketPtr pkt) // we have outstanding... if (mshr && mshr->inService) { assert(mshr->getNumTargets() < numTarget); //handle later - mshr->allocateSnoopTarget(pkt); + mshr->allocateSnoopTarget(pkt, curTick, order++); assert(mshr->getNumTargets() < numTarget); //handle later return; } @@ -1202,6 +1201,7 @@ Cache::MemSidePort::sendPacket() } else { // check for non-response packets (requests & writebacks) PacketPtr pkt = myCache()->getTimingPacket(); + assert(pkt != NULL); MSHR *mshr = dynamic_cast(pkt->senderState); bool success = sendTiming(pkt); @@ -1220,14 +1220,12 @@ Cache::MemSidePort::sendPacket() // tried to send packet... if it was successful (no retry), see if // we need to rerequest bus or not if (!waitingOnRetry) { - if (isBusRequested()) { - // more requests/writebacks: rerequest ASAP - DPRINTF(CachePort, "still more MSHR requests to send\n"); - sendEvent->schedule(curTick+1); - } else if (!transmitList.empty()) { - // deferred packets: rerequest bus, but possibly not until later - Tick time = transmitList.front().tick; - sendEvent->schedule(time <= curTick ? curTick+1 : time); + Tick nextReady = std::min(deferredPacketReadyTick(), + myCache()->nextMSHRReadyTick()); + // @TODO: need to facotr in prefetch requests here somehow + if (nextReady != MaxTick) { + DPRINTF(CachePort, "more packets to send @ %d\n", nextReady); + sendEvent->schedule(std::max(nextReady, curTick + 1)); } else { // no more to send right now: if we're draining, we may be done if (drainEvent) { diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc index 24ff3b33c..8fa11ab2e 100644 --- a/src/mem/cache/miss/mshr.cc +++ b/src/mem/cache/miss/mshr.cc @@ -37,6 +37,7 @@ #include #include #include +#include #include "mem/cache/miss/mshr.hh" #include "sim/core.hh" // for curTick @@ -54,10 +55,13 @@ MSHR::MSHR() } void -MSHR::allocate(Addr _addr, int _size, PacketPtr target) +MSHR::allocate(Addr _addr, int _size, PacketPtr target, + Tick when, Counter _order) { addr = _addr; size = _size; + readyTick = when; + order = _order; assert(target); isCacheFill = false; needsExclusive = target->needsExclusive(); @@ -66,8 +70,8 @@ MSHR::allocate(Addr _addr, int _size, PacketPtr target) threadNum = 0; ntargets = 1; // Don't know of a case where we would allocate a new MSHR for a - // snoop (mem0-side request), so set cpuSide to true here. - targets.push_back(Target(target, true)); + // snoop (mem-side request), so set cpuSide to true here. + targets.push_back(Target(target, when, _order, true)); assert(deferredTargets.empty()); deferredNeedsExclusive = false; pendingInvalidate = false; @@ -88,33 +92,33 @@ MSHR::deallocate() * Adds a target to an MSHR */ void -MSHR::allocateTarget(PacketPtr target) +MSHR::allocateTarget(PacketPtr target, Tick when, Counter _order) { if (inService) { if (!deferredTargets.empty() || pendingInvalidate || (!needsExclusive && target->needsExclusive())) { // need to put on deferred list - deferredTargets.push_back(Target(target, true)); + deferredTargets.push_back(Target(target, when, _order, true)); if (target->needsExclusive()) { deferredNeedsExclusive = true; } } else { // still OK to append to outstanding request - targets.push_back(Target(target, true)); + targets.push_back(Target(target, when, _order, true)); } } else { if (target->needsExclusive()) { needsExclusive = true; } - targets.push_back(Target(target, true)); + targets.push_back(Target(target, when, _order, true)); } ++ntargets; } void -MSHR::allocateSnoopTarget(PacketPtr target) +MSHR::allocateSnoopTarget(PacketPtr target, Tick when, Counter _order) { assert(inService); // don't bother to call otherwise @@ -137,7 +141,7 @@ MSHR::allocateSnoopTarget(PacketPtr target) return; } - targets.push_back(Target(target, false)); + targets.push_back(Target(target, when, _order, false)); ++ntargets; } @@ -157,6 +161,8 @@ MSHR::promoteDeferredTargets() needsExclusive = deferredNeedsExclusive; pendingInvalidate = false; deferredNeedsExclusive = false; + order = targets.front().order; + readyTick = std::max(curTick, targets.front().time); return true; } diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh index f4e090a12..92288cf52 100644 --- a/src/mem/cache/miss/mshr.hh +++ b/src/mem/cache/miss/mshr.hh @@ -55,13 +55,14 @@ class MSHR : public Packet::SenderState class Target { public: Tick time; //!< Time when request was received (for stats) + Counter order; //!< Global order (for memory consistency mgmt) PacketPtr pkt; //!< Pending request packet. bool cpuSide; //!< Did request come from cpu side or mem side? bool isCpuSide() { return cpuSide; } - Target(PacketPtr _pkt, bool _cpuSide, Tick _time = curTick) - : time(_time), pkt(_pkt), cpuSide(_cpuSide) + Target(PacketPtr _pkt, Tick _time, Counter _order, bool _cpuSide) + : time(_time), order(_order), pkt(_pkt), cpuSide(_cpuSide) {} }; @@ -79,6 +80,12 @@ class MSHR : public Packet::SenderState /** Pointer to queue containing this MSHR. */ MSHRQueue *queue; + /** Cycle when ready to issue */ + Tick readyTick; + + /** Order number assigned by the miss queue. */ + Counter order; + /** Address of the request. */ Addr addr; @@ -103,8 +110,6 @@ class MSHR : public Packet::SenderState short threadNum; /** The number of currently allocated targets. */ short ntargets; - /** Order number of assigned by the miss queue. */ - uint64_t order; /** * Pointer to this MSHR on the ready list. @@ -136,13 +141,8 @@ public: * @param size The number of bytes to request. * @param pkt The original miss. */ - void allocate(Addr addr, int size, PacketPtr pkt); - - /** - * Allocate this MSHR as a buffer for the given request. - * @param target The memory request to buffer. - */ - void allocateAsBuffer(PacketPtr target); + void allocate(Addr addr, int size, PacketPtr pkt, + Tick when, Counter _order); /** * Mark this MSHR as free. @@ -153,8 +153,8 @@ public: * Add a request to the list of targets. * @param target The target. */ - void allocateTarget(PacketPtr target); - void allocateSnoopTarget(PacketPtr target); + void allocateTarget(PacketPtr target, Tick when, Counter order); + void allocateSnoopTarget(PacketPtr target, Tick when, Counter order); /** A simple constructor. */ MSHR(); diff --git a/src/mem/cache/miss/mshr_queue.cc b/src/mem/cache/miss/mshr_queue.cc index 3407e2588..18184bd20 100644 --- a/src/mem/cache/miss/mshr_queue.cc +++ b/src/mem/cache/miss/mshr_queue.cc @@ -90,8 +90,8 @@ MSHRQueue::findMatches(Addr addr, vector& matches) const MSHR * MSHRQueue::findPending(Addr addr, int size) const { - MSHR::ConstIterator i = pendingList.begin(); - MSHR::ConstIterator end = pendingList.end(); + MSHR::ConstIterator i = readyList.begin(); + MSHR::ConstIterator end = readyList.end(); for (; i != end; ++i) { MSHR *mshr = *i; if (mshr->addr < addr) { @@ -107,17 +107,37 @@ MSHRQueue::findPending(Addr addr, int size) const return NULL; } + +MSHR::Iterator +MSHRQueue::addToReadyList(MSHR *mshr) +{ + if (readyList.empty() || readyList.back()->readyTick <= mshr->readyTick) { + return readyList.insert(readyList.end(), mshr); + } + + MSHR::Iterator i = readyList.begin(); + MSHR::Iterator end = readyList.end(); + for (; i != end; ++i) { + if ((*i)->readyTick > mshr->readyTick) { + return readyList.insert(i, mshr); + } + } + assert(false); +} + + MSHR * -MSHRQueue::allocate(Addr addr, int size, PacketPtr &pkt) +MSHRQueue::allocate(Addr addr, int size, PacketPtr &pkt, + Tick when, Counter order) { assert(!freeList.empty()); MSHR *mshr = freeList.front(); assert(mshr->getNumTargets() == 0); freeList.pop_front(); - mshr->allocate(addr, size, pkt); + mshr->allocate(addr, size, pkt, when, order); mshr->allocIter = allocatedList.insert(allocatedList.end(), mshr); - mshr->readyIter = pendingList.insert(pendingList.end(), mshr); + mshr->readyIter = addToReadyList(mshr); allocated += 1; return mshr; @@ -139,7 +159,7 @@ MSHRQueue::deallocateOne(MSHR *mshr) if (mshr->inService) { inServiceEntries--; } else { - pendingList.erase(mshr->readyIter); + readyList.erase(mshr->readyIter); } mshr->deallocate(); return retval; @@ -150,14 +170,15 @@ MSHRQueue::moveToFront(MSHR *mshr) { if (!mshr->inService) { assert(mshr == *(mshr->readyIter)); - pendingList.erase(mshr->readyIter); - mshr->readyIter = pendingList.insert(pendingList.begin(), mshr); + readyList.erase(mshr->readyIter); + mshr->readyIter = readyList.insert(readyList.begin(), mshr); } } void MSHRQueue::markInService(MSHR *mshr) { + assert(!mshr->inService); if (mshr->isSimpleForward()) { // we just forwarded the request packet & don't expect a // response, so get rid of it @@ -167,23 +188,23 @@ MSHRQueue::markInService(MSHR *mshr) return; } mshr->inService = true; - pendingList.erase(mshr->readyIter); + readyList.erase(mshr->readyIter); //mshr->readyIter = NULL; inServiceEntries += 1; - //pendingList.pop_front(); + //readyList.pop_front(); } void MSHRQueue::markPending(MSHR *mshr) { - //assert(mshr->readyIter == NULL); + assert(mshr->inService); mshr->inService = false; --inServiceEntries; /** * @ todo might want to add rerequests to front of pending list for * performance. */ - mshr->readyIter = pendingList.insert(pendingList.end(), mshr); + mshr->readyIter = addToReadyList(mshr); } void diff --git a/src/mem/cache/miss/mshr_queue.hh b/src/mem/cache/miss/mshr_queue.hh index 806aa9c64..fd61dec8b 100644 --- a/src/mem/cache/miss/mshr_queue.hh +++ b/src/mem/cache/miss/mshr_queue.hh @@ -51,7 +51,7 @@ class MSHRQueue /** Holds pointers to all allocated entries. */ MSHR::List allocatedList; /** Holds pointers to entries that haven't been sent to the bus. */ - MSHR::List pendingList; + MSHR::List readyList; /** Holds non allocated entries. */ MSHR::List freeList; @@ -69,6 +69,9 @@ class MSHRQueue */ const int numReserve; + MSHR::Iterator addToReadyList(MSHR *mshr); + + public: /** The number of allocated entries. */ int allocated; @@ -121,7 +124,8 @@ class MSHRQueue * * @pre There are free entries. */ - MSHR *allocate(Addr addr, int size, PacketPtr &pkt); + MSHR *allocate(Addr addr, int size, PacketPtr &pkt, + Tick when, Counter order); /** * Removes the given MSHR from the queue. This places the MSHR on the @@ -147,7 +151,7 @@ class MSHRQueue /** * Mark the given MSHR as in service. This removes the MSHR from the - * pendingList. Deallocates the MSHR if it does not expect a response. + * readyList. Deallocates the MSHR if it does not expect a response. * @param mshr The MSHR to mark in service. */ void markInService(MSHR *mshr); @@ -171,7 +175,7 @@ class MSHRQueue */ bool havePending() const { - return !pendingList.empty(); + return !readyList.empty(); } /** @@ -184,15 +188,20 @@ class MSHRQueue } /** - * Returns the MSHR at the head of the pendingList. + * Returns the MSHR at the head of the readyList. * @return The next request to service. */ MSHR *getNextMSHR() const { - if (pendingList.empty()) { + if (readyList.empty() || readyList.front()->readyTick > curTick) { return NULL; } - return pendingList.front(); + return readyList.front(); + } + + Tick nextMSHRReadyTick() const + { + return readyList.empty() ? MaxTick : readyList.front()->readyTick; } }; diff --git a/src/mem/tport.cc b/src/mem/tport.cc index 2644a504c..0a2127490 100644 --- a/src/mem/tport.cc +++ b/src/mem/tport.cc @@ -91,28 +91,30 @@ SimpleTimingPort::schedSendTiming(PacketPtr pkt, Tick when) assert(when > curTick); // Nothing is on the list: add it and schedule an event - if (transmitList.empty()) { - assert(!sendEvent->scheduled()); - sendEvent->schedule(when); - transmitList.push_back(DeferredPacket(when, pkt)); + if (transmitList.empty() || when < transmitList.front().tick) { + transmitList.push_front(DeferredPacket(when, pkt)); + schedSendEvent(when); return; } - // something is on the list and this belongs at the end + // list is non-empty and this is not the head, so event should + // already be scheduled + assert(waitingOnRetry || + (sendEvent->scheduled() && sendEvent->when() <= when)); + + // list is non-empty & this belongs at the end if (when >= transmitList.back().tick) { transmitList.push_back(DeferredPacket(when, pkt)); return; } - // Something is on the list and this belongs somewhere else + + // this belongs in the middle somewhere DeferredPacketIterator i = transmitList.begin(); + i++; // already checked for insertion at front DeferredPacketIterator end = transmitList.end(); for (; i != end; ++i) { if (when < i->tick) { - if (i == transmitList.begin()) { - //Inserting at begining, reschedule - sendEvent->reschedule(when); - } transmitList.insert(i, DeferredPacket(when, pkt)); return; } diff --git a/src/mem/tport.hh b/src/mem/tport.hh index ea0f05ed1..bfed29f34 100644 --- a/src/mem/tport.hh +++ b/src/mem/tport.hh @@ -105,6 +105,24 @@ class SimpleTimingPort : public Port bool deferredPacketReady() { return !transmitList.empty() && transmitList.front().tick <= curTick; } + Tick deferredPacketReadyTick() + { return transmitList.empty() ? MaxTick : transmitList.front().tick; } + + void schedSendEvent(Tick when) + { + if (waitingOnRetry) { + assert(!sendEvent->scheduled()); + return; + } + + if (!sendEvent->scheduled()) { + sendEvent->schedule(when); + } else if (sendEvent->when() > when) { + sendEvent->reschedule(when); + } + } + + /** Schedule a sendTiming() event to be called in the future. * @param pkt packet to send * @param absolute time (in ticks) to send packet -- cgit v1.2.3 From f697e959a17646500bca7c12e6bb7b30e047f1bb Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Mon, 25 Jun 2007 22:23:29 -0700 Subject: Couple minor bug fixes... src/mem/cache/cache_impl.hh: Handle grants with no packet. src/mem/cache/miss/mshr.cc: Fix MSHR snoop hit handling. --HG-- extra : convert_revision : f365283afddaa07cb9e050b2981ad6a898c14451 --- src/mem/cache/cache_impl.hh | 27 ++++++++++++++++----------- src/mem/cache/miss/mshr.cc | 10 ++++++---- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index 7610b5a41..48efc5ca3 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -912,7 +912,6 @@ Cache::snoopTiming(PacketPtr pkt) if (pkt->isInvalidate()) { // Invalidation trumps our writeback... discard here - assert(0); markInService(mshr); } return; @@ -1201,18 +1200,24 @@ Cache::MemSidePort::sendPacket() } else { // check for non-response packets (requests & writebacks) PacketPtr pkt = myCache()->getTimingPacket(); - assert(pkt != NULL); - MSHR *mshr = dynamic_cast(pkt->senderState); + if (pkt == NULL) { + // can happen if e.g. we attempt a writeback and fail, but + // before the retry, the writeback is eliminated because + // we snoop another cache's ReadEx. + waitingOnRetry = false; + } else { + MSHR *mshr = dynamic_cast(pkt->senderState); - bool success = sendTiming(pkt); - DPRINTF(Cache, "Address %x was %s in sending the timing request\n", - pkt->getAddr(), success ? "successful" : "unsuccessful"); + bool success = sendTiming(pkt); + DPRINTF(Cache, "Address %x was %s in sending the timing request\n", + pkt->getAddr(), success ? "successful" : "unsuccessful"); - waitingOnRetry = !success; - if (waitingOnRetry) { - DPRINTF(CachePort, "now waiting on a retry\n"); - } else { - myCache()->markInService(mshr); + waitingOnRetry = !success; + if (waitingOnRetry) { + DPRINTF(CachePort, "now waiting on a retry\n"); + } else { + myCache()->markInService(mshr); + } } } diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc index 8fa11ab2e..fc8d2175e 100644 --- a/src/mem/cache/miss/mshr.cc +++ b/src/mem/cache/miss/mshr.cc @@ -132,13 +132,15 @@ MSHR::allocateSnoopTarget(PacketPtr target, Tick when, Counter _order) // We're awaiting an exclusive copy, so ownership is pending. // It's up to us to respond once the data arrives. target->assertMemInhibit(); - } else if (target->needsExclusive()) { + } + + if (target->needsExclusive()) { // This transaction will take away our pending copy pendingInvalidate = true; } else { - // If we're not going to supply data or perform an - // invalidation, we don't need to save this. - return; + // We'll keep our pending copy, but we can't let the other guy + // think he's getting it exclusive + target->assertShared(); } targets.push_back(Target(target, when, _order, false)); -- cgit v1.2.3 From 7dacbcf49262605a75e461149ec7bd7a00fca7b7 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Tue, 26 Jun 2007 14:53:15 -0700 Subject: Handle replacement of block with pending upgrade. src/mem/cache/tags/lru.cc: Add some replacement DPRINTFs --HG-- extra : convert_revision : 7993ec24d6af7e7774d04ce36f20e3f43f887fd9 --- src/mem/cache/cache_impl.hh | 27 ++++++++++++++++++----- src/mem/cache/miss/mshr.cc | 53 +++++++++++++++++++++++++++++++++++++++++++++ src/mem/cache/miss/mshr.hh | 11 ++++++++++ src/mem/cache/tags/lru.cc | 4 ++++ 4 files changed, 89 insertions(+), 6 deletions(-) diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index 48efc5ca3..d01adde78 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -300,7 +300,6 @@ Cache::timingAccess(PacketPtr pkt) return true; } - PacketList writebacks; int lat = hitLatency; bool satisfied = false; @@ -319,6 +318,8 @@ Cache::timingAccess(PacketPtr pkt) } #if 0 + PacketList writebacks; + // If this is a block size write/hint (WH64) allocate the block here // if the coherence protocol allows it. /** @todo make the fast write alloc (wh64) work with coherence. */ @@ -338,7 +339,6 @@ Cache::timingAccess(PacketPtr pkt) ++fastWrites; } } -#endif // copy writebacks to write buffer while (!writebacks.empty()) { @@ -346,6 +346,7 @@ Cache::timingAccess(PacketPtr pkt) allocateBuffer(wbPkt, time, true); writebacks.pop_front(); } +#endif bool needsResponse = pkt->needsResponse(); @@ -676,6 +677,15 @@ Cache::handleResponse(PacketPtr pkt) DPRINTF(Cache, "Block for addr %x being updated in Cache\n", pkt->getAddr()); BlkType *blk = tags->findBlock(pkt->getAddr()); + + if (blk == NULL && pkt->cmd == MemCmd::UpgradeResp) { + if (!mshr->handleReplacedPendingUpgrade(pkt)) { + mq->markPending(mshr); + requestMemSideBus((RequestCause)mq->index, pkt->finishTime); + return; + } + } + PacketList writebacks; blk = handleFill(pkt, blk, writebacks); deallocate = satisfyMSHR(mshr, pkt, blk); @@ -747,15 +757,20 @@ Cache::handleFill(PacketPtr pkt, BlkType *blk, Addr addr = pkt->getAddr(); if (blk == NULL) { - // better have read new data + // better have read new data... assert(pkt->isRead()); // need to do a replacement blk = tags->findReplacement(addr, writebacks); if (blk->isValid()) { + Addr repl_addr = tags->regenerateBlkAddr(blk->tag, blk->set); + MSHR *repl_mshr = mshrQueue.findMatch(repl_addr); + if (repl_mshr) { + repl_mshr->handleReplacement(blk, blkSize); + } + DPRINTF(Cache, "replacement: replacing %x with %x: %s\n", - tags->regenerateBlkAddr(blk->tag, blk->set), addr, - blk->isDirty() ? "writeback" : "clean"); + repl_addr, addr, blk->isDirty() ? "writeback" : "clean"); if (blk->isDirty()) { // Save writeback packet for handling by caller @@ -992,7 +1007,7 @@ Cache::getNextMSHR() return conflict_mshr; } - // No conclifts; issue read + // No conflicts; issue read return miss_mshr; } diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc index fc8d2175e..ca5e38601 100644 --- a/src/mem/cache/miss/mshr.cc +++ b/src/mem/cache/miss/mshr.cc @@ -75,6 +75,8 @@ MSHR::allocate(Addr _addr, int _size, PacketPtr target, assert(deferredTargets.empty()); deferredNeedsExclusive = false; pendingInvalidate = false; + replacedPendingUpgrade = false; + data = NULL; } void @@ -170,6 +172,57 @@ MSHR::promoteDeferredTargets() } +void +MSHR::handleReplacement(CacheBlk *blk, int blkSize) +{ + // must be an outstanding upgrade request on block we're about to + // replace... + assert(!blk->isWritable()); + assert(needsExclusive); + replacedPendingUpgrade = true; + + // if it's dirty, just remember what happened and allow the + // writeback to continue. we'll reissue a ReadEx later whether + // the upgrade succeeds or not + if (blk->isDirty()) { + replacedPendingUpgradeDirty = true; + return; + } + + // if not dirty, we need to save it off as it will be only valid + // copy in system if upgrade is successful (and may need to be + // written back then, as the current owner if any will be + // invalidating its block) + replacedPendingUpgradeDirty = false; + data = new uint8_t[blkSize]; + std::memcpy(data, blk->data, blkSize); +} + + +bool +MSHR::handleReplacedPendingUpgrade(Packet *pkt) +{ + // @TODO: if upgrade is nacked and replacedPendingUpgradeDirty is true, then we need to writeback the data (or rel + assert(pkt->cmd == MemCmd::UpgradeResp); + assert(replacedPendingUpgrade); + replacedPendingUpgrade = false; // reset + if (replacedPendingUpgradeDirty) { + // we wrote back the previous copy; just reissue as a ReadEx + return false; + } + + // previous copy was not dirty, but we are now owner... fake out + // cache by taking saved data and converting UpgradeResp to + // ReadExResp + assert(data); + pkt->cmd = MemCmd::ReadExResp; + pkt->setData(data); + delete [] data; + data = NULL; + return true; +} + + void MSHR::dump() { diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh index 92288cf52..a9380d99a 100644 --- a/src/mem/cache/miss/mshr.hh +++ b/src/mem/cache/miss/mshr.hh @@ -105,12 +105,20 @@ class MSHR : public Packet::SenderState bool deferredNeedsExclusive; bool pendingInvalidate; + /** Is there a pending upgrade that got replaced? */ + bool replacedPendingUpgrade; + bool replacedPendingUpgradeDirty; /** Thread number of the miss. */ short threadNum; /** The number of currently allocated targets. */ short ntargets; + + /** Data buffer (if needed). Currently used only for pending + * upgrade handling. */ + uint8_t *data; + /** * Pointer to this MSHR on the ready list. * @sa MissQueue, MSHRQueue::readyList @@ -204,6 +212,9 @@ public: bool promoteDeferredTargets(); + void handleReplacement(CacheBlk *blk, int blkSize); + bool handleReplacedPendingUpgrade(Packet *pkt); + /** * Prints the contents of this MSHR to stderr. */ diff --git a/src/mem/cache/tags/lru.cc b/src/mem/cache/tags/lru.cc index 334312aaf..fa46aff7b 100644 --- a/src/mem/cache/tags/lru.cc +++ b/src/mem/cache/tags/lru.cc @@ -173,6 +173,8 @@ LRU::findBlock(Addr addr, int &lat) if (blk != NULL) { // move this block to head of the MRU list sets[set].moveToHead(blk); + DPRINTF(Cache, "set %x: moving blk %x to MRU\n", + set, regenerateBlkAddr(tag, set)); if (blk->whenReady > curTick && blk->whenReady - curTick > hitLatency) { lat = blk->whenReady - curTick; @@ -214,6 +216,8 @@ LRU::findReplacement(Addr addr, PacketList &writebacks) } } + DPRINTF(Cache, "set %x: selecting blk %x for replacement\n", + set, regenerateBlkAddr(blk->tag, set)); return blk; } -- cgit v1.2.3 From 69ff6d9163c431272fc084b8e051996b44590a53 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Tue, 26 Jun 2007 18:01:22 -0400 Subject: cache_impl.hh: Change target overflow from assertion to warning. src/mem/cache/cache_impl.hh: Change target overflow from assertion to warning. --HG-- extra : convert_revision : ceca990ed916bbf96dedd4836c40df522803f173 --- src/mem/cache/cache_impl.hh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index d01adde78..a73612f24 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -892,9 +892,9 @@ Cache::snoopTiming(PacketPtr pkt) // better not be snooping a request that conflicts with something // we have outstanding... if (mshr && mshr->inService) { - assert(mshr->getNumTargets() < numTarget); //handle later mshr->allocateSnoopTarget(pkt, curTick, order++); - assert(mshr->getNumTargets() < numTarget); //handle later + if (mshr->getNumTargets() > numTarget) + warn("allocating bonus target for snoop"); //handle later return; } -- cgit v1.2.3 From 1b20df5607e86d3b384716792274fe01fa4f3f80 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Tue, 26 Jun 2007 22:23:10 -0700 Subject: Handle deferred snoops better. --HG-- extra : convert_revision : 703da6128832eb0d5cfed7724e5105f4b3fe4f90 --- src/mem/cache/cache.hh | 6 ++-- src/mem/cache/cache_impl.hh | 34 ++++++++++--------- src/mem/cache/miss/mshr.cc | 82 ++++++++++++++++++++++++++++----------------- src/mem/cache/miss/mshr.hh | 3 +- src/mem/cache/tags/lru.cc | 5 +-- src/mem/tport.cc | 13 ++++++- 6 files changed, 91 insertions(+), 52 deletions(-) diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh index 2a95dc53c..161fb801d 100644 --- a/src/mem/cache/cache.hh +++ b/src/mem/cache/cache.hh @@ -185,14 +185,16 @@ class Cache : public BaseCache void satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk); bool satisfyMSHR(MSHR *mshr, PacketPtr pkt, BlkType *blk); - void doTimingSupplyResponse(PacketPtr req_pkt, uint8_t *blk_data); + void doTimingSupplyResponse(PacketPtr req_pkt, uint8_t *blk_data, + bool already_copied); /** * Sets the blk to the new state. * @param blk The cache block being snooped. * @param new_state The new coherence state for the block. */ - void handleSnoop(PacketPtr ptk, BlkType *blk, bool is_timing); + void handleSnoop(PacketPtr ptk, BlkType *blk, + bool is_timing, bool is_deferred); /** * Create a writeback request for the given block. diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index a73612f24..599eecc82 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -622,7 +622,7 @@ Cache::satisfyMSHR(MSHR *mshr, PacketPtr pkt, } else { // response to snoop request DPRINTF(Cache, "processing deferred snoop...\n"); - handleSnoop(target->pkt, blk, true); + handleSnoop(target->pkt, blk, true, true); } mshr->popTarget(); @@ -678,12 +678,10 @@ Cache::handleResponse(PacketPtr pkt) pkt->getAddr()); BlkType *blk = tags->findBlock(pkt->getAddr()); - if (blk == NULL && pkt->cmd == MemCmd::UpgradeResp) { - if (!mshr->handleReplacedPendingUpgrade(pkt)) { - mq->markPending(mshr); - requestMemSideBus((RequestCause)mq->index, pkt->finishTime); - return; - } + if (!mshr->handleFill(pkt, blk)) { + mq->markPending(mshr); + requestMemSideBus((RequestCause)mq->index, pkt->finishTime); + return; } PacketList writebacks; @@ -814,10 +812,12 @@ Cache::handleFill(PacketPtr pkt, BlkType *blk, template void Cache::doTimingSupplyResponse(PacketPtr req_pkt, - uint8_t *blk_data) + uint8_t *blk_data, + bool already_copied) { - // timing-mode snoop responses require a new packet - PacketPtr pkt = new Packet(req_pkt); + // timing-mode snoop responses require a new packet, unless we + // already made a copy... + PacketPtr pkt = already_copied ? req_pkt : new Packet(req_pkt); pkt->allocate(); pkt->makeTimingResponse(); pkt->setDataFromBlock(blk_data, blkSize); @@ -827,7 +827,7 @@ Cache::doTimingSupplyResponse(PacketPtr req_pkt, template void Cache::handleSnoop(PacketPtr pkt, BlkType *blk, - bool is_timing) + bool is_timing, bool is_deferred) { if (!blk || !blk->isValid()) { return; @@ -854,9 +854,10 @@ Cache::handleSnoop(PacketPtr pkt, BlkType *blk, } if (supply) { + assert(!pkt->memInhibitAsserted()); pkt->assertMemInhibit(); if (is_timing) { - doTimingSupplyResponse(pkt, blk->data); + doTimingSupplyResponse(pkt, blk->data, is_deferred); } else { pkt->makeAtomicResponse(); pkt->setDataFromBlock(blk->data, blkSize); @@ -892,6 +893,8 @@ Cache::snoopTiming(PacketPtr pkt) // better not be snooping a request that conflicts with something // we have outstanding... if (mshr && mshr->inService) { + DPRINTF(Cache, "Deferring snoop on in-service MSHR to blk %x\n", + blk_addr); mshr->allocateSnoopTarget(pkt, curTick, order++); if (mshr->getNumTargets() > numTarget) warn("allocating bonus target for snoop"); //handle later @@ -913,6 +916,7 @@ Cache::snoopTiming(PacketPtr pkt) assert(wb_pkt->cmd == MemCmd::Writeback); if (pkt->isRead()) { + assert(!pkt->memInhibitAsserted()); pkt->assertMemInhibit(); if (!pkt->needsExclusive()) { pkt->assertShared(); @@ -922,7 +926,7 @@ Cache::snoopTiming(PacketPtr pkt) // the packet's invalidate flag is set... assert(pkt->isInvalidate()); } - doTimingSupplyResponse(pkt, wb_pkt->getPtr()); + doTimingSupplyResponse(pkt, wb_pkt->getPtr(), false); } if (pkt->isInvalidate()) { @@ -933,7 +937,7 @@ Cache::snoopTiming(PacketPtr pkt) } } - handleSnoop(pkt, blk, true); + handleSnoop(pkt, blk, true, false); } @@ -948,7 +952,7 @@ Cache::snoopAtomic(PacketPtr pkt) } BlkType *blk = tags->findBlock(pkt->getAddr()); - handleSnoop(pkt, blk, false); + handleSnoop(pkt, blk, false, false); return hitLatency; } diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc index ca5e38601..23645cb27 100644 --- a/src/mem/cache/miss/mshr.cc +++ b/src/mem/cache/miss/mshr.cc @@ -75,6 +75,7 @@ MSHR::allocate(Addr _addr, int _size, PacketPtr target, assert(deferredTargets.empty()); deferredNeedsExclusive = false; pendingInvalidate = false; + pendingShared = false; replacedPendingUpgrade = false; data = NULL; } @@ -120,7 +121,7 @@ MSHR::allocateTarget(PacketPtr target, Tick when, Counter _order) } void -MSHR::allocateSnoopTarget(PacketPtr target, Tick when, Counter _order) +MSHR::allocateSnoopTarget(PacketPtr pkt, Tick when, Counter _order) { assert(inService); // don't bother to call otherwise @@ -130,23 +131,33 @@ MSHR::allocateSnoopTarget(PacketPtr target, Tick when, Counter _order) return; } - if (needsExclusive) { - // We're awaiting an exclusive copy, so ownership is pending. - // It's up to us to respond once the data arrives. - target->assertMemInhibit(); - } + DPRINTF(Cache, "deferred snoop on %x: %s %s\n", addr, + needsExclusive ? "needsExclusive" : "", + pkt->needsExclusive() ? "pkt->needsExclusive()" : ""); + + if (needsExclusive || pkt->needsExclusive()) { + // actual target device (typ. PhysicalMemory) will delete the + // packet on reception, so we need to save a copy here + targets.push_back(Target(new Packet(pkt), when, _order, false)); + ++ntargets; + + if (needsExclusive) { + // We're awaiting an exclusive copy, so ownership is pending. + // It's up to us to respond once the data arrives. + pkt->assertMemInhibit(); + } - if (target->needsExclusive()) { - // This transaction will take away our pending copy - pendingInvalidate = true; + if (pkt->needsExclusive()) { + // This transaction will take away our pending copy + pendingInvalidate = true; + } } else { - // We'll keep our pending copy, but we can't let the other guy - // think he's getting it exclusive - target->assertShared(); + // Read to a read: no conflict, so no need to record as + // target, but make sure neither reader thinks he's getting an + // exclusive copy + pendingShared = true; + pkt->assertShared(); } - - targets.push_back(Target(target, when, _order, false)); - ++ntargets; } @@ -164,6 +175,7 @@ MSHR::promoteDeferredTargets() needsExclusive = deferredNeedsExclusive; pendingInvalidate = false; + pendingShared = false; deferredNeedsExclusive = false; order = targets.front().order; readyTick = std::max(curTick, targets.front().time); @@ -200,25 +212,33 @@ MSHR::handleReplacement(CacheBlk *blk, int blkSize) bool -MSHR::handleReplacedPendingUpgrade(Packet *pkt) +MSHR::handleFill(Packet *pkt, CacheBlk *blk) { - // @TODO: if upgrade is nacked and replacedPendingUpgradeDirty is true, then we need to writeback the data (or rel - assert(pkt->cmd == MemCmd::UpgradeResp); - assert(replacedPendingUpgrade); - replacedPendingUpgrade = false; // reset - if (replacedPendingUpgradeDirty) { - // we wrote back the previous copy; just reissue as a ReadEx - return false; + if (replacedPendingUpgrade) { + // block was replaced while upgrade request was in service + assert(pkt->cmd == MemCmd::UpgradeResp); + assert(blk == NULL); + assert(replacedPendingUpgrade); + replacedPendingUpgrade = false; // reset + if (replacedPendingUpgradeDirty) { + // we wrote back the previous copy; just reissue as a ReadEx + return false; + } + + // previous copy was not dirty, but we are now owner... fake out + // cache by taking saved data and converting UpgradeResp to + // ReadExResp + assert(data); + pkt->cmd = MemCmd::ReadExResp; + pkt->setData(data); + delete [] data; + data = NULL; + } else if (pendingShared) { + // we snooped another read while this read was in + // service... assert shared line on its behalf + pkt->assertShared(); } - // previous copy was not dirty, but we are now owner... fake out - // cache by taking saved data and converting UpgradeResp to - // ReadExResp - assert(data); - pkt->cmd = MemCmd::ReadExResp; - pkt->setData(data); - delete [] data; - data = NULL; return true; } diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh index a9380d99a..07fe5c96c 100644 --- a/src/mem/cache/miss/mshr.hh +++ b/src/mem/cache/miss/mshr.hh @@ -105,6 +105,7 @@ class MSHR : public Packet::SenderState bool deferredNeedsExclusive; bool pendingInvalidate; + bool pendingShared; /** Is there a pending upgrade that got replaced? */ bool replacedPendingUpgrade; bool replacedPendingUpgradeDirty; @@ -213,7 +214,7 @@ public: bool promoteDeferredTargets(); void handleReplacement(CacheBlk *blk, int blkSize); - bool handleReplacedPendingUpgrade(Packet *pkt); + bool handleFill(Packet *pkt, CacheBlk *blk); /** * Prints the contents of this MSHR to stderr. diff --git a/src/mem/cache/tags/lru.cc b/src/mem/cache/tags/lru.cc index fa46aff7b..3269aa4db 100644 --- a/src/mem/cache/tags/lru.cc +++ b/src/mem/cache/tags/lru.cc @@ -207,6 +207,9 @@ LRU::findReplacement(Addr addr, PacketList &writebacks) totalRefs += blk->refCount; ++sampledRefs; blk->refCount = 0; + + DPRINTF(Cache, "set %x: selecting blk %x for replacement\n", + set, regenerateBlkAddr(blk->tag, set)); } else if (!blk->isTouched) { tagsInUse++; blk->isTouched = true; @@ -216,8 +219,6 @@ LRU::findReplacement(Addr addr, PacketList &writebacks) } } - DPRINTF(Cache, "set %x: selecting blk %x for replacement\n", - set, regenerateBlkAddr(blk->tag, set)); return blk; } diff --git a/src/mem/tport.cc b/src/mem/tport.cc index 0a2127490..6c8c12ce2 100644 --- a/src/mem/tport.cc +++ b/src/mem/tport.cc @@ -69,11 +69,21 @@ SimpleTimingPort::recvTiming(PacketPtr pkt) // if we ever added it back. assert(pkt->isRequest()); assert(pkt->result == Packet::Unknown); + + if (pkt->memInhibitAsserted()) { + // snooper will supply based on copy of packet + // still target's responsibility to delete packet + delete pkt->req; + delete pkt; + return true; + } + bool needsResponse = pkt->needsResponse(); Tick latency = recvAtomic(pkt); // turn packet around to go back to requester if response expected if (needsResponse) { - // recvAtomic() should already have turned packet into atomic response + // recvAtomic() should already have turned packet into + // atomic response assert(pkt->isResponse()); pkt->convertAtomicToTimingResponse(); schedSendTiming(pkt, curTick + latency); @@ -81,6 +91,7 @@ SimpleTimingPort::recvTiming(PacketPtr pkt) delete pkt->req; delete pkt; } + return true; } -- cgit v1.2.3 From c4903e088247ad187356864459d2e4be77d97154 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Tue, 26 Jun 2007 23:30:30 -0700 Subject: Revamp replacement-of-upgrade handling. --HG-- extra : convert_revision : 9bc09d8ae6d50e6dfbb4ab21514612f9aa102a2e --- src/mem/cache/cache.hh | 3 +++ src/mem/cache/cache_impl.hh | 44 ++++++++++++++++++++++++++------------ src/mem/cache/miss/mshr.cc | 51 +-------------------------------------------- src/mem/cache/miss/mshr.hh | 6 +----- 4 files changed, 36 insertions(+), 68 deletions(-) diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh index 161fb801d..9e8c35066 100644 --- a/src/mem/cache/cache.hh +++ b/src/mem/cache/cache.hh @@ -136,6 +136,9 @@ class Cache : public BaseCache /** Prefetcher */ BasePrefetcher *prefetcher; + /** Temporary cache block for occasional transitory use */ + BlkType *tempBlock; + /** * Can this cache should allocate a block on a line-sized write miss. */ diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index 599eecc82..3685bc8cb 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -58,6 +58,9 @@ Cache::Cache(const std::string &_name, doFastWrites(params.doFastWrites), prefetchMiss(params.prefetchMiss) { + tempBlock = new BlkType(); + tempBlock->data = new uint8_t[blkSize]; + cpuSidePort = new CpuSidePort(_name + "-cpu_side_port", this); memSidePort = new MemSidePort(_name + "-mem_side_port", this); cpuSidePort->setOtherPort(memSidePort); @@ -678,11 +681,8 @@ Cache::handleResponse(PacketPtr pkt) pkt->getAddr()); BlkType *blk = tags->findBlock(pkt->getAddr()); - if (!mshr->handleFill(pkt, blk)) { - mq->markPending(mshr); - requestMemSideBus((RequestCause)mq->index, pkt->finishTime); - return; - } + // give mshr a chance to do some dirty work + mshr->handleFill(pkt, blk); PacketList writebacks; blk = handleFill(pkt, blk, writebacks); @@ -693,6 +693,13 @@ Cache::handleResponse(PacketPtr pkt) allocateBuffer(wbPkt, time, true); writebacks.pop_front(); } + // if we used temp block, clear it out + if (blk == tempBlock) { + if (blk->isDirty()) { + allocateBuffer(writebackBlk(blk), time, true); + } + tags->invalidateBlk(blk); + } } else { if (pkt->req->isUncacheable()) { mshr_uncacheable_lat[pkt->cmd.toInt()][0/*pkt->req->getThreadNum()*/] += @@ -764,15 +771,26 @@ Cache::handleFill(PacketPtr pkt, BlkType *blk, Addr repl_addr = tags->regenerateBlkAddr(blk->tag, blk->set); MSHR *repl_mshr = mshrQueue.findMatch(repl_addr); if (repl_mshr) { - repl_mshr->handleReplacement(blk, blkSize); - } - - DPRINTF(Cache, "replacement: replacing %x with %x: %s\n", - repl_addr, addr, blk->isDirty() ? "writeback" : "clean"); + // must be an outstanding upgrade request on block + // we're about to replace... + assert(!blk->isWritable()); + assert(repl_mshr->needsExclusive); + // too hard to replace block with transient state; + // just use temporary storage to complete the current + // request and then get rid of it + assert(!tempBlock->isValid()); + blk = tempBlock; + tempBlock->set = tags->extractSet(addr); + DPRINTF(Cache, "using temp block for %x\n", addr); + } else { + DPRINTF(Cache, "replacement: replacing %x with %x: %s\n", + repl_addr, addr, + blk->isDirty() ? "writeback" : "clean"); - if (blk->isDirty()) { - // Save writeback packet for handling by caller - writebacks.push_back(writebackBlk(blk)); + if (blk->isDirty()) { + // Save writeback packet for handling by caller + writebacks.push_back(writebackBlk(blk)); + } } } diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc index 23645cb27..63b3cacc2 100644 --- a/src/mem/cache/miss/mshr.cc +++ b/src/mem/cache/miss/mshr.cc @@ -76,7 +76,6 @@ MSHR::allocate(Addr _addr, int _size, PacketPtr target, deferredNeedsExclusive = false; pendingInvalidate = false; pendingShared = false; - replacedPendingUpgrade = false; data = NULL; } @@ -185,61 +184,13 @@ MSHR::promoteDeferredTargets() void -MSHR::handleReplacement(CacheBlk *blk, int blkSize) -{ - // must be an outstanding upgrade request on block we're about to - // replace... - assert(!blk->isWritable()); - assert(needsExclusive); - replacedPendingUpgrade = true; - - // if it's dirty, just remember what happened and allow the - // writeback to continue. we'll reissue a ReadEx later whether - // the upgrade succeeds or not - if (blk->isDirty()) { - replacedPendingUpgradeDirty = true; - return; - } - - // if not dirty, we need to save it off as it will be only valid - // copy in system if upgrade is successful (and may need to be - // written back then, as the current owner if any will be - // invalidating its block) - replacedPendingUpgradeDirty = false; - data = new uint8_t[blkSize]; - std::memcpy(data, blk->data, blkSize); -} - - -bool MSHR::handleFill(Packet *pkt, CacheBlk *blk) { - if (replacedPendingUpgrade) { - // block was replaced while upgrade request was in service - assert(pkt->cmd == MemCmd::UpgradeResp); - assert(blk == NULL); - assert(replacedPendingUpgrade); - replacedPendingUpgrade = false; // reset - if (replacedPendingUpgradeDirty) { - // we wrote back the previous copy; just reissue as a ReadEx - return false; - } - - // previous copy was not dirty, but we are now owner... fake out - // cache by taking saved data and converting UpgradeResp to - // ReadExResp - assert(data); - pkt->cmd = MemCmd::ReadExResp; - pkt->setData(data); - delete [] data; - data = NULL; - } else if (pendingShared) { + if (pendingShared) { // we snooped another read while this read was in // service... assert shared line on its behalf pkt->assertShared(); } - - return true; } diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh index 07fe5c96c..4db7b1cfe 100644 --- a/src/mem/cache/miss/mshr.hh +++ b/src/mem/cache/miss/mshr.hh @@ -106,9 +106,6 @@ class MSHR : public Packet::SenderState bool deferredNeedsExclusive; bool pendingInvalidate; bool pendingShared; - /** Is there a pending upgrade that got replaced? */ - bool replacedPendingUpgrade; - bool replacedPendingUpgradeDirty; /** Thread number of the miss. */ short threadNum; @@ -213,8 +210,7 @@ public: bool promoteDeferredTargets(); - void handleReplacement(CacheBlk *blk, int blkSize); - bool handleFill(Packet *pkt, CacheBlk *blk); + void handleFill(Packet *pkt, CacheBlk *blk); /** * Prints the contents of this MSHR to stderr. -- cgit v1.2.3 From 9117c94f9c74f0674d75731385a106d17a1dee09 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Wed, 27 Jun 2007 20:54:13 -0700 Subject: Get rid of coherence protocol object. --HG-- extra : convert_revision : 4ff144342dca23af9a12a2169ca318a002654b42 --- configs/example/memtest.py | 4 - src/mem/cache/cache.cc | 12 +- src/mem/cache/cache.hh | 28 +- src/mem/cache/cache_builder.cc | 94 ++---- src/mem/cache/cache_impl.hh | 191 +++++------ src/mem/cache/coherence/CoherenceProtocol.py | 8 - src/mem/cache/coherence/SConscript | 36 -- src/mem/cache/coherence/coherence_protocol.cc | 469 -------------------------- src/mem/cache/coherence/coherence_protocol.hh | 257 -------------- src/mem/cache/coherence/simple_coherence.hh | 163 --------- 10 files changed, 140 insertions(+), 1122 deletions(-) delete mode 100644 src/mem/cache/coherence/CoherenceProtocol.py delete mode 100644 src/mem/cache/coherence/SConscript delete mode 100644 src/mem/cache/coherence/coherence_protocol.cc delete mode 100644 src/mem/cache/coherence/coherence_protocol.hh delete mode 100644 src/mem/cache/coherence/simple_coherence.hh diff --git a/configs/example/memtest.py b/configs/example/memtest.py index 0bc12e7bd..0e6260b5d 100644 --- a/configs/example/memtest.py +++ b/configs/example/memtest.py @@ -48,8 +48,6 @@ parser.add_option("-m", "--maxtick", type="int", default=m5.MaxTick, parser.add_option("-n", "--numtesters", type="int", default=8, metavar="N", help="Number of tester pseudo-CPUs [default: %default]") -parser.add_option("-p", "--protocol", default="moesi", - help="Coherence protocol [default: %default]") parser.add_option("-f", "--functional", type="int", default=0, metavar="PCT", @@ -95,7 +93,6 @@ class L1(BaseCache): block_size = block_size mshrs = num_l1_mshrs tgts_per_mshr = 8 - protocol = CoherenceProtocol(protocol=options.protocol) # ---------------------- # Base L2 Cache @@ -107,7 +104,6 @@ class L2(BaseCache): mshrs = num_l2_mshrs tgts_per_mshr = 16 write_buffers = 8 - protocol = CoherenceProtocol(protocol=options.protocol) if options.numtesters > block_size: print "Error: Number of testers limited to %s because of false sharing" \ diff --git a/src/mem/cache/cache.cc b/src/mem/cache/cache.cc index 96f9a2e11..c640d4a60 100644 --- a/src/mem/cache/cache.cc +++ b/src/mem/cache/cache.cc @@ -58,8 +58,6 @@ #include "mem/cache/tags/split_lifo.hh" #endif -#include "mem/cache/coherence/simple_coherence.hh" - #include "mem/cache/cache_impl.hh" // Template Instantiations @@ -67,23 +65,23 @@ #if defined(USE_CACHE_FALRU) -template class Cache; +template class Cache; #endif #if defined(USE_CACHE_IIC) -template class Cache; +template class Cache; #endif #if defined(USE_CACHE_LRU) -template class Cache; +template class Cache; #endif #if defined(USE_CACHE_SPLIT) -template class Cache; +template class Cache; #endif #if defined(USE_CACHE_SPLIT_LIFO) -template class Cache; +template class Cache; #endif #endif //DOXYGEN_SHOULD_SKIP_THIS diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh index 9e8c35066..57028a05e 100644 --- a/src/mem/cache/cache.hh +++ b/src/mem/cache/cache.hh @@ -39,9 +39,7 @@ #ifndef __CACHE_HH__ #define __CACHE_HH__ -#include "base/compression/base.hh" #include "base/misc.hh" // fatal, panic, and warn -#include "cpu/smt.hh" // SMT_MAX_THREADS #include "mem/cache/base_cache.hh" #include "mem/cache/cache_blk.hh" @@ -55,11 +53,9 @@ class BasePrefetcher; /** * A template-policy based cache. The behavior of the cache can be altered by * supplying different template policies. TagStore handles all tag and data - * storage @sa TagStore. Buffering handles all misses and writes/writebacks - * @sa MissQueue. Coherence handles all coherence policy details @sa - * UniCoherence, SimpleMultiCoherence. + * storage @sa TagStore. */ -template +template class Cache : public BaseCache { public: @@ -76,13 +72,13 @@ class Cache : public BaseCache { public: CpuSidePort(const std::string &_name, - Cache *_cache); + Cache *_cache); // BaseCache::CachePort just has a BaseCache *; this function // lets us get back the type info we lost when we stored the // cache pointer there. - Cache *myCache() { - return static_cast *>(cache); + Cache *myCache() { + return static_cast *>(cache); } virtual void getDeviceAddressRanges(AddrRangeList &resp, @@ -99,13 +95,13 @@ class Cache : public BaseCache { public: MemSidePort(const std::string &_name, - Cache *_cache); + Cache *_cache); // BaseCache::CachePort just has a BaseCache *; this function // lets us get back the type info we lost when we stored the // cache pointer there. - Cache *myCache() { - return static_cast *>(cache); + Cache *myCache() { + return static_cast *>(cache); } void sendPacket(); @@ -130,9 +126,6 @@ class Cache : public BaseCache /** Tag and data Storage */ TagStore *tags; - /** Coherence protocol. */ - Coherence *coherence; - /** Prefetcher */ BasePrefetcher *prefetcher; @@ -212,20 +205,19 @@ class Cache : public BaseCache { public: TagStore *tags; - Coherence *coherence; BaseCache::Params baseParams; BasePrefetcher*prefetcher; bool prefetchAccess; const bool doFastWrites; const bool prefetchMiss; - Params(TagStore *_tags, Coherence *coh, + Params(TagStore *_tags, BaseCache::Params params, BasePrefetcher *_prefetcher, bool prefetch_access, int hit_latency, bool do_fast_writes, bool prefetch_miss) - : tags(_tags), coherence(coh), + : tags(_tags), baseParams(params), prefetcher(_prefetcher), prefetchAccess(prefetch_access), doFastWrites(do_fast_writes), diff --git a/src/mem/cache/cache_builder.cc b/src/mem/cache/cache_builder.cc index 307c851a2..65418b68d 100644 --- a/src/mem/cache/cache_builder.cc +++ b/src/mem/cache/cache_builder.cc @@ -42,7 +42,6 @@ #include "mem/cache/base_cache.hh" #include "mem/cache/cache.hh" #include "mem/bus.hh" -#include "mem/cache/coherence/coherence_protocol.hh" #include "sim/builder.hh" // Tag Templates @@ -66,13 +65,6 @@ #include "mem/cache/tags/split_lifo.hh" #endif -// Compression Templates -#include "base/compression/null_compression.hh" -#include "base/compression/lzss_compression.hh" - -// Coherence Templates -#include "mem/cache/coherence/simple_coherence.hh" - //Prefetcher Headers #if defined(USE_GHB) #include "mem/cache/prefetch/ghb_prefetcher.hh" @@ -100,16 +92,11 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(BaseCache) Param tgts_per_mshr; Param write_buffers; Param prioritizeRequests; - SimObjectParam protocol; Param trace_addr; Param hash_delay; #if defined(USE_CACHE_IIC) SimObjectParam repl; #endif - Param compressed_bus; - Param store_compressed; - Param adaptive_compression; - Param compression_latency; Param subblock_size; Param max_miss_count; VectorParam > addr_range; @@ -144,23 +131,12 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(BaseCache) INIT_PARAM_DFLT(write_buffers, "number of write buffers", 8), INIT_PARAM_DFLT(prioritizeRequests, "always service demand misses first", false), - INIT_PARAM_DFLT(protocol, "coherence protocol to use in the cache", NULL), INIT_PARAM_DFLT(trace_addr, "address to trace", 0), INIT_PARAM_DFLT(hash_delay, "time in cycles of hash access",1), #if defined(USE_CACHE_IIC) INIT_PARAM_DFLT(repl, "replacement policy",NULL), #endif - INIT_PARAM_DFLT(compressed_bus, - "This cache connects to a compressed memory", - false), - INIT_PARAM_DFLT(store_compressed, "Store compressed data in the cache", - false), - INIT_PARAM_DFLT(adaptive_compression, "Use an adaptive compression scheme", - false), - INIT_PARAM_DFLT(compression_latency, - "Latency in cycles of compression algorithm", - 0), INIT_PARAM_DFLT(subblock_size, "Size of subblock in IIC used for compression", 0), @@ -188,7 +164,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(BaseCache) END_INIT_SIM_OBJECT_PARAMS(BaseCache) -#define BUILD_CACHE(TAGS, tags, c) \ +#define BUILD_CACHE(TAGS, tags) \ do { \ BasePrefetcher *pf; \ if (pf_policy == "tagged") { \ @@ -203,12 +179,12 @@ END_INIT_SIM_OBJECT_PARAMS(BaseCache) else { \ BUILD_NULL_PREFETCHER(TAGS); \ } \ - Cache::Params params(tags, coh, base_params, \ - pf, prefetch_access, latency, \ - true, \ - prefetch_miss); \ - Cache *retval = \ - new Cache(getInstanceName(), params); \ + Cache::Params params(tags, base_params, \ + pf, prefetch_access, latency, \ + true, \ + prefetch_miss); \ + Cache *retval = \ + new Cache(getInstanceName(), params); \ return retval; \ } while (0) @@ -216,79 +192,68 @@ END_INIT_SIM_OBJECT_PARAMS(BaseCache) panic("%s not compiled into M5", x); \ } while (0) -#define BUILD_COMPRESSED_CACHE(TAGS, tags, c) \ - do { \ - CompressionAlgorithm *compAlg; \ - if (compressed_bus || store_compressed) { \ - compAlg = new LZSSCompression(); \ - } else { \ - compAlg = new NullCompression(); \ - } \ - BUILD_CACHE(TAGS, tags, c); \ - } while (0) - #if defined(USE_CACHE_FALRU) -#define BUILD_FALRU_CACHE(c) do { \ +#define BUILD_FALRU_CACHE do { \ FALRU *tags = new FALRU(block_size, size, latency); \ - BUILD_COMPRESSED_CACHE(FALRU, tags, c); \ + BUILD_CACHE(FALRU, tags); \ } while (0) #else -#define BUILD_FALRU_CACHE(c) BUILD_CACHE_PANIC("falru cache") +#define BUILD_FALRU_CACHE BUILD_CACHE_PANIC("falru cache") #endif #if defined(USE_CACHE_LRU) -#define BUILD_LRU_CACHE(c) do { \ +#define BUILD_LRU_CACHE do { \ LRU *tags = new LRU(numSets, block_size, assoc, latency); \ - BUILD_COMPRESSED_CACHE(LRU, tags, c); \ + BUILD_CACHE(LRU, tags); \ } while (0) #else -#define BUILD_LRU_CACHE(c) BUILD_CACHE_PANIC("lru cache") +#define BUILD_LRU_CACHE BUILD_CACHE_PANIC("lru cache") #endif #if defined(USE_CACHE_SPLIT) -#define BUILD_SPLIT_CACHE(c) do { \ +#define BUILD_SPLIT_CACHE do { \ Split *tags = new Split(numSets, block_size, assoc, split_size, lifo, \ two_queue, latency); \ - BUILD_COMPRESSED_CACHE(Split, tags, c); \ + BUILD_CACHE(Split, tags); \ } while (0) #else -#define BUILD_SPLIT_CACHE(c) BUILD_CACHE_PANIC("split cache") +#define BUILD_SPLIT_CACHE BUILD_CACHE_PANIC("split cache") #endif #if defined(USE_CACHE_SPLIT_LIFO) -#define BUILD_SPLIT_LIFO_CACHE(c) do { \ +#define BUILD_SPLIT_LIFO_CACHE do { \ SplitLIFO *tags = new SplitLIFO(block_size, size, assoc, \ latency, two_queue, -1); \ - BUILD_COMPRESSED_CACHE(SplitLIFO, tags, c); \ + BUILD_CACHE(SplitLIFO, tags); \ } while (0) #else -#define BUILD_SPLIT_LIFO_CACHE(c) BUILD_CACHE_PANIC("lifo cache") +#define BUILD_SPLIT_LIFO_CACHE BUILD_CACHE_PANIC("lifo cache") #endif #if defined(USE_CACHE_IIC) -#define BUILD_IIC_CACHE(c) do { \ +#define BUILD_IIC_CACHE do { \ IIC *tags = new IIC(iic_params); \ - BUILD_COMPRESSED_CACHE(IIC, tags, c); \ + BUILD_CACHE(IIC, tags); \ } while (0) #else -#define BUILD_IIC_CACHE(c) BUILD_CACHE_PANIC("iic") +#define BUILD_IIC_CACHE BUILD_CACHE_PANIC("iic") #endif -#define BUILD_CACHES(c) do { \ +#define BUILD_CACHES do { \ if (repl == NULL) { \ if (numSets == 1) { \ - BUILD_FALRU_CACHE(c); \ + BUILD_FALRU_CACHE; \ } else { \ if (split == true) { \ - BUILD_SPLIT_CACHE(c); \ + BUILD_SPLIT_CACHE; \ } else if (lifo == true) { \ - BUILD_SPLIT_LIFO_CACHE(c); \ + BUILD_SPLIT_LIFO_CACHE; \ } else { \ - BUILD_LRU_CACHE(c); \ + BUILD_LRU_CACHE; \ } \ } \ } else { \ - BUILD_IIC_CACHE(c); \ + BUILD_IIC_CACHE; \ } \ } while (0) @@ -399,8 +364,7 @@ CREATE_SIM_OBJECT(BaseCache) const void *repl = NULL; #endif - SimpleCoherence *coh = new SimpleCoherence(protocol); - BUILD_CACHES(SimpleCoherence); + BUILD_CACHES; return NULL; } diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index 3685bc8cb..b76d7e392 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -48,13 +48,13 @@ #include "sim/sim_exit.hh" // for SimExitEvent -template -Cache::Cache(const std::string &_name, - Cache::Params ¶ms) +template +Cache::Cache(const std::string &_name, + Cache::Params ¶ms) : BaseCache(_name, params.baseParams), prefetchAccess(params.prefetchAccess), tags(params.tags), - coherence(params.coherence), prefetcher(params.prefetcher), + prefetcher(params.prefetcher), doFastWrites(params.doFastWrites), prefetchMiss(params.prefetchMiss) { @@ -67,23 +67,21 @@ Cache::Cache(const std::string &_name, memSidePort->setOtherPort(cpuSidePort); tags->setCache(this); - coherence->setCache(this); prefetcher->setCache(this); } -template +template void -Cache::regStats() +Cache::regStats() { BaseCache::regStats(); tags->regStats(name()); - coherence->regStats(name()); prefetcher->regStats(name()); } -template +template Port * -Cache::getPort(const std::string &if_name, int idx) +Cache::getPort(const std::string &if_name, int idx) { if (if_name == "" || if_name == "cpu_side") { return cpuSidePort; @@ -96,9 +94,9 @@ Cache::getPort(const std::string &if_name, int idx) } } -template +template void -Cache::deletePortRefs(Port *p) +Cache::deletePortRefs(Port *p) { if (cpuSidePort == p || memSidePort == p) panic("Can only delete functional ports\n"); @@ -107,9 +105,9 @@ Cache::deletePortRefs(Port *p) } -template +template void -Cache::cmpAndSwap(BlkType *blk, PacketPtr pkt) +Cache::cmpAndSwap(BlkType *blk, PacketPtr pkt) { uint64_t overwrite_val; bool overwrite_mem; @@ -152,9 +150,9 @@ Cache::cmpAndSwap(BlkType *blk, PacketPtr pkt) ///////////////////////////////////////////////////// -template +template void -Cache::markInService(MSHR *mshr) +Cache::markInService(MSHR *mshr) { markInServiceInternal(mshr); #if 0 @@ -171,9 +169,9 @@ Cache::markInService(MSHR *mshr) } -template +template void -Cache::squash(int threadNum) +Cache::squash(int threadNum) { bool unblock = false; BlockedCause cause = NUM_BLOCKED_CAUSES; @@ -199,9 +197,9 @@ Cache::squash(int threadNum) // ///////////////////////////////////////////////////// -template +template bool -Cache::access(PacketPtr pkt, BlkType *&blk, int &lat) +Cache::access(PacketPtr pkt, BlkType *&blk, int &lat) { if (pkt->req->isUncacheable()) { blk = NULL; @@ -280,9 +278,9 @@ Cache::access(PacketPtr pkt, BlkType *&blk, int &lat) } -template +template bool -Cache::timingAccess(PacketPtr pkt) +Cache::timingAccess(PacketPtr pkt) { //@todo Add back in MemDebug Calls // MemDebug::cacheAccess(pkt); @@ -398,10 +396,10 @@ Cache::timingAccess(PacketPtr pkt) } -template +template PacketPtr -Cache::getBusPacket(PacketPtr cpu_pkt, BlkType *blk, - bool needsExclusive) +Cache::getBusPacket(PacketPtr cpu_pkt, BlkType *blk, + bool needsExclusive) { bool blkValid = blk && blk->isValid(); @@ -441,9 +439,9 @@ Cache::getBusPacket(PacketPtr cpu_pkt, BlkType *blk, } -template +template Tick -Cache::atomicAccess(PacketPtr pkt) +Cache::atomicAccess(PacketPtr pkt) { int lat = hitLatency; @@ -511,10 +509,10 @@ Cache::atomicAccess(PacketPtr pkt) } -template +template void -Cache::functionalAccess(PacketPtr pkt, - CachePort *otherSidePort) +Cache::functionalAccess(PacketPtr pkt, + CachePort *otherSidePort) { Addr blk_addr = pkt->getAddr() & ~(blkSize - 1); BlkType *blk = tags->findBlock(pkt->getAddr()); @@ -561,9 +559,9 @@ Cache::functionalAccess(PacketPtr pkt, ///////////////////////////////////////////////////// -template +template void -Cache::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk) +Cache::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk) { assert(blk); assert(pkt->needsExclusive() ? blk->isWritable() : blk->isValid()); @@ -586,10 +584,10 @@ Cache::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk) } -template +template bool -Cache::satisfyMSHR(MSHR *mshr, PacketPtr pkt, - BlkType *blk) +Cache::satisfyMSHR(MSHR *mshr, PacketPtr pkt, + BlkType *blk) { // respond to MSHR targets, if any @@ -642,9 +640,9 @@ Cache::satisfyMSHR(MSHR *mshr, PacketPtr pkt, } -template +template void -Cache::handleResponse(PacketPtr pkt) +Cache::handleResponse(PacketPtr pkt) { Tick time = curTick + hitLatency; MSHR *mshr = dynamic_cast(pkt->senderState); @@ -730,9 +728,9 @@ Cache::handleResponse(PacketPtr pkt) -template +template PacketPtr -Cache::writebackBlk(BlkType *blk) +Cache::writebackBlk(BlkType *blk) { assert(blk && blk->isValid() && blk->isDirty()); @@ -754,12 +752,13 @@ Cache::writebackBlk(BlkType *blk) // is called by both atomic and timing-mode accesses, and in atomic // mode we don't mess with the write buffer (we just perform the // writebacks atomically once the original request is complete). -template -typename Cache::BlkType* -Cache::handleFill(PacketPtr pkt, BlkType *blk, - PacketList &writebacks) +template +typename Cache::BlkType* +Cache::handleFill(PacketPtr pkt, BlkType *blk, + PacketList &writebacks) { Addr addr = pkt->getAddr(); + CacheBlk::State old_state = blk ? blk->status : 0; if (blk == NULL) { // better have read new data... @@ -795,21 +794,24 @@ Cache::handleFill(PacketPtr pkt, BlkType *blk, } blk->tag = tags->extractTag(addr); - blk->status = coherence->getNewState(pkt); } else { // existing block... probably an upgrade assert(blk->tag == tags->extractTag(addr)); // either we're getting new data or the block should already be valid assert(pkt->isRead() || blk->isValid()); - CacheBlk::State old_state = blk->status; - blk->status = coherence->getNewState(pkt, old_state); - if (blk->status != old_state) - DPRINTF(Cache, "Block addr %x moving from state %i to %i\n", - addr, old_state, blk->status); - else - warn("Changing state to same value\n"); } + if (pkt->needsExclusive()) { + blk->status = BlkValid | BlkWritable | BlkDirty; + } else if (!pkt->sharedAsserted()) { + blk->status = BlkValid | BlkWritable; + } else { + blk->status = BlkValid; + } + + DPRINTF(Cache, "Block addr %x moving from state %i to %i\n", + addr, old_state, blk->status); + // if we got new data, copy it in if (pkt->isRead()) { std::memcpy(blk->data, pkt->getPtr(), blkSize); @@ -827,11 +829,11 @@ Cache::handleFill(PacketPtr pkt, BlkType *blk, // ///////////////////////////////////////////////////// -template +template void -Cache::doTimingSupplyResponse(PacketPtr req_pkt, - uint8_t *blk_data, - bool already_copied) +Cache::doTimingSupplyResponse(PacketPtr req_pkt, + uint8_t *blk_data, + bool already_copied) { // timing-mode snoop responses require a new packet, unless we // already made a copy... @@ -842,10 +844,10 @@ Cache::doTimingSupplyResponse(PacketPtr req_pkt, memSidePort->respond(pkt, curTick + hitLatency); } -template +template void -Cache::handleSnoop(PacketPtr pkt, BlkType *blk, - bool is_timing, bool is_deferred) +Cache::handleSnoop(PacketPtr pkt, BlkType *blk, + bool is_timing, bool is_deferred) { if (!blk || !blk->isValid()) { return; @@ -894,9 +896,9 @@ Cache::handleSnoop(PacketPtr pkt, BlkType *blk, } -template +template void -Cache::snoopTiming(PacketPtr pkt) +Cache::snoopTiming(PacketPtr pkt) { if (pkt->req->isUncacheable()) { //Can't get a hit on an uncacheable address @@ -959,9 +961,9 @@ Cache::snoopTiming(PacketPtr pkt) } -template +template Tick -Cache::snoopAtomic(PacketPtr pkt) +Cache::snoopAtomic(PacketPtr pkt) { if (pkt->req->isUncacheable()) { // Can't get a hit on an uncacheable address @@ -975,9 +977,9 @@ Cache::snoopAtomic(PacketPtr pkt) } -template +template MSHR * -Cache::getNextMSHR() +Cache::getNextMSHR() { // Check both MSHR queue and write buffer for potential requests MSHR *miss_mshr = mshrQueue.getNextMSHR(); @@ -1051,9 +1053,9 @@ Cache::getNextMSHR() } -template +template PacketPtr -Cache::getTimingPacket() +Cache::getTimingPacket() { MSHR *mshr = getNextMSHR(); @@ -1100,9 +1102,9 @@ Cache::getTimingPacket() // /////////////// -template +template void -Cache::CpuSidePort:: +Cache::CpuSidePort:: getDeviceAddressRanges(AddrRangeList &resp, bool &snoop) { // CPU side port doesn't snoop; it's a target only. @@ -1112,9 +1114,9 @@ getDeviceAddressRanges(AddrRangeList &resp, bool &snoop) } -template +template bool -Cache::CpuSidePort::recvTiming(PacketPtr pkt) +Cache::CpuSidePort::recvTiming(PacketPtr pkt) { if (pkt->isRequest() && blocked) { DPRINTF(Cache,"Scheduling a retry while blocked\n"); @@ -1127,17 +1129,17 @@ Cache::CpuSidePort::recvTiming(PacketPtr pkt) } -template +template Tick -Cache::CpuSidePort::recvAtomic(PacketPtr pkt) +Cache::CpuSidePort::recvAtomic(PacketPtr pkt) { return myCache()->atomicAccess(pkt); } -template +template void -Cache::CpuSidePort::recvFunctional(PacketPtr pkt) +Cache::CpuSidePort::recvFunctional(PacketPtr pkt) { checkFunctional(pkt); if (pkt->result != Packet::Success) @@ -1145,10 +1147,10 @@ Cache::CpuSidePort::recvFunctional(PacketPtr pkt) } -template -Cache:: +template +Cache:: CpuSidePort::CpuSidePort(const std::string &_name, - Cache *_cache) + Cache *_cache) : BaseCache::CachePort(_name, _cache) { } @@ -1159,9 +1161,9 @@ CpuSidePort::CpuSidePort(const std::string &_name, // /////////////// -template +template void -Cache::MemSidePort:: +Cache::MemSidePort:: getDeviceAddressRanges(AddrRangeList &resp, bool &snoop) { otherPort->getPeerAddressRanges(resp, snoop); @@ -1171,9 +1173,9 @@ getDeviceAddressRanges(AddrRangeList &resp, bool &snoop) } -template +template bool -Cache::MemSidePort::recvTiming(PacketPtr pkt) +Cache::MemSidePort::recvTiming(PacketPtr pkt) { // this needs to be fixed so that the cache updates the mshr and sends the // packet back out on the link, but it probably won't happen so until this @@ -1196,9 +1198,9 @@ Cache::MemSidePort::recvTiming(PacketPtr pkt) } -template +template Tick -Cache::MemSidePort::recvAtomic(PacketPtr pkt) +Cache::MemSidePort::recvAtomic(PacketPtr pkt) { // in atomic mode, responses go back to the sender via the // function return from sendAtomic(), not via a separate @@ -1209,9 +1211,9 @@ Cache::MemSidePort::recvAtomic(PacketPtr pkt) } -template +template void -Cache::MemSidePort::recvFunctional(PacketPtr pkt) +Cache::MemSidePort::recvFunctional(PacketPtr pkt) { checkFunctional(pkt); if (pkt->result != Packet::Success) @@ -1220,9 +1222,9 @@ Cache::MemSidePort::recvFunctional(PacketPtr pkt) -template +template void -Cache::MemSidePort::sendPacket() +Cache::MemSidePort::sendPacket() { // if we have responses that are ready, they take precedence if (deferredPacketReady()) { @@ -1278,28 +1280,27 @@ Cache::MemSidePort::sendPacket() } } -template +template void -Cache::MemSidePort::recvRetry() +Cache::MemSidePort::recvRetry() { assert(waitingOnRetry); sendPacket(); } -template +template void -Cache::MemSidePort::processSendEvent() +Cache::MemSidePort::processSendEvent() { assert(!waitingOnRetry); sendPacket(); } -template -Cache:: -MemSidePort::MemSidePort(const std::string &_name, - Cache *_cache) +template +Cache:: +MemSidePort::MemSidePort(const std::string &_name, Cache *_cache) : BaseCache::CachePort(_name, _cache) { // override default send event from SimpleTimingPort diff --git a/src/mem/cache/coherence/CoherenceProtocol.py b/src/mem/cache/coherence/CoherenceProtocol.py deleted file mode 100644 index 82adb6862..000000000 --- a/src/mem/cache/coherence/CoherenceProtocol.py +++ /dev/null @@ -1,8 +0,0 @@ -from m5.SimObject import SimObject -from m5.params import * -class Coherence(Enum): vals = ['uni', 'msi', 'mesi', 'mosi', 'moesi'] - -class CoherenceProtocol(SimObject): - type = 'CoherenceProtocol' - do_upgrades = Param.Bool(True, "use upgrade transactions?") - protocol = Param.Coherence("name of coherence protocol") diff --git a/src/mem/cache/coherence/SConscript b/src/mem/cache/coherence/SConscript deleted file mode 100644 index 91720b20e..000000000 --- a/src/mem/cache/coherence/SConscript +++ /dev/null @@ -1,36 +0,0 @@ -# -*- mode:python -*- - -# Copyright (c) 2006 The Regents of The University of Michigan -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer; -# redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution; -# neither the name of the copyright holders nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Authors: Nathan Binkert - -Import('*') - -SimObject('CoherenceProtocol.py') - -Source('coherence_protocol.cc') - diff --git a/src/mem/cache/coherence/coherence_protocol.cc b/src/mem/cache/coherence/coherence_protocol.cc deleted file mode 100644 index 47d2b469f..000000000 --- a/src/mem/cache/coherence/coherence_protocol.cc +++ /dev/null @@ -1,469 +0,0 @@ -/* - * Copyright (c) 2002-2005 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Authors: Erik Hallnor - * Steve Reinhardt - * Ron Dreslinski - */ - -/** - * @file - * Definitions of CoherenceProtocol. - */ - -#include - -#include "base/misc.hh" -#include "mem/cache/miss/mshr.hh" -#include "mem/cache/cache.hh" -#include "mem/cache/coherence/coherence_protocol.hh" -#include "sim/builder.hh" - -using namespace std; - - -CoherenceProtocol::StateTransition::StateTransition() - : busCmd(MemCmd::InvalidCmd), newState(-1), snoopFunc(invalidTransition) -{ -} - - -void -CoherenceProtocol::regStats() -{ - // Even though we count all the possible transitions in the - // requestCount and snoopCount arrays, most of these are invalid, - // so we just select the interesting ones to print here. - - requestCount[Invalid][MemCmd::ReadReq] - .name(name() + ".read_invalid") - .desc("read misses to invalid blocks") - ; - - requestCount[Invalid][MemCmd::WriteReq] - .name(name() +".write_invalid") - .desc("write misses to invalid blocks") - ; - - requestCount[Invalid][MemCmd::SoftPFReq] - .name(name() +".swpf_invalid") - .desc("soft prefetch misses to invalid blocks") - ; - - requestCount[Invalid][MemCmd::HardPFReq] - .name(name() +".hwpf_invalid") - .desc("hard prefetch misses to invalid blocks") - ; - - requestCount[Shared][MemCmd::WriteReq] - .name(name() + ".write_shared") - .desc("write misses to shared blocks") - ; - - requestCount[Owned][MemCmd::WriteReq] - .name(name() + ".write_owned") - .desc("write misses to owned blocks") - ; - - snoopCount[Shared][MemCmd::ReadReq] - .name(name() + ".snoop_read_shared") - .desc("read snoops on shared blocks") - ; - - snoopCount[Shared][MemCmd::ReadExReq] - .name(name() + ".snoop_readex_shared") - .desc("readEx snoops on shared blocks") - ; - - snoopCount[Shared][MemCmd::UpgradeReq] - .name(name() + ".snoop_upgrade_shared") - .desc("upgradee snoops on shared blocks") - ; - - snoopCount[Modified][MemCmd::ReadReq] - .name(name() + ".snoop_read_modified") - .desc("read snoops on modified blocks") - ; - - snoopCount[Modified][MemCmd::ReadExReq] - .name(name() + ".snoop_readex_modified") - .desc("readEx snoops on modified blocks") - ; - - snoopCount[Owned][MemCmd::ReadReq] - .name(name() + ".snoop_read_owned") - .desc("read snoops on owned blocks") - ; - - snoopCount[Owned][MemCmd::ReadExReq] - .name(name() + ".snoop_readex_owned") - .desc("readEx snoops on owned blocks") - ; - - snoopCount[Owned][MemCmd::UpgradeReq] - .name(name() + ".snoop_upgrade_owned") - .desc("upgrade snoops on owned blocks") - ; - - snoopCount[Exclusive][MemCmd::ReadReq] - .name(name() + ".snoop_read_exclusive") - .desc("read snoops on exclusive blocks") - ; - - snoopCount[Exclusive][MemCmd::ReadExReq] - .name(name() + ".snoop_readex_exclusive") - .desc("readEx snoops on exclusive blocks") - ; - - snoopCount[Shared][MemCmd::WriteInvalidateReq] - .name(name() + ".snoop_writeinv_shared") - .desc("WriteInvalidate snoops on shared blocks") - ; - - snoopCount[Owned][MemCmd::WriteInvalidateReq] - .name(name() + ".snoop_writeinv_owned") - .desc("WriteInvalidate snoops on owned blocks") - ; - - snoopCount[Exclusive][MemCmd::WriteInvalidateReq] - .name(name() + ".snoop_writeinv_exclusive") - .desc("WriteInvalidate snoops on exclusive blocks") - ; - - snoopCount[Modified][MemCmd::WriteInvalidateReq] - .name(name() + ".snoop_writeinv_modified") - .desc("WriteInvalidate snoops on modified blocks") - ; - - snoopCount[Invalid][MemCmd::WriteInvalidateReq] - .name(name() + ".snoop_writeinv_invalid") - .desc("WriteInvalidate snoops on invalid blocks") - ; -} - - -bool -CoherenceProtocol::invalidateTrans(BaseCache *cache, PacketPtr &pkt, - CacheBlk *blk, MSHR *mshr, - CacheBlk::State & new_state) -{ - // invalidate the block - new_state = (blk->status & ~stateMask) | Invalid; - return false; -} - - -bool -CoherenceProtocol::supplyTrans(BaseCache *cache, PacketPtr &pkt, - CacheBlk *blk, - MSHR *mshr, - CacheBlk::State & new_state) -{ - return true; -} - - -bool -CoherenceProtocol::supplyAndGotoSharedTrans(BaseCache *cache, PacketPtr &pkt, - CacheBlk *blk, - MSHR *mshr, - CacheBlk::State & new_state) -{ - new_state = (blk->status & ~stateMask) | Shared; - pkt->assertShared(); - return supplyTrans(cache, pkt, blk, mshr, new_state); -} - - -bool -CoherenceProtocol::supplyAndGotoOwnedTrans(BaseCache *cache, PacketPtr &pkt, - CacheBlk *blk, - MSHR *mshr, - CacheBlk::State & new_state) -{ - new_state = (blk->status & ~stateMask) | Owned; - pkt->assertShared(); - return supplyTrans(cache, pkt, blk, mshr, new_state); -} - - -bool -CoherenceProtocol::supplyAndInvalidateTrans(BaseCache *cache, PacketPtr &pkt, - CacheBlk *blk, - MSHR *mshr, - CacheBlk::State & new_state) -{ - new_state = (blk->status & ~stateMask) | Invalid; - return supplyTrans(cache, pkt, blk, mshr, new_state); -} - -bool -CoherenceProtocol::assertShared(BaseCache *cache, PacketPtr &pkt, - CacheBlk *blk, - MSHR *mshr, - CacheBlk::State & new_state) -{ - new_state = (blk->status & ~stateMask) | Shared; - pkt->assertShared(); - return false; -} - -CoherenceProtocol::CoherenceProtocol(const string &name, - const string &protocol, - const bool doUpgrades) - : SimObject(name) -{ - // Python should catch this, but in case it doesn't... - if (!(protocol == "msi" || protocol == "mesi" || - protocol == "mosi" || protocol == "moesi")) { - fatal("CoherenceProtocol: unrecognized protocol %s\n", protocol); - } - - bool hasOwned = (protocol == "mosi" || protocol == "moesi"); - bool hasExclusive = (protocol == "mesi" || protocol == "moesi"); - - if (hasOwned && !doUpgrades) { - fatal("CoherenceProtocol: ownership protocols require upgrade " - "transactions\n(write miss on owned block generates ReadExcl, " - "which will clobber dirty block)\n"); - } - - // set up a few shortcuts to save typing & visual clutter - typedef MemCmd MC; - StateTransition (&tt)[stateMax+1][MC::NUM_MEM_CMDS] = transitionTable; - - MC::Command writeToSharedCmd = - doUpgrades ? MC::UpgradeReq : MC::ReadExReq; - MC::Command writeToSharedResp = - doUpgrades ? MC::UpgradeResp : MC::ReadExResp; - - // Note that all transitions by default cause a panic. - // Override the valid transitions with the appropriate actions here. - - // - // ----- incoming requests: specify outgoing bus request ----- - // - tt[Invalid][MC::ReadReq].onRequest(MC::ReadReq); - // we only support write allocate right now - tt[Invalid][MC::WriteReq].onRequest(MC::ReadExReq); - tt[Invalid][MC::ReadExReq].onRequest(MC::ReadExReq); - tt[Invalid][MC::SwapReq].onRequest(MC::ReadExReq); - tt[Invalid][MC::UpgradeReq].onRequest(MC::UpgradeReq); - tt[Shared][MC::WriteReq].onRequest(writeToSharedCmd); - tt[Shared][MC::ReadExReq].onRequest(MC::ReadExReq); - tt[Shared][MC::SwapReq].onRequest(writeToSharedCmd); - if (hasOwned) { - tt[Owned][MC::WriteReq].onRequest(writeToSharedCmd); - tt[Owned][MC::ReadExReq].onRequest(MC::ReadExReq); - tt[Owned][MC::SwapReq].onRequest(writeToSharedCmd); - } - - // Prefetching causes a read - tt[Invalid][MC::SoftPFReq].onRequest(MC::ReadReq); - tt[Invalid][MC::HardPFReq].onRequest(MC::ReadReq); - - // - // ----- on response to given request: specify new state ----- - // - tt[Invalid][MC::ReadExResp].onResponse(Modified); - tt[Shared][writeToSharedResp].onResponse(Modified); - // Go to Exclusive state on read response if we have one (will - // move into shared if the shared line is asserted in the - // getNewState function) - // - // originally had this as: - // tt[Invalid][MC::ReadResp].onResponse(hasExclusive ? Exclusive: Shared); - // ...but for some reason that caused a link error... - if (hasExclusive) { - tt[Invalid][MC::ReadResp].onResponse(Exclusive); - } else { - tt[Invalid][MC::ReadResp].onResponse(Shared); - } - if (hasOwned) { - tt[Owned][writeToSharedResp].onResponse(Modified); - } - - // - // ----- bus snoop transition functions ----- - // - tt[Invalid][MC::ReadReq].onSnoop(nullTransition); - tt[Invalid][MC::ReadExReq].onSnoop(nullTransition); - tt[Invalid][MC::WriteInvalidateReq].onSnoop(invalidateTrans); - tt[Shared][MC::ReadReq].onSnoop(hasExclusive - ? assertShared : nullTransition); - tt[Shared][MC::ReadExReq].onSnoop(invalidateTrans); - tt[Shared][MC::WriteInvalidateReq].onSnoop(invalidateTrans); - if (doUpgrades) { - tt[Invalid][MC::UpgradeReq].onSnoop(nullTransition); - tt[Shared][MC::UpgradeReq].onSnoop(invalidateTrans); - } - tt[Modified][MC::ReadExReq].onSnoop(supplyAndInvalidateTrans); - tt[Modified][MC::ReadReq].onSnoop(hasOwned - ? supplyAndGotoOwnedTrans - : supplyAndGotoSharedTrans); - tt[Modified][MC::WriteInvalidateReq].onSnoop(invalidateTrans); - - if (hasExclusive) { - tt[Exclusive][MC::ReadReq].onSnoop(assertShared); - tt[Exclusive][MC::ReadExReq].onSnoop(invalidateTrans); - tt[Exclusive][MC::WriteInvalidateReq].onSnoop(invalidateTrans); - } - - if (hasOwned) { - tt[Owned][MC::ReadReq].onSnoop(supplyAndGotoOwnedTrans); - tt[Owned][MC::ReadExReq].onSnoop(supplyAndInvalidateTrans); - tt[Owned][MC::UpgradeReq].onSnoop(invalidateTrans); - tt[Owned][MC::WriteInvalidateReq].onSnoop(invalidateTrans); - } - - // @todo add in hardware prefetch to this list -} - - -MemCmd -CoherenceProtocol::getBusCmd(MemCmd cmdIn, CacheBlk::State state, - MSHR *mshr) -{ - state &= stateMask; - int cmd_idx = cmdIn.toInt(); - - assert(0 <= state && state <= stateMax); - assert(0 <= cmd_idx && cmd_idx < MemCmd::NUM_MEM_CMDS); - - MemCmd::Command cmdOut = transitionTable[state][cmd_idx].busCmd; - - assert(cmdOut != MemCmd::InvalidCmd); - - ++requestCount[state][cmd_idx]; - - return cmdOut; -} - - -CacheBlk::State -CoherenceProtocol::getNewState(PacketPtr pkt, CacheBlk::State oldState) -{ - CacheBlk::State state = oldState & stateMask; - int cmd_idx = pkt->cmdToIndex(); - - assert(0 <= state && state <= stateMax); - assert(0 <= cmd_idx && cmd_idx < MemCmd::NUM_MEM_CMDS); - - CacheBlk::State newState = transitionTable[state][cmd_idx].newState; - - //Check if it's exclusive and the shared line was asserted, - //then goto shared instead - if (newState == Exclusive && pkt->sharedAsserted()) { - newState = Shared; - } - - assert(newState != -1); - - //Make sure not to loose any other state information - newState = (oldState & ~stateMask) | newState; - return newState; -} - - -bool -CoherenceProtocol::handleBusRequest(BaseCache *cache, PacketPtr &pkt, - CacheBlk *blk, - MSHR *mshr, - CacheBlk::State & new_state) -{ - if (blk == NULL) { - // nothing to do if we don't have a block - return false; - } - - CacheBlk::State state = blk->status & stateMask; - int cmd_idx = pkt->cmdToIndex(); - - assert(0 <= state && state <= stateMax); - assert(0 <= cmd_idx && cmd_idx < MemCmd::NUM_MEM_CMDS); - -// assert(mshr == NULL); // can't currently handle outstanding requests - //Check first if MSHR, and also insure, if there is one, that it is not in service - assert(!mshr || mshr->inService == 0); - ++snoopCount[state][cmd_idx]; - - bool ret = transitionTable[state][cmd_idx].snoopFunc(cache, pkt, blk, mshr, - new_state); - - - - return ret; -} - -bool -CoherenceProtocol::nullTransition(BaseCache *cache, PacketPtr &pkt, - CacheBlk *blk, MSHR *mshr, - CacheBlk::State & new_state) -{ - // do nothing - if (blk) - new_state = blk->status; - return false; -} - - -bool -CoherenceProtocol::invalidTransition(BaseCache *cache, PacketPtr &pkt, - CacheBlk *blk, MSHR *mshr, - CacheBlk::State & new_state) -{ - panic("Invalid transition"); - return false; -} - -#ifndef DOXYGEN_SHOULD_SKIP_THIS - -BEGIN_DECLARE_SIM_OBJECT_PARAMS(CoherenceProtocol) - - Param protocol; - Param do_upgrades; - -END_DECLARE_SIM_OBJECT_PARAMS(CoherenceProtocol) - - -BEGIN_INIT_SIM_OBJECT_PARAMS(CoherenceProtocol) - - INIT_PARAM(protocol, "name of coherence protocol"), - INIT_PARAM_DFLT(do_upgrades, "use upgrade transactions?", true) - -END_INIT_SIM_OBJECT_PARAMS(CoherenceProtocol) - - -CREATE_SIM_OBJECT(CoherenceProtocol) -{ - return new CoherenceProtocol(getInstanceName(), protocol, - do_upgrades); -} - -REGISTER_SIM_OBJECT("CoherenceProtocol", CoherenceProtocol) - -#endif // DOXYGEN_SHOULD_SKIP_THIS diff --git a/src/mem/cache/coherence/coherence_protocol.hh b/src/mem/cache/coherence/coherence_protocol.hh deleted file mode 100644 index 4b8024582..000000000 --- a/src/mem/cache/coherence/coherence_protocol.hh +++ /dev/null @@ -1,257 +0,0 @@ -/* - * Copyright (c) 2002-2005 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Authors: Erik Hallnor - * Ron Dreslinski - * Steve Reinhardt - */ - -/** - * @file - * Declaration of CoherenceProcotol a basic coherence policy. - */ -#ifndef __COHERENCE_PROTOCOL_HH__ -#define __COHERENCE_PROTOCOL_HH__ - -#include - -#include "sim/sim_object.hh" -#include "mem/packet.hh" -#include "mem/cache/cache_blk.hh" -#include "base/statistics.hh" - -class BaseCache; -class MSHR; - -/** - * A simple coherence policy for the memory hierarchy. Currently implements - * MSI, MESI, and MOESI protocols. - */ -class CoherenceProtocol : public SimObject -{ - public: - /** - * Contruct and initialize this policy. - * @param name The name of this policy. - * @param protocol The string representation of the protocol to use. - * @param doUpgrades True if bus upgrades should be used. - */ - CoherenceProtocol(const std::string &name, const std::string &protocol, - const bool doUpgrades); - - /** - * Destructor. - */ - virtual ~CoherenceProtocol() {}; - - /** - * Register statistics - */ - virtual void regStats(); - - /** - * Get the proper bus command for the given command and status. - * @param cmd The request's command. - * @param status The current state of the cache block. - * @param mshr The MSHR matching the request. - * @return The proper bus command, as determined by the protocol. - */ - MemCmd getBusCmd(MemCmd cmd, CacheBlk::State status, - MSHR *mshr = NULL); - - /** - * Return the proper state given the current state and the bus response. - * @param pkt The bus response. - * @param oldState The current block state. - * @return The new state. - */ - CacheBlk::State getNewState(PacketPtr pkt, - CacheBlk::State oldState = 0); - - /** - * Handle snooped bus requests. - * @param cache The cache that snooped the request. - * @param pkt The snooped bus request. - * @param blk The cache block corresponding to the request, if any. - * @param mshr The MSHR corresponding to the request, if any. - * @param new_state The new coherence state of the block. - * @return True if the request should be satisfied locally. - */ - bool handleBusRequest(BaseCache *cache, PacketPtr &pkt, CacheBlk *blk, - MSHR *mshr, CacheBlk::State &new_state); - - protected: - /** Snoop function type. */ - typedef bool (*SnoopFuncType)(BaseCache *, PacketPtr &, CacheBlk *, - MSHR *, CacheBlk::State&); - - // - // Standard snoop transition functions - // - - /** - * Do nothing transition. - */ - static bool nullTransition(BaseCache *, PacketPtr &, CacheBlk *, - MSHR *, CacheBlk::State&); - - /** - * Invalid transition, basically panic. - */ - static bool invalidTransition(BaseCache *, PacketPtr &, CacheBlk *, - MSHR *, CacheBlk::State&); - - /** - * Invalidate block, move to Invalid state. - */ - static bool invalidateTrans(BaseCache *, PacketPtr &, CacheBlk *, - MSHR *, CacheBlk::State&); - - /** - * Supply data, no state transition. - */ - static bool supplyTrans(BaseCache *, PacketPtr &, CacheBlk *, - MSHR *, CacheBlk::State&); - - /** - * Supply data and go to Shared state. - */ - static bool supplyAndGotoSharedTrans(BaseCache *, PacketPtr &, CacheBlk *, - MSHR *, CacheBlk::State&); - - /** - * Supply data and go to Owned state. - */ - static bool supplyAndGotoOwnedTrans(BaseCache *, PacketPtr &, CacheBlk *, - MSHR *, CacheBlk::State&); - - /** - * Invalidate block, supply data, and go to Invalid state. - */ - static bool supplyAndInvalidateTrans(BaseCache *, PacketPtr &, CacheBlk *, - MSHR *, CacheBlk::State&); - - /** - * Assert the shared line for a block that is shared/exclusive. - */ - static bool assertShared(BaseCache *, PacketPtr &, CacheBlk *, - MSHR *, CacheBlk::State&); - - /** - * Definition of protocol state transitions. - */ - class StateTransition - { - friend class CoherenceProtocol; - - /** The bus command of this transition. */ - Packet::Command busCmd; - /** The state to transition to. */ - int newState; - /** The snoop function for this transition. */ - SnoopFuncType snoopFunc; - - /** - * Constructor, defaults to invalid transition. - */ - StateTransition(); - - /** - * Initialize bus command. - * @param cmd The bus command to use. - */ - void onRequest(Packet::Command cmd) - { - busCmd = cmd; - } - - /** - * Set the transition state. - * @param s The new state. - */ - void onResponse(CacheBlk::State s) - { - newState = s; - } - - /** - * Initialize the snoop function. - * @param f The new snoop function. - */ - void onSnoop(SnoopFuncType f) - { - snoopFunc = f; - } - }; - - friend class CoherenceProtocol::StateTransition; - - /** Mask to select status bits relevant to coherence protocol. */ - static const int stateMask = BlkValid | BlkWritable | BlkDirty; - - /** The Modified (M) state. */ - static const int Modified = BlkValid | BlkWritable | BlkDirty; - /** The Owned (O) state. */ - static const int Owned = BlkValid | BlkDirty; - /** The Exclusive (E) state. */ - static const int Exclusive = BlkValid | BlkWritable; - /** The Shared (S) state. */ - static const int Shared = BlkValid; - /** The Invalid (I) state. */ - static const int Invalid = 0; - - /** - * Maximum state encoding value (used to size transition lookup - * table). Could be more than number of states, depends on - * encoding of status bits. - */ - static const int stateMax = stateMask; - - /** - * The table of all possible transitions, organized by starting state and - * request command. - */ - StateTransition transitionTable[stateMax+1][MemCmd::NUM_MEM_CMDS]; - - /** - * @addtogroup CoherenceStatistics - * @{ - */ - /** - * State accesses from parent cache. - */ - Stats::Scalar<> requestCount[stateMax+1][MemCmd::NUM_MEM_CMDS]; - /** - * State accesses from snooped requests. - */ - Stats::Scalar<> snoopCount[stateMax+1][MemCmd::NUM_MEM_CMDS]; - /** - * @} - */ -}; - -#endif // __COHERENCE_PROTOCOL_HH__ diff --git a/src/mem/cache/coherence/simple_coherence.hh b/src/mem/cache/coherence/simple_coherence.hh deleted file mode 100644 index 214828ca7..000000000 --- a/src/mem/cache/coherence/simple_coherence.hh +++ /dev/null @@ -1,163 +0,0 @@ -/* - * Copyright (c) 2003-2005 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Authors: Erik Hallnor - * Ron Dreslinski - */ - -/** - * @file - * Declaration of a simple coherence policy. - */ - -#ifndef __SIMPLE_COHERENCE_HH__ -#define __SIMPLE_COHERENCE_HH__ - -#include - -#include "mem/packet.hh" -#include "mem/cache/cache_blk.hh" -#include "mem/cache/miss/mshr_queue.hh" -#include "mem/cache/coherence/coherence_protocol.hh" - -class BaseCache; - -/** - * A simple MP coherence policy. This policy assumes an atomic bus and only one - * level of cache. - */ -class SimpleCoherence -{ - protected: - /** Pointer to the parent cache. */ - BaseCache *cache; - /** Pointer to the coherence protocol. */ - CoherenceProtocol *protocol; - - public: - /** - * Construct and initialize this coherence policy. - * @param _protocol The coherence protocol to use. - */ - SimpleCoherence(CoherenceProtocol *_protocol) - : protocol(_protocol) - { - } - - /** - * Set the pointer to the parent cache. - * @param _cache The parent cache. - */ - void setCache(BaseCache *_cache) - { - cache = _cache; - } - - /** - * Register statistics. - * @param name The name to prepend to stat descriptions. - */ - void regStats(const std::string &name) - { - } - - /** - * This policy does not forward invalidates, return NULL. - * @return NULL. - */ - PacketPtr getPacket() - { - return NULL; - } - - /** - * Return the proper state given the current state and the bus response. - * @param pkt The bus response. - * @param current The current block state. - * @return The new state. - */ - CacheBlk::State getNewState(PacketPtr pkt, - CacheBlk::State current = 0) - { - return protocol->getNewState(pkt, current); - } - - /** - * Handle snooped bus requests. - * @param pkt The snooped bus request. - * @param blk The cache block corresponding to the request, if any. - * @param mshr The MSHR corresponding to the request, if any. - * @param new_state Return the new state for the block. - */ - bool handleBusRequest(PacketPtr &pkt, CacheBlk *blk, MSHR *mshr, - CacheBlk::State &new_state) - { -// assert(mshr == NULL); -//Got rid of, there could be an MSHR, but it can't be in service - if (blk != NULL) - { - if (pkt->cmd != MemCmd::Writeback) { - return protocol->handleBusRequest(cache, pkt, blk, mshr, - new_state); - } - else { //It is a writeback, must be ownership protocol, just keep state - new_state = blk->status; - } - } - return false; - } - - /** - * Get the proper bus command for the given command and status. - * @param cmd The request's command. - * @param state The current state of the cache block. - * @return The proper bus command, as determined by the protocol. - */ - MemCmd getBusCmd(MemCmd cmd, - CacheBlk::State state) - { - if (cmd == MemCmd::Writeback) return MemCmd::Writeback; - return protocol->getBusCmd(cmd, state); - } - - /** - * Return true if this coherence policy can handle fast cache writes. - */ - bool allowFastWrites() { return false; } - - bool hasProtocol() { return true; } -}; - -#endif //__SIMPLE_COHERENCE_HH__ - - - - - - - - -- cgit v1.2.3 From 6ab53415efe3e06c06589a8a6ef38185ff6f94b7 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Sat, 30 Jun 2007 10:16:18 -0700 Subject: Get rid of Packet result field. Error responses are now encoded in cmd field. --HG-- extra : convert_revision : d67819b7e3ee4b9a5bf08541104de0a89485e90b --- src/arch/sparc/tlb.cc | 4 +- src/cpu/o3/fetch_impl.hh | 6 -- src/cpu/o3/lsq_unit.hh | 6 -- src/cpu/o3/lsq_unit_impl.hh | 26 ------- src/cpu/ozone/lw_lsq.hh | 10 --- src/cpu/ozone/lw_lsq_impl.hh | 18 ----- src/cpu/simple/atomic.cc | 76 ++++++------------ src/cpu/simple/atomic.hh | 10 +-- src/cpu/simple/base.hh | 3 - src/cpu/simple/timing.cc | 8 +- src/dev/alpha/console.cc | 11 +-- src/dev/alpha/tsunami_cchip.cc | 5 +- src/dev/alpha/tsunami_io.cc | 6 +- src/dev/alpha/tsunami_pchip.cc | 6 +- src/dev/i8254xGBe.cc | 4 +- src/dev/ide_ctrl.cc | 14 ++-- src/dev/io_device.cc | 4 +- src/dev/isa_fake.cc | 9 +-- src/dev/ns_gige.cc | 8 +- src/dev/pciconfigall.cc | 4 +- src/dev/pcidev.cc | 5 +- src/dev/sparc/dtod.cc | 3 +- src/dev/sparc/iob.cc | 5 +- src/dev/sparc/mm_disk.cc | 6 +- src/dev/uart8250.cc | 6 +- src/mem/bridge.cc | 17 ++--- src/mem/bridge.hh | 2 +- src/mem/bus.cc | 30 ++++---- src/mem/cache/base_cache.cc | 2 +- src/mem/cache/cache_impl.hh | 12 ++- src/mem/packet.cc | 19 ++--- src/mem/packet.hh | 169 ++++++++++++++++++----------------------- src/mem/physical.cc | 2 +- src/mem/port.cc | 3 +- src/mem/tport.cc | 21 +++-- 35 files changed, 199 insertions(+), 341 deletions(-) diff --git a/src/arch/sparc/tlb.cc b/src/arch/sparc/tlb.cc index 09266fd6e..68df19618 100644 --- a/src/arch/sparc/tlb.cc +++ b/src/arch/sparc/tlb.cc @@ -1023,7 +1023,7 @@ doMmuReadError: panic("need to impl DTB::doMmuRegRead() got asi=%#x, va=%#x\n", (uint32_t)asi, va); } - pkt->result = Packet::Success; + pkt->makeAtomicResponse(); return tc->getCpuPtr()->cycles(1); } @@ -1268,7 +1268,7 @@ doMmuWriteError: panic("need to impl DTB::doMmuRegWrite() got asi=%#x, va=%#x d=%#x\n", (uint32_t)pkt->req->getAsi(), pkt->getAddr(), data); } - pkt->result = Packet::Success; + pkt->makeAtomicResponse(); return tc->getCpuPtr()->cycles(1); } diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 01e9b5b31..aa0c69ac4 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -628,12 +628,6 @@ DefaultFetch::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid // Now do the timing access to see whether or not the instruction // exists within the cache. if (!icachePort->sendTiming(data_pkt)) { - if (data_pkt->result == Packet::BadAddress) { - fault = TheISA::genMachineCheckFault(); - delete mem_req; - memReq[tid] = NULL; - warn("Bad address!\n"); - } assert(retryPkt == NULL); assert(retryTid == -1); DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid); diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh index cc33e025d..d964b9f9f 100644 --- a/src/cpu/o3/lsq_unit.hh +++ b/src/cpu/o3/lsq_unit.hh @@ -653,8 +653,6 @@ LSQUnit::read(Request *req, T &data, int load_idx) data_pkt->senderState = state; if (!dcachePort->sendTiming(data_pkt)) { - Packet::Result result = data_pkt->result; - // Delete state and data packet because a load retry // initiates a pipeline restart; it does not retry. delete state; @@ -663,10 +661,6 @@ LSQUnit::read(Request *req, T &data, int load_idx) req = NULL; - if (result == Packet::BadAddress) { - return TheISA::genMachineCheckFault(); - } - // If the access didn't succeed, tell the LSQ by setting // the retry thread id. lsq->setRetryTid(lsqID); diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index bde4f8079..91e616589 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -690,9 +690,6 @@ LSQUnit::writebackStores() } if (!dcachePort->sendTiming(data_pkt)) { - if (data_pkt->result == Packet::BadAddress) { - panic("LSQ sent out a bad address for a completed store!"); - } // Need to handle becoming blocked on a store. DPRINTF(IEW, "D-Cache became blocked when writing [sn:%lli], will" "retry later\n", @@ -844,26 +841,6 @@ LSQUnit::storePostSend(PacketPtr pkt) #endif } - if (pkt->result != Packet::Success) { - DPRINTF(LSQUnit,"D-Cache Write Miss on idx:%i!\n", - storeWBIdx); - - DPRINTF(Activity, "Active st accessing mem miss [sn:%lli]\n", - storeQueue[storeWBIdx].inst->seqNum); - - //mshrSeqNums.push_back(storeQueue[storeWBIdx].inst->seqNum); - - //DPRINTF(LSQUnit, "Added MSHR. count = %i\n",mshrSeqNums.size()); - - // @todo: Increment stat here. - } else { - DPRINTF(LSQUnit,"D-Cache: Write Hit on idx:%i !\n", - storeWBIdx); - - DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n", - storeQueue[storeWBIdx].inst->seqNum); - } - incrStIdx(storeWBIdx); } @@ -952,9 +929,6 @@ LSQUnit::recvRetry() assert(retryPkt != NULL); if (dcachePort->sendTiming(retryPkt)) { - if (retryPkt->result == Packet::BadAddress) { - panic("LSQ sent out a bad address for a completed store!"); - } storePostSend(retryPkt); retryPkt = NULL; isStoreBlocked = false; diff --git a/src/cpu/ozone/lw_lsq.hh b/src/cpu/ozone/lw_lsq.hh index 2048ad6bb..d9e0d04ac 100644 --- a/src/cpu/ozone/lw_lsq.hh +++ b/src/cpu/ozone/lw_lsq.hh @@ -661,16 +661,6 @@ OzoneLWLSQ::read(RequestPtr req, T &data, int load_idx) cpu->lockFlag = true; } - if (data_pkt->result != Packet::Success) { - DPRINTF(OzoneLSQ, "OzoneLSQ: D-cache miss!\n"); - DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n", - inst->seqNum); - } else { - DPRINTF(OzoneLSQ, "OzoneLSQ: D-cache hit!\n"); - DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n", - inst->seqNum); - } - return NoFault; } diff --git a/src/cpu/ozone/lw_lsq_impl.hh b/src/cpu/ozone/lw_lsq_impl.hh index f26b06453..eefc0df83 100644 --- a/src/cpu/ozone/lw_lsq_impl.hh +++ b/src/cpu/ozone/lw_lsq_impl.hh @@ -853,24 +853,6 @@ OzoneLWLSQ::storePostSend(PacketPtr pkt, DynInstPtr &inst) } #endif } - - if (pkt->result != Packet::Success) { - DPRINTF(OzoneLSQ,"D-Cache Write Miss!\n"); - - DPRINTF(Activity, "Active st accessing mem miss [sn:%lli]\n", - inst->seqNum); - - //mshrSeqNums.push_back(storeQueue[storeWBIdx].inst->seqNum); - - //DPRINTF(OzoneLWLSQ, "Added MSHR. count = %i\n",mshrSeqNums.size()); - - // @todo: Increment stat here. - } else { - DPRINTF(OzoneLSQ,"D-Cache: Write Hit!\n"); - - DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n", - inst->seqNum); - } } template diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc index 03ff1282b..bcd6662c8 100644 --- a/src/cpu/simple/atomic.cc +++ b/src/cpu/simple/atomic.cc @@ -148,23 +148,9 @@ AtomicSimpleCPU::AtomicSimpleCPU(Params *p) icachePort.snoopRangeSent = false; dcachePort.snoopRangeSent = false; - ifetch_req = new Request(); - ifetch_req->setThreadContext(p->cpu_id, 0); // Add thread ID if we add MT - ifetch_pkt = new Packet(ifetch_req, MemCmd::ReadReq, Packet::Broadcast); - ifetch_pkt->dataStatic(&inst); - - data_read_req = new Request(); - data_read_req->setThreadContext(p->cpu_id, 0); // Add thread ID here too - data_read_pkt = new Packet(data_read_req, MemCmd::ReadReq, - Packet::Broadcast); - data_read_pkt->dataStatic(&dataReg); - - data_write_req = new Request(); - data_write_req->setThreadContext(p->cpu_id, 0); // Add thread ID here too - data_write_pkt = new Packet(data_write_req, MemCmd::WriteReq, - Packet::Broadcast); - data_swap_pkt = new Packet(data_write_req, MemCmd::SwapReq, - Packet::Broadcast); + ifetch_req.setThreadContext(p->cpu_id, 0); // Add thread ID if we add MT + data_read_req.setThreadContext(p->cpu_id, 0); // Add thread ID here too + data_write_req.setThreadContext(p->cpu_id, 0); // Add thread ID here too } @@ -282,9 +268,7 @@ Fault AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags) { // use the CPU's statically allocated read request and packet objects - Request *req = data_read_req; - PacketPtr pkt = data_read_pkt; - + Request *req = &data_read_req; req->setVirt(0, addr, sizeof(T), flags, thread->readPC()); if (traceData) { @@ -296,19 +280,15 @@ AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags) // Now do the access. if (fault == NoFault) { - pkt->reinitFromRequest(); + Packet pkt = Packet(req, MemCmd::ReadReq, Packet::Broadcast); + pkt.dataStatic(&data); if (req->isMmapedIpr()) - dcache_latency = TheISA::handleIprRead(thread->getTC(),pkt); + dcache_latency = TheISA::handleIprRead(thread->getTC(), &pkt); else - dcache_latency = dcachePort.sendAtomic(pkt); + dcache_latency = dcachePort.sendAtomic(&pkt); dcache_access = true; -#if !defined(NDEBUG) - if (pkt->result != Packet::Success) - panic("Unable to find responder for address pa = %#X va = %#X\n", - pkt->req->getPaddr(), pkt->req->getVaddr()); -#endif - data = pkt->get(); + assert(!pkt.isError()); if (req->isLocked()) { TheISA::handleLockedRead(thread, req); @@ -378,16 +358,9 @@ Fault AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) { // use the CPU's statically allocated write request and packet objects - Request *req = data_write_req; - PacketPtr pkt; - + Request *req = &data_write_req; req->setVirt(0, addr, sizeof(T), flags, thread->readPC()); - if (req->isSwap()) - pkt = data_swap_pkt; - else - pkt = data_write_pkt; - if (traceData) { traceData->setAddr(addr); } @@ -397,6 +370,11 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) // Now do the access. if (fault == NoFault) { + Packet pkt = + Packet(req, req->isSwap() ? MemCmd::SwapReq : MemCmd::WriteReq, + Packet::Broadcast); + pkt.dataStatic(&data); + bool do_access = true; // flag to suppress cache access if (req->isLocked()) { @@ -409,27 +387,19 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) if (do_access) { - pkt->reinitFromRequest(); - pkt->dataStatic(&data); - if (req->isMmapedIpr()) { - dcache_latency = TheISA::handleIprWrite(thread->getTC(), pkt); + dcache_latency = TheISA::handleIprWrite(thread->getTC(), &pkt); } else { data = htog(data); - dcache_latency = dcachePort.sendAtomic(pkt); + dcache_latency = dcachePort.sendAtomic(&pkt); } dcache_access = true; - -#if !defined(NDEBUG) - if (pkt->result != Packet::Success) - panic("Unable to find responder for address pa = %#X va = %#X\n", - pkt->req->getPaddr(), pkt->req->getVaddr()); -#endif + assert(!pkt.isError()); } if (req->isSwap()) { assert(res); - *res = pkt->get(); + *res = pkt.get(); } else if (res) { *res = req->getExtraData(); } @@ -513,7 +483,7 @@ AtomicSimpleCPU::tick() if (!curStaticInst || !curStaticInst->isDelayedCommit()) checkForInterrupts(); - Fault fault = setupFetchRequest(ifetch_req); + Fault fault = setupFetchRequest(&ifetch_req); if (fault == NoFault) { Tick icache_latency = 0; @@ -524,9 +494,11 @@ AtomicSimpleCPU::tick() //if(predecoder.needMoreBytes()) //{ icache_access = true; - ifetch_pkt->reinitFromRequest(); + Packet ifetch_pkt = Packet(&ifetch_req, MemCmd::ReadReq, + Packet::Broadcast); + ifetch_pkt.dataStatic(&inst); - icache_latency = icachePort.sendAtomic(ifetch_pkt); + icache_latency = icachePort.sendAtomic(&ifetch_pkt); // ifetch_req is initialized to read the instruction directly // into the CPU object's inst field. //} diff --git a/src/cpu/simple/atomic.hh b/src/cpu/simple/atomic.hh index b127e3791..28e883b24 100644 --- a/src/cpu/simple/atomic.hh +++ b/src/cpu/simple/atomic.hh @@ -121,13 +121,9 @@ class AtomicSimpleCPU : public BaseSimpleCPU }; DcachePort dcachePort; - Request *ifetch_req; - PacketPtr ifetch_pkt; - Request *data_read_req; - PacketPtr data_read_pkt; - Request *data_write_req; - PacketPtr data_write_pkt; - PacketPtr data_swap_pkt; + Request ifetch_req; + Request data_read_req; + Request data_write_req; bool dcache_access; Tick dcache_latency; diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh index 243167db0..0550aa036 100644 --- a/src/cpu/simple/base.hh +++ b/src/cpu/simple/base.hh @@ -131,9 +131,6 @@ class BaseSimpleCPU : public BaseCPU // The predecoder TheISA::Predecoder predecoder; - // Static data storage - TheISA::LargestRead dataReg; - StaticInstPtr curStaticInst; StaticInstPtr curMacroStaticInst; diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc index 7698a588d..b4e4a4433 100644 --- a/src/cpu/simple/timing.cc +++ b/src/cpu/simple/timing.cc @@ -501,7 +501,7 @@ TimingSimpleCPU::completeIfetch(PacketPtr pkt) { // received a response from the icache: execute the received // instruction - assert(pkt->result == Packet::Success); + assert(!pkt->isError()); assert(_status == IcacheWaitResponse); _status = Running; @@ -569,7 +569,7 @@ TimingSimpleCPU::IcachePort::recvTiming(PacketPtr pkt) return true; } - else if (pkt->result == Packet::Nacked) { + else if (pkt->wasNacked()) { assert(cpu->_status == IcacheWaitResponse); pkt->reinitNacked(); if (!sendTiming(pkt)) { @@ -600,7 +600,7 @@ TimingSimpleCPU::completeDataAccess(PacketPtr pkt) { // received a response from the dcache: complete the load or store // instruction - assert(pkt->result == Packet::Success); + assert(!pkt->isError()); assert(_status == DcacheWaitResponse); _status = Running; @@ -663,7 +663,7 @@ TimingSimpleCPU::DcachePort::recvTiming(PacketPtr pkt) return true; } - else if (pkt->result == Packet::Nacked) { + else if (pkt->wasNacked()) { assert(cpu->_status == DcacheWaitResponse); pkt->reinitNacked(); if (!sendTiming(pkt)) { diff --git a/src/dev/alpha/console.cc b/src/dev/alpha/console.cc index 443f376a5..55549a154 100644 --- a/src/dev/alpha/console.cc +++ b/src/dev/alpha/console.cc @@ -102,7 +102,6 @@ AlphaConsole::read(PacketPtr pkt) * machine dependent address swizzle is required? */ - assert(pkt->result == Packet::Unknown); assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize); Addr daddr = pkt->getAddr() - pioAddr; @@ -130,7 +129,7 @@ AlphaConsole::read(PacketPtr pkt) /* Old console code read in everyting as a 32bit int * we now break that for better error checking. */ - pkt->result = Packet::BadAddress; + pkt->setBadAddress(); } DPRINTF(AlphaConsole, "read: offset=%#x val=%#x\n", daddr, pkt->get()); @@ -187,17 +186,15 @@ AlphaConsole::read(PacketPtr pkt) pkt->get()); break; default: - pkt->result = Packet::BadAddress; + pkt->setBadAddress(); } - if (pkt->result == Packet::Unknown) - pkt->result = Packet::Success; + pkt->makeAtomicResponse(); return pioDelay; } Tick AlphaConsole::write(PacketPtr pkt) { - assert(pkt->result == Packet::Unknown); assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize); Addr daddr = pkt->getAddr() - pioAddr; @@ -245,7 +242,7 @@ AlphaConsole::write(PacketPtr pkt) panic("Unknown 64bit access, %#x\n", daddr); } - pkt->result = Packet::Success; + pkt->makeAtomicResponse(); return pioDelay; } diff --git a/src/dev/alpha/tsunami_cchip.cc b/src/dev/alpha/tsunami_cchip.cc index 118160adf..a7175d90c 100644 --- a/src/dev/alpha/tsunami_cchip.cc +++ b/src/dev/alpha/tsunami_cchip.cc @@ -78,7 +78,6 @@ TsunamiCChip::read(PacketPtr pkt) { DPRINTF(Tsunami, "read va=%#x size=%d\n", pkt->getAddr(), pkt->getSize()); - assert(pkt->result == Packet::Unknown); assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize); Addr regnum = (pkt->getAddr() - pioAddr) >> 6; @@ -181,7 +180,7 @@ TsunamiCChip::read(PacketPtr pkt) DPRINTF(Tsunami, "Tsunami CChip: read regnum=%#x size=%d data=%lld\n", regnum, pkt->getSize(), pkt->get()); - pkt->result = Packet::Success; + pkt->makeAtomicResponse(); return pioDelay; } @@ -365,7 +364,7 @@ TsunamiCChip::write(PacketPtr pkt) panic("default in cchip read reached, accessing 0x%x\n"); } // swtich(regnum) } // not BIG_TSUNAMI write - pkt->result = Packet::Success; + pkt->makeAtomicResponse(); return pioDelay; } diff --git a/src/dev/alpha/tsunami_io.cc b/src/dev/alpha/tsunami_io.cc index 58933428c..f59a06fba 100644 --- a/src/dev/alpha/tsunami_io.cc +++ b/src/dev/alpha/tsunami_io.cc @@ -461,7 +461,6 @@ TsunamiIO::frequency() const Tick TsunamiIO::read(PacketPtr pkt) { - assert(pkt->result == Packet::Unknown); assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize); Addr daddr = pkt->getAddr() - pioAddr; @@ -520,14 +519,13 @@ TsunamiIO::read(PacketPtr pkt) } else { panic("I/O Read - invalid size - va %#x size %d\n", pkt->getAddr(), pkt->getSize()); } - pkt->result = Packet::Success; + pkt->makeAtomicResponse(); return pioDelay; } Tick TsunamiIO::write(PacketPtr pkt) { - assert(pkt->result == Packet::Unknown); assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize); Addr daddr = pkt->getAddr() - pioAddr; @@ -600,7 +598,7 @@ TsunamiIO::write(PacketPtr pkt) panic("I/O Write - va%#x size %d data %#x\n", pkt->getAddr(), pkt->getSize(), pkt->get()); } - pkt->result = Packet::Success; + pkt->makeAtomicResponse(); return pioDelay; } diff --git a/src/dev/alpha/tsunami_pchip.cc b/src/dev/alpha/tsunami_pchip.cc index f30199337..be164e5b9 100644 --- a/src/dev/alpha/tsunami_pchip.cc +++ b/src/dev/alpha/tsunami_pchip.cc @@ -71,7 +71,6 @@ TsunamiPChip::TsunamiPChip(Params *p) Tick TsunamiPChip::read(PacketPtr pkt) { - assert(pkt->result == Packet::Unknown); assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize); pkt->allocate(); @@ -145,7 +144,7 @@ TsunamiPChip::read(PacketPtr pkt) default: panic("Default in PChip Read reached reading 0x%x\n", daddr); } - pkt->result = Packet::Success; + pkt->makeAtomicResponse(); return pioDelay; } @@ -153,7 +152,6 @@ TsunamiPChip::read(PacketPtr pkt) Tick TsunamiPChip::write(PacketPtr pkt) { - assert(pkt->result == Packet::Unknown); assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize); Addr daddr = (pkt->getAddr() - pioAddr) >> 6; @@ -224,7 +222,7 @@ TsunamiPChip::write(PacketPtr pkt) } // uint64_t - pkt->result = Packet::Success; + pkt->makeAtomicResponse(); return pioDelay; } diff --git a/src/dev/i8254xGBe.cc b/src/dev/i8254xGBe.cc index baf13c49a..7ea4c704b 100644 --- a/src/dev/i8254xGBe.cc +++ b/src/dev/i8254xGBe.cc @@ -271,7 +271,7 @@ IGbE::read(PacketPtr pkt) pkt->set(0); }; - pkt->result = Packet::Success; + pkt->makeAtomicResponse(); return pioDelay; } @@ -543,7 +543,7 @@ IGbE::write(PacketPtr pkt) panic("Write request to unknown register number: %#x\n", daddr); }; - pkt->result = Packet::Success; + pkt->makeAtomicResponse(); return pioDelay; } diff --git a/src/dev/ide_ctrl.cc b/src/dev/ide_ctrl.cc index 921ba1cd0..01243ae73 100644 --- a/src/dev/ide_ctrl.cc +++ b/src/dev/ide_ctrl.cc @@ -295,7 +295,7 @@ IdeController::readConfig(PacketPtr pkt) default: panic("invalid access size(?) for PCI configspace!\n"); } - pkt->result = Packet::Success; + pkt->makeAtomicResponse(); return configDelay; } @@ -403,7 +403,7 @@ IdeController::writeConfig(PacketPtr pkt) bm_enabled = false; break; } - pkt->result = Packet::Success; + pkt->makeAtomicResponse(); return configDelay; } @@ -423,7 +423,7 @@ IdeController::read(PacketPtr pkt) parseAddr(pkt->getAddr(), offset, channel, reg_type); if (!io_enabled) { - pkt->result = Packet::Success; + pkt->makeAtomicResponse(); return pioDelay; } @@ -490,7 +490,7 @@ IdeController::read(PacketPtr pkt) DPRINTF(IdeCtrl, "read from offset: %#x size: %#x data: %#x\n", offset, pkt->getSize(), pkt->get()); - pkt->result = Packet::Success; + pkt->makeAtomicResponse(); return pioDelay; } @@ -506,7 +506,7 @@ IdeController::write(PacketPtr pkt) parseAddr(pkt->getAddr(), offset, channel, reg_type); if (!io_enabled) { - pkt->result = Packet::Success; + pkt->makeAtomicResponse(); DPRINTF(IdeCtrl, "io not enabled\n"); return pioDelay; } @@ -514,7 +514,7 @@ IdeController::write(PacketPtr pkt) switch (reg_type) { case BMI_BLOCK: if (!bm_enabled) { - pkt->result = Packet::Success; + pkt->makeAtomicResponse(); return pioDelay; } @@ -673,7 +673,7 @@ IdeController::write(PacketPtr pkt) offset, pkt->getSize(), pkt->get()); - pkt->result = Packet::Success; + pkt->makeAtomicResponse(); return pioDelay; } diff --git a/src/dev/io_device.cc b/src/dev/io_device.cc index ecbb391ef..806d13d07 100644 --- a/src/dev/io_device.cc +++ b/src/dev/io_device.cc @@ -100,9 +100,7 @@ DmaPort::DmaPort(DmaDevice *dev, System *s) bool DmaPort::recvTiming(PacketPtr pkt) { - - - if (pkt->result == Packet::Nacked) { + if (pkt->wasNacked()) { DPRINTF(DMA, "Received nacked Pkt %#x with State: %#x Addr: %#x\n", pkt, pkt->senderState, pkt->getAddr()); diff --git a/src/dev/isa_fake.cc b/src/dev/isa_fake.cc index c36ddeb83..5cd0afb36 100644 --- a/src/dev/isa_fake.cc +++ b/src/dev/isa_fake.cc @@ -56,7 +56,6 @@ IsaFake::IsaFake(Params *p) Tick IsaFake::read(PacketPtr pkt) { - assert(pkt->result == Packet::Unknown); if (params()->warnAccess != "") warn("Device %s accessed by read to address %#x size=%d\n", @@ -64,7 +63,7 @@ IsaFake::read(PacketPtr pkt) if (params()->retBadAddr) { DPRINTF(Tsunami, "read to bad address va=%#x size=%d\n", pkt->getAddr(), pkt->getSize()); - pkt->result = Packet::BadAddress; + pkt->setBadAddress(); } else { assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize); DPRINTF(Tsunami, "read va=%#x size=%d\n", @@ -85,7 +84,7 @@ IsaFake::read(PacketPtr pkt) default: panic("invalid access size!\n"); } - pkt->result = Packet::Success; + pkt->makeAtomicResponse(); } return pioDelay; } @@ -117,7 +116,7 @@ IsaFake::write(PacketPtr pkt) if (params()->retBadAddr) { DPRINTF(Tsunami, "write to bad address va=%#x size=%d \n", pkt->getAddr(), pkt->getSize()); - pkt->result = Packet::BadAddress; + pkt->setBadAddress(); } else { DPRINTF(Tsunami, "write - va=%#x size=%d \n", pkt->getAddr(), pkt->getSize()); @@ -140,7 +139,7 @@ IsaFake::write(PacketPtr pkt) panic("invalid access size!\n"); } } - pkt->result = Packet::Success; + pkt->makeAtomicResponse(); } return pioDelay; } diff --git a/src/dev/ns_gige.cc b/src/dev/ns_gige.cc index e9d9c419d..86f664238 100644 --- a/src/dev/ns_gige.cc +++ b/src/dev/ns_gige.cc @@ -487,7 +487,7 @@ NSGigE::writeConfig(PacketPtr pkt) ioEnable = false; break; } - pkt->result = Packet::Success; + pkt->makeAtomicResponse(); return configDelay; } @@ -519,7 +519,7 @@ NSGigE::read(PacketPtr pkt) // doesn't actually DEPEND upon their values // MIB are just hardware stats keepers pkt->set(0); - pkt->result = Packet::Success; + pkt->makeAtomicResponse(); return pioDelay; } else if (daddr > 0x3FC) panic("Something is messed up!\n"); @@ -715,7 +715,7 @@ NSGigE::read(PacketPtr pkt) DPRINTF(EthernetPIO, "read from %#x: data=%d data=%#x\n", daddr, reg, reg); - pkt->result = Packet::Success; + pkt->makeAtomicResponse(); return pioDelay; } @@ -1122,7 +1122,7 @@ NSGigE::write(PacketPtr pkt) } else { panic("Invalid Request Size"); } - pkt->result = Packet::Success; + pkt->makeAtomicResponse(); return pioDelay; } diff --git a/src/dev/pciconfigall.cc b/src/dev/pciconfigall.cc index bd1855847..b07ee1a49 100644 --- a/src/dev/pciconfigall.cc +++ b/src/dev/pciconfigall.cc @@ -54,7 +54,6 @@ PciConfigAll::PciConfigAll(Params *p) Tick PciConfigAll::read(PacketPtr pkt) { - assert(pkt->result == Packet::Unknown); pkt->allocate(); @@ -74,14 +73,13 @@ PciConfigAll::read(PacketPtr pkt) default: panic("invalid access size(?) for PCI configspace!\n"); } - pkt->result = Packet::Success; + pkt->makeAtomicResponse(); return params()->pio_delay; } Tick PciConfigAll::write(PacketPtr pkt) { - assert(pkt->result == Packet::Unknown); panic("Attempting to write to config space on non-existant device\n"); M5_DUMMY_RETURN } diff --git a/src/dev/pcidev.cc b/src/dev/pcidev.cc index c2a2bc02d..85337c841 100644 --- a/src/dev/pcidev.cc +++ b/src/dev/pcidev.cc @@ -68,7 +68,6 @@ PciDev::PciConfigPort::PciConfigPort(PciDev *dev, int busid, int devid, Tick PciDev::PciConfigPort::recvAtomic(PacketPtr pkt) { - assert(pkt->result == Packet::Unknown); assert(pkt->getAddr() >= configAddr && pkt->getAddr() < configAddr + PCI_CONFIG_SIZE); return pkt->isRead() ? device->readConfig(pkt) : device->writeConfig(pkt); @@ -156,7 +155,7 @@ PciDev::readConfig(PacketPtr pkt) default: panic("invalid access size(?) for PCI configspace!\n"); } - pkt->result = Packet::Success; + pkt->makeAtomicResponse(); return configDelay; } @@ -283,7 +282,7 @@ PciDev::writeConfig(PacketPtr pkt) default: panic("invalid access size(?) for PCI configspace!\n"); } - pkt->result = Packet::Success; + pkt->makeAtomicResponse(); return configDelay; } diff --git a/src/dev/sparc/dtod.cc b/src/dev/sparc/dtod.cc index 42275c60a..22df873b6 100644 --- a/src/dev/sparc/dtod.cc +++ b/src/dev/sparc/dtod.cc @@ -74,7 +74,6 @@ DumbTOD::DumbTOD(Params *p) Tick DumbTOD::read(PacketPtr pkt) { - assert(pkt->result == Packet::Unknown); assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize); assert(pkt->getSize() == 8); @@ -82,7 +81,7 @@ DumbTOD::read(PacketPtr pkt) pkt->set(todTime); todTime += 1000; - pkt->result = Packet::Success; + pkt->makeAtomicResponse(); return pioDelay; } diff --git a/src/dev/sparc/iob.cc b/src/dev/sparc/iob.cc index e686e51f7..b27f45eba 100644 --- a/src/dev/sparc/iob.cc +++ b/src/dev/sparc/iob.cc @@ -72,7 +72,6 @@ Iob::Iob(Params *p) Tick Iob::read(PacketPtr pkt) { - assert(pkt->result == Packet::Unknown); if (pkt->getAddr() >= iobManAddr && pkt->getAddr() < iobManAddr + iobManSize) readIob(pkt); @@ -81,7 +80,7 @@ Iob::read(PacketPtr pkt) else panic("Invalid address reached Iob\n"); - pkt->result = Packet::Success; + pkt->makeAtomicResponse(); return pioDelay; } @@ -176,7 +175,7 @@ Iob::write(PacketPtr pkt) panic("Invalid address reached Iob\n"); - pkt->result = Packet::Success; + pkt->makeAtomicResponse(); return pioDelay; } diff --git a/src/dev/sparc/mm_disk.cc b/src/dev/sparc/mm_disk.cc index 81c5c589a..bbb773c48 100644 --- a/src/dev/sparc/mm_disk.cc +++ b/src/dev/sparc/mm_disk.cc @@ -61,7 +61,6 @@ MmDisk::read(PacketPtr pkt) uint32_t d32; uint64_t d64; - assert(pkt->result == Packet::Unknown); assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize); accessAddr = pkt->getAddr() - pioAddr; @@ -101,7 +100,7 @@ MmDisk::read(PacketPtr pkt) panic("Invalid access size\n"); } - pkt->result = Packet::Success; + pkt->makeAtomicResponse(); return pioDelay; } @@ -115,7 +114,6 @@ MmDisk::write(PacketPtr pkt) uint32_t d32; uint64_t d64; - assert(pkt->result == Packet::Unknown); assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize); accessAddr = pkt->getAddr() - pioAddr; @@ -157,7 +155,7 @@ MmDisk::write(PacketPtr pkt) panic("Invalid access size\n"); } - pkt->result = Packet::Success; + pkt->makeAtomicResponse(); return pioDelay; } diff --git a/src/dev/uart8250.cc b/src/dev/uart8250.cc index 50307aad4..0ad80e077 100644 --- a/src/dev/uart8250.cc +++ b/src/dev/uart8250.cc @@ -111,7 +111,6 @@ Uart8250::Uart8250(Params *p) Tick Uart8250::read(PacketPtr pkt) { - assert(pkt->result == Packet::Unknown); assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize); assert(pkt->getSize() == 1); @@ -186,7 +185,7 @@ Uart8250::read(PacketPtr pkt) /* uint32_t d32 = *data; DPRINTF(Uart, "Register read to register %#x returned %#x\n", daddr, d32); */ - pkt->result = Packet::Success; + pkt->makeAtomicResponse(); return pioDelay; } @@ -194,7 +193,6 @@ Tick Uart8250::write(PacketPtr pkt) { - assert(pkt->result == Packet::Unknown); assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize); assert(pkt->getSize() == 1); @@ -272,7 +270,7 @@ Uart8250::write(PacketPtr pkt) panic("Tried to access a UART port that doesn't exist\n"); break; } - pkt->result = Packet::Success; + pkt->makeAtomicResponse(); return pioDelay; } diff --git a/src/mem/bridge.cc b/src/mem/bridge.cc index fb4574844..77178d518 100644 --- a/src/mem/bridge.cc +++ b/src/mem/bridge.cc @@ -121,14 +121,13 @@ Bridge::BridgePort::recvTiming(PacketPtr pkt) otherPort->sendQueue.size(), otherPort->queuedRequests, otherPort->outstandingResponses); - if (pkt->isRequest() && otherPort->reqQueueFull() && pkt->result != - Packet::Nacked) { + if (pkt->isRequest() && otherPort->reqQueueFull() && !pkt->wasNacked()) { DPRINTF(BusBridge, "Remote queue full, nacking\n"); nackRequest(pkt); return true; } - if (pkt->needsResponse() && pkt->result != Packet::Nacked) + if (pkt->needsResponse() && !pkt->wasNacked()) if (respQueueFull()) { DPRINTF(BusBridge, "Local queue full, no space for response, nacking\n"); DPRINTF(BusBridge, "queue size: %d outreq: %d outstanding resp: %d\n", @@ -149,7 +148,7 @@ void Bridge::BridgePort::nackRequest(PacketPtr pkt) { // Nack the packet - pkt->result = Packet::Nacked; + pkt->setNacked(); pkt->setDest(pkt->getSrc()); //put it on the list to send @@ -194,7 +193,7 @@ Bridge::BridgePort::nackRequest(PacketPtr pkt) void Bridge::BridgePort::queueForSendTiming(PacketPtr pkt) { - if (pkt->isResponse() || pkt->result == Packet::Nacked) { + if (pkt->isResponse() || pkt->wasNacked()) { // This is a response for a request we forwarded earlier. The // corresponding PacketBuffer should be stored in the packet's // senderState field. @@ -206,7 +205,7 @@ Bridge::BridgePort::queueForSendTiming(PacketPtr pkt) // Check if this packet was expecting a response and it's a nacked // packet, in which case we will never being seeing it - if (buf->expectResponse && pkt->result == Packet::Nacked) + if (buf->expectResponse && pkt->wasNacked()) --outstandingResponses; @@ -217,7 +216,7 @@ Bridge::BridgePort::queueForSendTiming(PacketPtr pkt) } - if (pkt->isRequest() && pkt->result != Packet::Nacked) { + if (pkt->isRequest() && !pkt->wasNacked()) { ++queuedRequests; } @@ -251,7 +250,7 @@ Bridge::BridgePort::trySend() // Ugly! @todo When multilevel coherence works this will be removed if (pkt->cmd == MemCmd::WriteInvalidateReq && fixPartialWrite && - pkt->result != Packet::Nacked) { + !pkt->wasNacked()) { PacketPtr funcPkt = new Packet(pkt->req, MemCmd::WriteReq, Packet::Broadcast); funcPkt->dataStatic(pkt->getPtr()); @@ -264,7 +263,7 @@ Bridge::BridgePort::trySend() buf->origSrc, pkt->getDest(), pkt->getAddr()); bool wasReq = pkt->isRequest(); - bool wasNacked = pkt->result == Packet::Nacked; + bool wasNacked = pkt->wasNacked(); if (sendTiming(pkt)) { // send successful diff --git a/src/mem/bridge.hh b/src/mem/bridge.hh index 89d626611..7af764437 100644 --- a/src/mem/bridge.hh +++ b/src/mem/bridge.hh @@ -86,7 +86,7 @@ class Bridge : public MemObject expectResponse(_pkt->needsResponse() && !nack) { - if (!pkt->isResponse() && !nack && pkt->result != Packet::Nacked) + if (!pkt->isResponse() && !nack && !pkt->wasNacked()) pkt->senderState = this; } diff --git a/src/mem/bus.cc b/src/mem/bus.cc index ffd5e25a7..83ce0f87d 100644 --- a/src/mem/bus.cc +++ b/src/mem/bus.cc @@ -173,9 +173,8 @@ bool Bus::recvTiming(PacketPtr pkt) { Port *port; - DPRINTF(Bus, "recvTiming: packet src %d dest %d addr 0x%x cmd %s result %d\n", - pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString(), - pkt->result); + DPRINTF(Bus, "recvTiming: packet src %d dest %d addr 0x%x cmd %s\n", + pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString()); BusPort *pktPort; if (pkt->getSrc() == defaultId) @@ -329,6 +328,8 @@ Bus::functionalSnoop(PacketPtr pkt, Port *responder) // id after each int src_id = pkt->getSrc(); + assert(pkt->isRequest()); // hasn't already been satisfied + for (SnoopIter s_iter = snoopPorts.begin(); s_iter != snoopPorts.end(); s_iter++) { @@ -336,7 +337,7 @@ Bus::functionalSnoop(PacketPtr pkt, Port *responder) if (p != responder && p->getId() != src_id) { p->sendFunctional(pkt); } - if (pkt->result == Packet::Success) { + if (pkt->isResponse()) { break; } pkt->setSrc(src_id); @@ -369,14 +370,15 @@ Bus::recvAtomic(PacketPtr pkt) DPRINTF(Bus, "recvAtomic: packet src %d dest %d addr 0x%x cmd %s\n", pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString()); assert(pkt->getDest() == Packet::Broadcast); + assert(pkt->isRequest()); // Variables for recording original command and snoop response (if // any)... if a snooper respondes, we will need to restore // original command so that additional snoops can take place // properly MemCmd orig_cmd = pkt->cmd; - Packet::Result response_result = Packet::Unknown; MemCmd response_cmd = MemCmd::InvalidCmd; + int orig_src = pkt->getSrc(); Port *target_port = findPort(pkt->getAddr(), pkt->getSrc()); @@ -387,20 +389,18 @@ Bus::recvAtomic(PacketPtr pkt) assert(p != target_port); if (p->getId() != pkt->getSrc()) { p->sendAtomic(pkt); - if (pkt->result != Packet::Unknown) { + if (pkt->isResponse()) { // response from snoop agent assert(pkt->cmd != orig_cmd); assert(pkt->memInhibitAsserted()); - assert(pkt->isResponse()); // should only happen once - assert(response_result == Packet::Unknown); assert(response_cmd == MemCmd::InvalidCmd); // save response state - response_result = pkt->result; response_cmd = pkt->cmd; // restore original packet state for remaining snoopers pkt->cmd = orig_cmd; - pkt->result = Packet::Unknown; + pkt->setSrc(orig_src); + pkt->setDest(Packet::Broadcast); } } } @@ -408,13 +408,11 @@ Bus::recvAtomic(PacketPtr pkt) Tick response_time = target_port->sendAtomic(pkt); // if we got a response from a snooper, restore it here - if (response_result != Packet::Unknown) { - assert(response_cmd != MemCmd::InvalidCmd); + if (response_cmd != MemCmd::InvalidCmd) { // no one else should have responded - assert(pkt->result == Packet::Unknown); + assert(!pkt->isResponse()); assert(pkt->cmd == orig_cmd); pkt->cmd = response_cmd; - pkt->result = response_result; } // why do we have this packet field and the return value both??? @@ -434,8 +432,8 @@ Bus::recvFunctional(PacketPtr pkt) Port* port = findPort(pkt->getAddr(), pkt->getSrc()); functionalSnoop(pkt, port ? port : interfaces[pkt->getSrc()]); - // If the snooping found what we were looking for, we're done. - if (pkt->result != Packet::Success && port) { + // If the snooping hasn't found what we were looking for, keep going. + if (!pkt->isResponse() && port) { port->sendFunctional(pkt); } } diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc index 5062d6e87..870658675 100644 --- a/src/mem/cache/base_cache.cc +++ b/src/mem/cache/base_cache.cc @@ -82,7 +82,7 @@ void BaseCache::CachePort::checkAndSendFunctional(PacketPtr pkt) { checkFunctional(pkt); - if (pkt->result != Packet::Success) + if (!pkt->isResponse()) sendFunctional(pkt); } diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index b76d7e392..1823ea6b9 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -502,7 +502,6 @@ Cache::atomicAccess(PacketPtr pkt) if (pkt->needsResponse()) { pkt->makeAtomicResponse(); - pkt->result = Packet::Success; } return lat; @@ -648,14 +647,13 @@ Cache::handleResponse(PacketPtr pkt) MSHR *mshr = dynamic_cast(pkt->senderState); assert(mshr); - if (pkt->result == Packet::Nacked) { + if (pkt->wasNacked()) { //pkt->reinitFromRequest(); warn("NACKs from devices not connected to the same bus " "not implemented\n"); return; } - assert(pkt->result != Packet::BadAddress); - assert(pkt->result == Packet::Success); + assert(!pkt->isError()); DPRINTF(Cache, "Handling response to %x\n", pkt->getAddr()); MSHRQueue *mq = mshr->queue; @@ -1142,7 +1140,7 @@ void Cache::CpuSidePort::recvFunctional(PacketPtr pkt) { checkFunctional(pkt); - if (pkt->result != Packet::Success) + if (!pkt->isResponse()) myCache()->functionalAccess(pkt, cache->memSidePort); } @@ -1180,7 +1178,7 @@ Cache::MemSidePort::recvTiming(PacketPtr pkt) // this needs to be fixed so that the cache updates the mshr and sends the // packet back out on the link, but it probably won't happen so until this // gets fixed, just panic when it does - if (pkt->result == Packet::Nacked) + if (pkt->wasNacked()) panic("Need to implement cache resending nacked packets!\n"); if (pkt->isRequest() && blocked) { @@ -1216,7 +1214,7 @@ void Cache::MemSidePort::recvFunctional(PacketPtr pkt) { checkFunctional(pkt); - if (pkt->result != Packet::Success) + if (!pkt->isResponse()) myCache()->functionalAccess(pkt, cache->cpuSidePort); } diff --git a/src/mem/packet.cc b/src/mem/packet.cc index cd0ed8a2e..55fe13f3c 100644 --- a/src/mem/packet.cc +++ b/src/mem/packet.cc @@ -115,7 +115,13 @@ MemCmd::commandInfo[] = SwapResp, "SwapReq" }, /* SwapResp -- for Swap ldstub type operations */ { SET5(IsRead, IsWrite, NeedsExclusive, IsResponse, HasData), - InvalidCmd, "SwapResp" } + InvalidCmd, "SwapResp" }, + /* NetworkNackError -- nacked at network layer (not by protocol) */ + { SET2(IsRequest, IsError), InvalidCmd, "NetworkNackError" }, + /* InvalidDestError -- packet dest field invalid */ + { SET2(IsRequest, IsError), InvalidCmd, "InvalidDestError" }, + /* BadAddressError -- memory address invalid */ + { SET2(IsRequest, IsError), InvalidCmd, "BadAddressError" } }; @@ -205,7 +211,7 @@ Packet::checkFunctional(Addr addr, int size, uint8_t *data) if (func_start >= val_start && func_end <= val_end) { allocate(); std::memcpy(getPtr(), data + offset, getSize()); - result = Packet::Success; + makeResponse(); return true; } else { // In this case the timing packet only partially satisfies @@ -245,15 +251,6 @@ operator<<(std::ostream &o, const Packet &p) o << p.getAddr() + p.getSize() - 1 << "] "; o.unsetf(std::ios_base::hex| std::ios_base::showbase); - if (p.result == Packet::Success) - o << "Successful "; - if (p.result == Packet::BadAddress) - o << "BadAddress "; - if (p.result == Packet::Nacked) - o << "Nacked "; - if (p.result == Packet::Unknown) - o << "Inflight "; - if (p.isRead()) o << "Read "; if (p.isWrite()) diff --git a/src/mem/packet.hh b/src/mem/packet.hh index fc1c283ed..10b9f490c 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -84,6 +84,13 @@ class MemCmd StoreCondResp, SwapReq, SwapResp, + // Error responses + // @TODO these should be classified as responses rather than + // requests; coding them as requests initially for backwards + // compatibility + NetworkNackError, // nacked at network layer (not by protocol) + InvalidDestError, // packet dest field invalid + BadAddressError, // memory address invalid NUM_MEM_CMDS }; @@ -103,6 +110,7 @@ class MemCmd IsHWPrefetch, IsLocked, //!< Alpha/MIPS LL or SC access HasData, //!< There is an associated payload + IsError, //!< Error response NUM_COMMAND_ATTRIBUTES }; @@ -135,12 +143,13 @@ class MemCmd bool isWrite() const { return testCmdAttrib(IsWrite); } bool isRequest() const { return testCmdAttrib(IsRequest); } bool isResponse() const { return testCmdAttrib(IsResponse); } - bool needsExclusive() const { return testCmdAttrib(NeedsExclusive); } + bool needsExclusive() const { return testCmdAttrib(NeedsExclusive); } bool needsResponse() const { return testCmdAttrib(NeedsResponse); } bool isInvalidate() const { return testCmdAttrib(IsInvalidate); } bool hasData() const { return testCmdAttrib(HasData); } bool isReadWrite() const { return isRead() && isWrite(); } bool isLocked() const { return testCmdAttrib(IsLocked); } + bool isError() const { return testCmdAttrib(IsError); } const Command responseCommand() const { return commandInfo[cmd].response; @@ -184,6 +193,12 @@ class Packet : public FastAlloc typedef MemCmd::Command Command; + /** The command field of the packet. */ + MemCmd cmd; + + /** A pointer to the original request. */ + RequestPtr req; + private: /** A pointer to the data being transfered. It can be differnt * sizes at each level of the heirarchy so it belongs in the @@ -223,19 +238,28 @@ class Packet : public FastAlloc * (unlike * addr, size, and src). */ short dest; + /** The original value of the command field. Only valid when the + * current command field is an error condition; in that case, the + * previous contents of the command field are copied here. This + * field is *not* set on non-error responses. + */ + MemCmd origCmd; + /** Are the 'addr' and 'size' fields valid? */ bool addrSizeValid; /** Is the 'src' field valid? */ bool srcValid; + bool destValid; - enum SnoopFlag { + enum Flag { + // Snoop flags MemInhibit, Shared, - NUM_SNOOP_FLAGS + NUM_PACKET_FLAGS }; - /** Coherence snoopFlags for snooping */ - std::bitset snoopFlags; + /** Status flags */ + std::bitset flags; public: @@ -252,22 +276,6 @@ class Packet : public FastAlloc * should be routed based on its address. */ static const short Broadcast = -1; - /** A pointer to the original request. */ - RequestPtr req; - - /** A virtual base opaque structure used to hold coherence-related - * state. A specific subclass would be derived from this to - * carry state specific to a particular coherence protocol. */ - class CoherenceState : public FastAlloc { - public: - virtual ~CoherenceState() {} - }; - - /** This packet's coherence state. Caches should use - * dynamic_cast<> to cast to the state appropriate for the - * system's coherence protocol. */ - CoherenceState *coherence; - /** A virtual base opaque structure used to hold state associated * with the packet but specific to the sending device (e.g., an * MSHR). A pointer to this state is returned in the packet's @@ -284,11 +292,6 @@ class Packet : public FastAlloc * to cast to the state appropriate to the sender. */ SenderState *senderState; - public: - - /** The command field of the packet. */ - MemCmd cmd; - /** Return the string name of the cmd field (for debugging and * tracing). */ const std::string &cmdString() const { return cmd.toString(); } @@ -296,68 +299,59 @@ class Packet : public FastAlloc /** Return the index of this command. */ inline int cmdToIndex() const { return cmd.toInt(); } - public: - bool isRead() const { return cmd.isRead(); } bool isWrite() const { return cmd.isWrite(); } bool isRequest() const { return cmd.isRequest(); } bool isResponse() const { return cmd.isResponse(); } - bool needsExclusive() const { return cmd.needsExclusive(); } + bool needsExclusive() const { return cmd.needsExclusive(); } bool needsResponse() const { return cmd.needsResponse(); } bool isInvalidate() const { return cmd.isInvalidate(); } bool hasData() const { return cmd.hasData(); } bool isReadWrite() const { return cmd.isReadWrite(); } bool isLocked() const { return cmd.isLocked(); } - - void assertMemInhibit() { snoopFlags[MemInhibit] = true; } - void assertShared() { snoopFlags[Shared] = true; } - bool memInhibitAsserted() { return snoopFlags[MemInhibit]; } - bool sharedAsserted() { return snoopFlags[Shared]; } + bool isError() const { return cmd.isError(); } + + // Snoop flags + void assertMemInhibit() { flags[MemInhibit] = true; } + void assertShared() { flags[Shared] = true; } + bool memInhibitAsserted() { return flags[MemInhibit]; } + bool sharedAsserted() { return flags[Shared]; } + + // Network error conditions... encapsulate them as methods since + // their encoding keeps changing (from result field to command + // field, etc.) + void setNacked() { origCmd = cmd; cmd = MemCmd::NetworkNackError; } + void setBadAddress() { origCmd = cmd; cmd = MemCmd::BadAddressError; } + bool wasNacked() { return cmd == MemCmd::NetworkNackError; } + bool hadBadAddress() { return cmd == MemCmd::BadAddressError; } bool nic_pkt() { panic("Unimplemented"); M5_DUMMY_RETURN } - /** Possible results of a packet's request. */ - enum Result - { - Success, - BadAddress, - Nacked, - Unknown - }; - - /** The result of this packet's request. */ - Result result; - /** Accessor function that returns the source index of the packet. */ - short getSrc() const { assert(srcValid); return src; } + short getSrc() const { assert(srcValid); return src; } void setSrc(short _src) { src = _src; srcValid = true; } /** Reset source field, e.g. to retransmit packet on different bus. */ void clearSrc() { srcValid = false; } /** Accessor function that returns the destination index of the packet. */ - short getDest() const { return dest; } - void setDest(short _dest) { dest = _dest; } + short getDest() const { assert(destValid); return dest; } + void setDest(short _dest) { dest = _dest; destValid = true; } Addr getAddr() const { assert(addrSizeValid); return addr; } - int getSize() const { assert(addrSizeValid); return size; } + int getSize() const { assert(addrSizeValid); return size; } Addr getOffset(int blkSize) const { return addr & (Addr)(blkSize - 1); } - void addrOverride(Addr newAddr) { assert(addrSizeValid); addr = newAddr; } - void cmdOverride(MemCmd newCmd) { cmd = newCmd; } - /** Constructor. Note that a Request object must be constructed * first, but the Requests's physical address and size fields * need not be valid. The command and destination addresses * must be supplied. */ Packet(Request *_req, MemCmd _cmd, short _dest) - : data(NULL), staticData(false), dynamicData(false), arrayData(false), + : cmd(_cmd), req(_req), + data(NULL), staticData(false), dynamicData(false), arrayData(false), addr(_req->paddr), size(_req->size), dest(_dest), - addrSizeValid(_req->validPaddr), srcValid(false), - snoopFlags(0), - time(curTick), - req(_req), coherence(NULL), senderState(NULL), cmd(_cmd), - result(Unknown) + addrSizeValid(_req->validPaddr), srcValid(false), destValid(true), + flags(0), time(curTick), senderState(NULL) { } @@ -365,13 +359,11 @@ class Packet : public FastAlloc * a request that is for a whole block, not the address from the req. * this allows for overriding the size/addr of the req.*/ Packet(Request *_req, MemCmd _cmd, short _dest, int _blkSize) - : data(NULL), staticData(false), dynamicData(false), arrayData(false), + : cmd(_cmd), req(_req), + data(NULL), staticData(false), dynamicData(false), arrayData(false), addr(_req->paddr & ~(_blkSize - 1)), size(_blkSize), dest(_dest), - addrSizeValid(_req->validPaddr), srcValid(false), - snoopFlags(0), - time(curTick), - req(_req), coherence(NULL), senderState(NULL), cmd(_cmd), - result(Unknown) + addrSizeValid(_req->validPaddr), srcValid(false), destValid(true), + flags(0), time(curTick), senderState(NULL) { } @@ -382,15 +374,14 @@ class Packet : public FastAlloc * dynamic data, user must guarantee that the new packet's * lifetime is less than that of the original packet. */ Packet(Packet *origPkt) - : data(NULL), staticData(false), dynamicData(false), arrayData(false), + : cmd(origPkt->cmd), req(origPkt->req), + data(NULL), staticData(false), dynamicData(false), arrayData(false), addr(origPkt->addr), size(origPkt->size), src(origPkt->src), dest(origPkt->dest), - addrSizeValid(origPkt->addrSizeValid), srcValid(origPkt->srcValid), - snoopFlags(origPkt->snoopFlags), - time(curTick), - req(origPkt->req), coherence(origPkt->coherence), - senderState(origPkt->senderState), cmd(origPkt->cmd), - result(origPkt->result) + addrSizeValid(origPkt->addrSizeValid), + srcValid(origPkt->srcValid), destValid(origPkt->destValid), + flags(origPkt->flags), + time(curTick), senderState(origPkt->senderState) { } @@ -405,12 +396,11 @@ class Packet : public FastAlloc * multiple transactions. */ void reinitFromRequest() { assert(req->validPaddr); - snoopFlags = 0; + flags = 0; addr = req->paddr; size = req->size; time = req->time; addrSizeValid = true; - result = Unknown; if (dynamicData) { deleteData(); dynamicData = false; @@ -424,34 +414,24 @@ class Packet : public FastAlloc * destination fields are *not* modified, as is appropriate for * atomic accesses. */ - void makeAtomicResponse() + void makeResponse() { assert(needsResponse()); assert(isRequest()); - assert(result == Unknown); cmd = cmd.responseCommand(); - result = Success; + dest = src; + destValid = srcValid; + srcValid = false; } - /** - * Perform the additional work required for timing responses above - * and beyond atomic responses; i.e., change the destination to - * point back to the requester and clear the source field. - */ - void convertAtomicToTimingResponse() + void makeAtomicResponse() { - dest = getSrc(); - srcValid = false; + makeResponse(); } - /** - * Take a request packet and modify it in place to be suitable for - * returning as a response to a timing request. - */ void makeTimingResponse() { - makeAtomicResponse(); - convertAtomicToTimingResponse(); + makeResponse(); } /** @@ -462,9 +442,10 @@ class Packet : public FastAlloc void reinitNacked() { - assert(needsResponse() && result == Nacked); - dest = Broadcast; - result = Unknown; + assert(wasNacked()); + cmd = origCmd; + assert(needsResponse()); + setDest(Broadcast); } diff --git a/src/mem/physical.cc b/src/mem/physical.cc index 93cba96c4..2742eca51 100644 --- a/src/mem/physical.cc +++ b/src/mem/physical.cc @@ -322,7 +322,7 @@ PhysicalMemory::doFunctionalAccess(PacketPtr pkt) pkt->cmdString()); } - pkt->result = Packet::Success; + pkt->makeAtomicResponse(); } diff --git a/src/mem/port.cc b/src/mem/port.cc index e6ea773f2..ba4f23668 100644 --- a/src/mem/port.cc +++ b/src/mem/port.cc @@ -58,12 +58,11 @@ void Port::blobHelper(Addr addr, uint8_t *p, int size, MemCmd cmd) { Request req; - Packet pkt(&req, cmd, Packet::Broadcast); for (ChunkGenerator gen(addr, size, peerBlockSize()); !gen.done(); gen.next()) { req.setPhys(gen.addr(), gen.size(), 0); - pkt.reinitFromRequest(); + Packet pkt(&req, cmd, Packet::Broadcast); pkt.dataStatic(p); sendFunctional(&pkt); p += gen.size(); diff --git a/src/mem/tport.cc b/src/mem/tport.cc index 6c8c12ce2..d6ff64608 100644 --- a/src/mem/tport.cc +++ b/src/mem/tport.cc @@ -55,7 +55,7 @@ SimpleTimingPort::recvFunctional(PacketPtr pkt) checkFunctional(pkt); // Just do an atomic access and throw away the returned latency - if (pkt->result != Packet::Success) + if (!pkt->isResponse()) recvAtomic(pkt); } @@ -68,7 +68,6 @@ SimpleTimingPort::recvTiming(PacketPtr pkt) // correctly with the drain code, so that would need to be fixed // if we ever added it back. assert(pkt->isRequest()); - assert(pkt->result == Packet::Unknown); if (pkt->memInhibitAsserted()) { // snooper will supply based on copy of packet @@ -85,7 +84,6 @@ SimpleTimingPort::recvTiming(PacketPtr pkt) // recvAtomic() should already have turned packet into // atomic response assert(pkt->isResponse()); - pkt->convertAtomicToTimingResponse(); schedSendTiming(pkt, curTick + latency); } else { delete pkt->req; @@ -138,12 +136,15 @@ void SimpleTimingPort::sendDeferredPacket() { assert(deferredPacketReady()); - bool success = sendTiming(transmitList.front().pkt); + // take packet off list here; if recvTiming() on the other side + // calls sendTiming() back on us (like SimpleTimingCpu does), then + // we get confused by having a non-active packet on transmitList + DeferredPacket dp = transmitList.front(); + transmitList.pop_front(); + bool success = sendTiming(dp.pkt); if (success) { - //send successful, remove packet - transmitList.pop_front(); - if (!transmitList.empty()) { + if (!transmitList.empty() && !sendEvent->scheduled()) { Tick time = transmitList.front().tick; sendEvent->schedule(time <= curTick ? curTick+1 : time); } @@ -152,6 +153,12 @@ SimpleTimingPort::sendDeferredPacket() drainEvent->process(); drainEvent = NULL; } + } else { + // Unsuccessful, need to put back on transmitList. Callee + // should not have messed with it (since it didn't accept that + // packet), so we can just push it back on the front. + assert(!sendEvent->scheduled()); + transmitList.push_front(dp); } waitingOnRetry = !success; -- cgit v1.2.3 From 6babda7123be5e69db137e77589d88c768c19345 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Sat, 30 Jun 2007 13:34:16 -0700 Subject: Fix up a few statistics problems. Stats pretty much line up with old code, except: - bug in old code included L1 latency in L2 miss time, making it too high - UniCoherence did cache-to-cache transfers even from non-owner caches, so occasionally the icache would get a block from the dcache not the L2 - L2 can now receive ReadExReq from L1 since L1s have coherence --HG-- extra : convert_revision : 5052c1a1767b5a662f30a88f16012165a73b791c --- src/mem/cache/base_cache.cc | 54 +++++++++++++++++++++------------------- src/mem/cache/base_cache.hh | 6 ++--- src/mem/cache/cache_impl.hh | 21 +++++++++------- src/mem/cache/miss/mshr.cc | 20 +++++++-------- src/mem/cache/miss/mshr.hh | 10 +++++--- src/mem/cache/miss/mshr_queue.cc | 4 +-- src/mem/cache/miss/mshr_queue.hh | 6 ++--- src/mem/tport.hh | 2 +- 8 files changed, 65 insertions(+), 58 deletions(-) diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc index 870658675..ec9e1cf9b 100644 --- a/src/mem/cache/base_cache.cc +++ b/src/mem/cache/base_cache.cc @@ -150,20 +150,29 @@ BaseCache::regStats() ; } +// These macros make it easier to sum the right subset of commands and +// to change the subset of commands that are considered "demand" vs +// "non-demand" +#define SUM_DEMAND(s) \ + (s[MemCmd::ReadReq] + s[MemCmd::WriteReq] + s[MemCmd::ReadExReq]) + +// should writebacks be included here? prior code was inconsistent... +#define SUM_NON_DEMAND(s) \ + (s[MemCmd::SoftPFReq] + s[MemCmd::HardPFReq]) + demandHits .name(name() + ".demand_hits") .desc("number of demand (read+write) hits") .flags(total) ; - demandHits = hits[MemCmd::ReadReq] + hits[MemCmd::WriteReq]; + demandHits = SUM_DEMAND(hits); overallHits .name(name() + ".overall_hits") .desc("number of overall hits") .flags(total) ; - overallHits = demandHits + hits[MemCmd::SoftPFReq] + hits[MemCmd::HardPFReq] - + hits[MemCmd::Writeback]; + overallHits = demandHits + SUM_NON_DEMAND(hits); // Miss statistics for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { @@ -183,15 +192,14 @@ BaseCache::regStats() .desc("number of demand (read+write) misses") .flags(total) ; - demandMisses = misses[MemCmd::ReadReq] + misses[MemCmd::WriteReq]; + demandMisses = SUM_DEMAND(misses); overallMisses .name(name() + ".overall_misses") .desc("number of overall misses") .flags(total) ; - overallMisses = demandMisses + misses[MemCmd::SoftPFReq] + - misses[MemCmd::HardPFReq] + misses[MemCmd::Writeback]; + overallMisses = demandMisses + SUM_NON_DEMAND(misses); // Miss latency statistics for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { @@ -211,15 +219,14 @@ BaseCache::regStats() .desc("number of demand (read+write) miss cycles") .flags(total) ; - demandMissLatency = missLatency[MemCmd::ReadReq] + missLatency[MemCmd::WriteReq]; + demandMissLatency = SUM_DEMAND(missLatency); overallMissLatency .name(name() + ".overall_miss_latency") .desc("number of overall miss cycles") .flags(total) ; - overallMissLatency = demandMissLatency + missLatency[MemCmd::SoftPFReq] + - missLatency[MemCmd::HardPFReq]; + overallMissLatency = demandMissLatency + SUM_NON_DEMAND(missLatency); // access formulas for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { @@ -368,15 +375,14 @@ BaseCache::regStats() .desc("number of demand (read+write) MSHR hits") .flags(total) ; - demandMshrHits = mshr_hits[MemCmd::ReadReq] + mshr_hits[MemCmd::WriteReq]; + demandMshrHits = SUM_DEMAND(mshr_hits); overallMshrHits .name(name() + ".overall_mshr_hits") .desc("number of overall MSHR hits") .flags(total) ; - overallMshrHits = demandMshrHits + mshr_hits[MemCmd::SoftPFReq] + - mshr_hits[MemCmd::HardPFReq]; + overallMshrHits = demandMshrHits + SUM_NON_DEMAND(mshr_hits); // MSHR miss statistics for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { @@ -396,15 +402,14 @@ BaseCache::regStats() .desc("number of demand (read+write) MSHR misses") .flags(total) ; - demandMshrMisses = mshr_misses[MemCmd::ReadReq] + mshr_misses[MemCmd::WriteReq]; + demandMshrMisses = SUM_DEMAND(mshr_misses); overallMshrMisses .name(name() + ".overall_mshr_misses") .desc("number of overall MSHR misses") .flags(total) ; - overallMshrMisses = demandMshrMisses + mshr_misses[MemCmd::SoftPFReq] + - mshr_misses[MemCmd::HardPFReq]; + overallMshrMisses = demandMshrMisses + SUM_NON_DEMAND(mshr_misses); // MSHR miss latency statistics for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { @@ -424,16 +429,15 @@ BaseCache::regStats() .desc("number of demand (read+write) MSHR miss cycles") .flags(total) ; - demandMshrMissLatency = mshr_miss_latency[MemCmd::ReadReq] - + mshr_miss_latency[MemCmd::WriteReq]; + demandMshrMissLatency = SUM_DEMAND(mshr_miss_latency); overallMshrMissLatency .name(name() + ".overall_mshr_miss_latency") .desc("number of overall MSHR miss cycles") .flags(total) ; - overallMshrMissLatency = demandMshrMissLatency + - mshr_miss_latency[MemCmd::SoftPFReq] + mshr_miss_latency[MemCmd::HardPFReq]; + overallMshrMissLatency = + demandMshrMissLatency + SUM_NON_DEMAND(mshr_miss_latency); // MSHR uncacheable statistics for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { @@ -453,9 +457,8 @@ BaseCache::regStats() .desc("number of overall MSHR uncacheable misses") .flags(total) ; - overallMshrUncacheable = mshr_uncacheable[MemCmd::ReadReq] - + mshr_uncacheable[MemCmd::WriteReq] + mshr_uncacheable[MemCmd::SoftPFReq] - + mshr_uncacheable[MemCmd::HardPFReq]; + overallMshrUncacheable = + SUM_DEMAND(mshr_uncacheable) + SUM_NON_DEMAND(mshr_uncacheable); // MSHR miss latency statistics for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { @@ -475,10 +478,9 @@ BaseCache::regStats() .desc("number of overall MSHR uncacheable cycles") .flags(total) ; - overallMshrUncacheableLatency = mshr_uncacheable_lat[MemCmd::ReadReq] - + mshr_uncacheable_lat[MemCmd::WriteReq] - + mshr_uncacheable_lat[MemCmd::SoftPFReq] - + mshr_uncacheable_lat[MemCmd::HardPFReq]; + overallMshrUncacheableLatency = + SUM_DEMAND(mshr_uncacheable_lat) + + SUM_NON_DEMAND(mshr_uncacheable_lat); #if 0 // MSHR access formulas diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh index 09484a14a..fcc040bd9 100644 --- a/src/mem/cache/base_cache.hh +++ b/src/mem/cache/base_cache.hh @@ -476,10 +476,10 @@ class BaseCache : public MemObject } } - Tick nextMSHRReadyTick() + Tick nextMSHRReadyTime() { - return std::min(mshrQueue.nextMSHRReadyTick(), - writeBuffer.nextMSHRReadyTick()); + return std::min(mshrQueue.nextMSHRReadyTime(), + writeBuffer.nextMSHRReadyTime()); } /** diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index 1823ea6b9..568e7ff63 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -615,7 +615,7 @@ Cache::satisfyMSHR(MSHR *mshr, PacketPtr pkt, if (!target->pkt->req->isUncacheable()) { missLatency[target->pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] += - completion_time - target->time; + completion_time - target->recvTime; } target->pkt->makeTimingResponse(); cpuSidePort->respond(target->pkt, completion_time); @@ -668,11 +668,14 @@ Cache::handleResponse(PacketPtr pkt) // Can we deallocate MSHR when done? bool deallocate = false; + // Initial target is used just for stats + MSHR::Target *initial_tgt = mshr->getTarget(); + int stats_cmd_idx = initial_tgt->pkt->cmdToIndex(); + Tick miss_latency = curTick - initial_tgt->recvTime; + if (mshr->isCacheFill) { -#if 0 - mshr_miss_latency[mshr->originalCmd.toInt()][0/*pkt->req->getThreadNum()*/] += - curTick - pkt->time; -#endif + mshr_miss_latency[stats_cmd_idx][0/*pkt->req->getThreadNum()*/] += + miss_latency; DPRINTF(Cache, "Block for addr %x being updated in Cache\n", pkt->getAddr()); BlkType *blk = tags->findBlock(pkt->getAddr()); @@ -698,8 +701,8 @@ Cache::handleResponse(PacketPtr pkt) } } else { if (pkt->req->isUncacheable()) { - mshr_uncacheable_lat[pkt->cmd.toInt()][0/*pkt->req->getThreadNum()*/] += - curTick - pkt->time; + mshr_uncacheable_lat[stats_cmd_idx][0/*pkt->req->getThreadNum()*/] += + miss_latency; } while (mshr->hasTargets()) { @@ -1262,8 +1265,8 @@ Cache::MemSidePort::sendPacket() // tried to send packet... if it was successful (no retry), see if // we need to rerequest bus or not if (!waitingOnRetry) { - Tick nextReady = std::min(deferredPacketReadyTick(), - myCache()->nextMSHRReadyTick()); + Tick nextReady = std::min(deferredPacketReadyTime(), + myCache()->nextMSHRReadyTime()); // @TODO: need to facotr in prefetch requests here somehow if (nextReady != MaxTick) { DPRINTF(CachePort, "more packets to send @ %d\n", nextReady); diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc index 63b3cacc2..5d5e63f90 100644 --- a/src/mem/cache/miss/mshr.cc +++ b/src/mem/cache/miss/mshr.cc @@ -56,11 +56,11 @@ MSHR::MSHR() void MSHR::allocate(Addr _addr, int _size, PacketPtr target, - Tick when, Counter _order) + Tick whenReady, Counter _order) { addr = _addr; size = _size; - readyTick = when; + readyTime = whenReady; order = _order; assert(target); isCacheFill = false; @@ -71,7 +71,7 @@ MSHR::allocate(Addr _addr, int _size, PacketPtr target, ntargets = 1; // Don't know of a case where we would allocate a new MSHR for a // snoop (mem-side request), so set cpuSide to true here. - targets.push_back(Target(target, when, _order, true)); + targets.push_back(Target(target, whenReady, _order, true)); assert(deferredTargets.empty()); deferredNeedsExclusive = false; pendingInvalidate = false; @@ -94,33 +94,33 @@ MSHR::deallocate() * Adds a target to an MSHR */ void -MSHR::allocateTarget(PacketPtr target, Tick when, Counter _order) +MSHR::allocateTarget(PacketPtr target, Tick whenReady, Counter _order) { if (inService) { if (!deferredTargets.empty() || pendingInvalidate || (!needsExclusive && target->needsExclusive())) { // need to put on deferred list - deferredTargets.push_back(Target(target, when, _order, true)); + deferredTargets.push_back(Target(target, whenReady, _order, true)); if (target->needsExclusive()) { deferredNeedsExclusive = true; } } else { // still OK to append to outstanding request - targets.push_back(Target(target, when, _order, true)); + targets.push_back(Target(target, whenReady, _order, true)); } } else { if (target->needsExclusive()) { needsExclusive = true; } - targets.push_back(Target(target, when, _order, true)); + targets.push_back(Target(target, whenReady, _order, true)); } ++ntargets; } void -MSHR::allocateSnoopTarget(PacketPtr pkt, Tick when, Counter _order) +MSHR::allocateSnoopTarget(PacketPtr pkt, Tick whenReady, Counter _order) { assert(inService); // don't bother to call otherwise @@ -137,7 +137,7 @@ MSHR::allocateSnoopTarget(PacketPtr pkt, Tick when, Counter _order) if (needsExclusive || pkt->needsExclusive()) { // actual target device (typ. PhysicalMemory) will delete the // packet on reception, so we need to save a copy here - targets.push_back(Target(new Packet(pkt), when, _order, false)); + targets.push_back(Target(new Packet(pkt), whenReady, _order, false)); ++ntargets; if (needsExclusive) { @@ -177,7 +177,7 @@ MSHR::promoteDeferredTargets() pendingShared = false; deferredNeedsExclusive = false; order = targets.front().order; - readyTick = std::max(curTick, targets.front().time); + readyTime = std::max(curTick, targets.front().readyTime); return true; } diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh index 4db7b1cfe..293f290b8 100644 --- a/src/mem/cache/miss/mshr.hh +++ b/src/mem/cache/miss/mshr.hh @@ -54,15 +54,17 @@ class MSHR : public Packet::SenderState class Target { public: - Tick time; //!< Time when request was received (for stats) + Tick recvTime; //!< Time when request was received (for stats) + Tick readyTime; //!< Time when request is ready to be serviced Counter order; //!< Global order (for memory consistency mgmt) PacketPtr pkt; //!< Pending request packet. bool cpuSide; //!< Did request come from cpu side or mem side? bool isCpuSide() { return cpuSide; } - Target(PacketPtr _pkt, Tick _time, Counter _order, bool _cpuSide) - : time(_time), order(_order), pkt(_pkt), cpuSide(_cpuSide) + Target(PacketPtr _pkt, Tick _readyTime, Counter _order, bool _cpuSide) + : recvTime(curTick), readyTime(_readyTime), order(_order), + pkt(_pkt), cpuSide(_cpuSide) {} }; @@ -81,7 +83,7 @@ class MSHR : public Packet::SenderState MSHRQueue *queue; /** Cycle when ready to issue */ - Tick readyTick; + Tick readyTime; /** Order number assigned by the miss queue. */ Counter order; diff --git a/src/mem/cache/miss/mshr_queue.cc b/src/mem/cache/miss/mshr_queue.cc index 18184bd20..56ec62a7d 100644 --- a/src/mem/cache/miss/mshr_queue.cc +++ b/src/mem/cache/miss/mshr_queue.cc @@ -111,14 +111,14 @@ MSHRQueue::findPending(Addr addr, int size) const MSHR::Iterator MSHRQueue::addToReadyList(MSHR *mshr) { - if (readyList.empty() || readyList.back()->readyTick <= mshr->readyTick) { + if (readyList.empty() || readyList.back()->readyTime <= mshr->readyTime) { return readyList.insert(readyList.end(), mshr); } MSHR::Iterator i = readyList.begin(); MSHR::Iterator end = readyList.end(); for (; i != end; ++i) { - if ((*i)->readyTick > mshr->readyTick) { + if ((*i)->readyTime > mshr->readyTime) { return readyList.insert(i, mshr); } } diff --git a/src/mem/cache/miss/mshr_queue.hh b/src/mem/cache/miss/mshr_queue.hh index fd61dec8b..1f1d59e98 100644 --- a/src/mem/cache/miss/mshr_queue.hh +++ b/src/mem/cache/miss/mshr_queue.hh @@ -193,15 +193,15 @@ class MSHRQueue */ MSHR *getNextMSHR() const { - if (readyList.empty() || readyList.front()->readyTick > curTick) { + if (readyList.empty() || readyList.front()->readyTime > curTick) { return NULL; } return readyList.front(); } - Tick nextMSHRReadyTick() const + Tick nextMSHRReadyTime() const { - return readyList.empty() ? MaxTick : readyList.front()->readyTick; + return readyList.empty() ? MaxTick : readyList.front()->readyTime; } }; diff --git a/src/mem/tport.hh b/src/mem/tport.hh index bfed29f34..bc9da6c44 100644 --- a/src/mem/tport.hh +++ b/src/mem/tport.hh @@ -105,7 +105,7 @@ class SimpleTimingPort : public Port bool deferredPacketReady() { return !transmitList.empty() && transmitList.front().tick <= curTick; } - Tick deferredPacketReadyTick() + Tick deferredPacketReadyTime() { return transmitList.empty() ? MaxTick : transmitList.front().tick; } void schedSendEvent(Tick when) -- cgit v1.2.3 From f0c4dd79200bb76f472aa09d6aff02b67a1db8c5 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Sat, 30 Jun 2007 13:56:25 -0700 Subject: Factor out a little more common code. --HG-- extra : convert_revision : 626255a91679d534030c91bcdb4fc1bed36ceb9b --- src/mem/cache/cache_impl.hh | 78 +++++++++++++++++++-------------------------- 1 file changed, 32 insertions(+), 46 deletions(-) diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index 568e7ff63..b4c3c6359 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -143,6 +143,37 @@ Cache::cmpAndSwap(BlkType *blk, PacketPtr pkt) } +template +void +Cache::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk) +{ + assert(blk); + assert(pkt->needsExclusive() ? blk->isWritable() : blk->isValid()); + assert(pkt->getOffset(blkSize) + pkt->getSize() <= blkSize); + + // Check RMW operations first since both isRead() and + // isWrite() will be true for them + if (pkt->cmd == MemCmd::SwapReq) { + cmpAndSwap(blk, pkt); + } else if (pkt->isWrite()) { + if (blk->checkWrite(pkt)) { + blk->status |= BlkDirty; + pkt->writeDataToBlock(blk->data, blkSize); + } + } else if (pkt->isRead()) { + if (pkt->isLocked()) { + blk->trackLoadLocked(pkt); + } + pkt->setDataFromBlock(blk->data, blkSize); + } else { + // Not a read or write... must be an upgrade. it's OK + // to just ack those as long as we have an exclusive + // copy at this level. + assert(pkt->cmd == MemCmd::UpgradeReq); + } +} + + ///////////////////////////////////////////////////// // // MSHR helper functions @@ -237,27 +268,7 @@ Cache::access(PacketPtr pkt, BlkType *&blk, int &lat) // OK to satisfy access hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++; satisfied = true; - - // Check RMW operations first since both isRead() and - // isWrite() will be true for them - if (pkt->cmd == MemCmd::SwapReq) { - cmpAndSwap(blk, pkt); - } else if (pkt->isWrite()) { - if (blk->checkWrite(pkt)) { - blk->status |= BlkDirty; - pkt->writeDataToBlock(blk->data, blkSize); - } - } else if (pkt->isRead()) { - if (pkt->isLocked()) { - blk->trackLoadLocked(pkt); - } - pkt->setDataFromBlock(blk->data, blkSize); - } else { - // Not a read or write... must be an upgrade. it's OK - // to just ack those as long as we have an exclusive - // copy at this level. - assert(pkt->cmd == MemCmd::UpgradeReq); - } + satisfyCpuSideRequest(pkt, blk); } else { // permission violation... nothing to do here, leave unsatisfied // for statistics purposes this counts like a complete miss @@ -558,31 +569,6 @@ Cache::functionalAccess(PacketPtr pkt, ///////////////////////////////////////////////////// -template -void -Cache::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk) -{ - assert(blk); - assert(pkt->needsExclusive() ? blk->isWritable() : blk->isValid()); - assert(pkt->isWrite() || pkt->isReadWrite() || pkt->isRead()); - assert(pkt->getOffset(blkSize) + pkt->getSize() <= blkSize); - - if (pkt->isWrite()) { - if (blk->checkWrite(pkt)) { - blk->status |= BlkDirty; - pkt->writeDataToBlock(blk->data, blkSize); - } - } else if (pkt->isReadWrite()) { - cmpAndSwap(blk, pkt); - } else { - if (pkt->isLocked()) { - blk->trackLoadLocked(pkt); - } - pkt->setDataFromBlock(blk->data, blkSize); - } -} - - template bool Cache::satisfyMSHR(MSHR *mshr, PacketPtr pkt, -- cgit v1.2.3 From ee54ad318a63e868ab10bbc1b714bbb8209a11da Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Sat, 30 Jun 2007 17:45:58 -0700 Subject: Event descriptions should not end in "event" (they function as adjectives not nouns) --HG-- extra : convert_revision : 6506474ff3356ae8c80ed276c3608d8a4680bfdb --- src/arch/mips/regfile/misc_regfile.cc | 2 +- src/cpu/base.cc | 2 +- src/cpu/o3/commit_impl.hh | 2 +- src/cpu/o3/cpu.cc | 6 +++--- src/cpu/o3/inst_queue_impl.hh | 2 +- src/cpu/o3/lsq_unit_impl.hh | 2 +- src/cpu/ozone/back_end_impl.hh | 4 ++-- src/cpu/ozone/cpu_impl.hh | 2 +- src/cpu/ozone/inorder_back_end_impl.hh | 2 +- src/cpu/ozone/inst_queue_impl.hh | 2 +- src/cpu/ozone/lsq_unit_impl.hh | 2 +- src/cpu/ozone/lw_back_end_impl.hh | 2 +- src/cpu/ozone/lw_lsq_impl.hh | 2 +- src/cpu/quiesce_event.cc | 2 +- src/cpu/simple/atomic.cc | 2 +- src/cpu/simple/timing.hh | 6 +++--- src/cpu/trace/opt_cpu.cc | 2 +- src/cpu/trace/trace_cpu.cc | 2 +- src/dev/ethertap.hh | 2 +- src/dev/uart8250.cc | 2 +- src/mem/bridge.hh | 2 +- 21 files changed, 26 insertions(+), 26 deletions(-) diff --git a/src/arch/mips/regfile/misc_regfile.cc b/src/arch/mips/regfile/misc_regfile.cc index c97d93cf9..71be3adf9 100755 --- a/src/arch/mips/regfile/misc_regfile.cc +++ b/src/arch/mips/regfile/misc_regfile.cc @@ -357,7 +357,7 @@ MiscRegFile::CP0Event::process() const char * MiscRegFile::CP0Event::description() { - return "Coprocessor-0 event"; + return "Coprocessor-0"; } void diff --git a/src/cpu/base.cc b/src/cpu/base.cc index f86313da0..cf007a06b 100644 --- a/src/cpu/base.cc +++ b/src/cpu/base.cc @@ -91,7 +91,7 @@ CPUProgressEvent::process() const char * CPUProgressEvent::description() { - return "CPU Progress event"; + return "CPU Progress"; } #if FULL_SYSTEM diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index 9411c6c62..f263383ae 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -67,7 +67,7 @@ template const char * DefaultCommit::TrapEvent::description() { - return "Trap event"; + return "Trap"; } template diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 6a3eb9c43..2bf8f9832 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -83,7 +83,7 @@ template const char * FullO3CPU::TickEvent::description() { - return "FullO3CPU tick event"; + return "FullO3CPU tick"; } template @@ -112,7 +112,7 @@ template const char * FullO3CPU::ActivateThreadEvent::description() { - return "FullO3CPU \"Activate Thread\" event"; + return "FullO3CPU \"Activate Thread\""; } template @@ -144,7 +144,7 @@ template const char * FullO3CPU::DeallocateContextEvent::description() { - return "FullO3CPU \"Deallocate Context\" event"; + return "FullO3CPU \"Deallocate Context\""; } template diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index bdf5f07aa..99bffe1a6 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -60,7 +60,7 @@ template const char * InstructionQueue::FUCompletion::description() { - return "Functional unit completion event"; + return "Functional unit completion"; } template diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index 91e616589..810a6d29f 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -69,7 +69,7 @@ template const char * LSQUnit::WritebackEvent::description() { - return "Store writeback event"; + return "Store writeback"; } template diff --git a/src/cpu/ozone/back_end_impl.hh b/src/cpu/ozone/back_end_impl.hh index 4078699fe..27146ecf0 100644 --- a/src/cpu/ozone/back_end_impl.hh +++ b/src/cpu/ozone/back_end_impl.hh @@ -583,7 +583,7 @@ template const char * BackEnd::LdWritebackEvent::description() { - return "Load writeback event"; + return "Load writeback"; } @@ -603,7 +603,7 @@ template const char * BackEnd::DCacheCompletionEvent::description() { - return "Cache completion event"; + return "Cache completion"; } template diff --git a/src/cpu/ozone/cpu_impl.hh b/src/cpu/ozone/cpu_impl.hh index d1214223b..d73e5768a 100644 --- a/src/cpu/ozone/cpu_impl.hh +++ b/src/cpu/ozone/cpu_impl.hh @@ -84,7 +84,7 @@ template const char * OzoneCPU::TickEvent::description() { - return "OzoneCPU tick event"; + return "OzoneCPU tick"; } template diff --git a/src/cpu/ozone/inorder_back_end_impl.hh b/src/cpu/ozone/inorder_back_end_impl.hh index 8d7ebb60e..c57fa0200 100644 --- a/src/cpu/ozone/inorder_back_end_impl.hh +++ b/src/cpu/ozone/inorder_back_end_impl.hh @@ -540,5 +540,5 @@ template const char * InorderBackEnd::DCacheCompletionEvent::description() { - return "DCache completion event"; + return "DCache completion"; } diff --git a/src/cpu/ozone/inst_queue_impl.hh b/src/cpu/ozone/inst_queue_impl.hh index ea9d03c0d..461c7eb0f 100644 --- a/src/cpu/ozone/inst_queue_impl.hh +++ b/src/cpu/ozone/inst_queue_impl.hh @@ -64,7 +64,7 @@ template const char * InstQueue::FUCompletion::description() { - return "Functional unit completion event"; + return "Functional unit completion"; } #endif template diff --git a/src/cpu/ozone/lsq_unit_impl.hh b/src/cpu/ozone/lsq_unit_impl.hh index c46eb90be..e08e54835 100644 --- a/src/cpu/ozone/lsq_unit_impl.hh +++ b/src/cpu/ozone/lsq_unit_impl.hh @@ -62,7 +62,7 @@ template const char * OzoneLSQ::StoreCompletionEvent::description() { - return "LSQ store completion event"; + return "LSQ store completion"; } template diff --git a/src/cpu/ozone/lw_back_end_impl.hh b/src/cpu/ozone/lw_back_end_impl.hh index c0a9cad24..f84bda348 100644 --- a/src/cpu/ozone/lw_back_end_impl.hh +++ b/src/cpu/ozone/lw_back_end_impl.hh @@ -121,7 +121,7 @@ template const char * LWBackEnd::TrapEvent::description() { - return "Trap event"; + return "Trap"; } template diff --git a/src/cpu/ozone/lw_lsq_impl.hh b/src/cpu/ozone/lw_lsq_impl.hh index eefc0df83..e3000288c 100644 --- a/src/cpu/ozone/lw_lsq_impl.hh +++ b/src/cpu/ozone/lw_lsq_impl.hh @@ -57,7 +57,7 @@ template const char * OzoneLWLSQ::WritebackEvent::description() { - return "Store writeback event"; + return "Store writeback"; } template diff --git a/src/cpu/quiesce_event.cc b/src/cpu/quiesce_event.cc index fa79e6d1e..3495a0e52 100644 --- a/src/cpu/quiesce_event.cc +++ b/src/cpu/quiesce_event.cc @@ -47,5 +47,5 @@ EndQuiesceEvent::process() const char* EndQuiesceEvent::description() { - return "End Quiesce Event."; + return "End Quiesce"; } diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc index bcd6662c8..8e8da2fa2 100644 --- a/src/cpu/simple/atomic.cc +++ b/src/cpu/simple/atomic.cc @@ -57,7 +57,7 @@ AtomicSimpleCPU::TickEvent::process() const char * AtomicSimpleCPU::TickEvent::description() { - return "AtomicSimpleCPU tick event"; + return "AtomicSimpleCPU tick"; } Port * diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh index 39958bfb6..ba194b3fa 100644 --- a/src/cpu/simple/timing.hh +++ b/src/cpu/simple/timing.hh @@ -101,7 +101,7 @@ class TimingSimpleCPU : public BaseSimpleCPU TickEvent(TimingSimpleCPU *_cpu) :Event(&mainEventQueue), cpu(_cpu) {} - const char *description() { return "Timing CPU clock event"; } + const char *description() { return "Timing CPU tick"; } void schedule(PacketPtr _pkt, Tick t); }; @@ -127,7 +127,7 @@ class TimingSimpleCPU : public BaseSimpleCPU ITickEvent(TimingSimpleCPU *_cpu) : TickEvent(_cpu) {} void process(); - const char *description() { return "Timing CPU clock event"; } + const char *description() { return "Timing CPU icache tick"; } }; ITickEvent tickEvent; @@ -155,7 +155,7 @@ class TimingSimpleCPU : public BaseSimpleCPU DTickEvent(TimingSimpleCPU *_cpu) : TickEvent(_cpu) {} void process(); - const char *description() { return "Timing CPU clock event"; } + const char *description() { return "Timing CPU dcache tick"; } }; DTickEvent tickEvent; diff --git a/src/cpu/trace/opt_cpu.cc b/src/cpu/trace/opt_cpu.cc index 996e89f01..0f2944f07 100644 --- a/src/cpu/trace/opt_cpu.cc +++ b/src/cpu/trace/opt_cpu.cc @@ -207,7 +207,7 @@ OptCPU::TickEvent::process() const char * OptCPU::TickEvent::description() { - return "OptCPU tick event"; + return "OptCPU tick"; } diff --git a/src/cpu/trace/trace_cpu.cc b/src/cpu/trace/trace_cpu.cc index 3c9da4849..32ed6c7d7 100644 --- a/src/cpu/trace/trace_cpu.cc +++ b/src/cpu/trace/trace_cpu.cc @@ -148,7 +148,7 @@ TraceCPU::TickEvent::process() const char * TraceCPU::TickEvent::description() { - return "TraceCPU tick event"; + return "TraceCPU tick"; } diff --git a/src/dev/ethertap.hh b/src/dev/ethertap.hh index f64ed7187..3d2838817 100644 --- a/src/dev/ethertap.hh +++ b/src/dev/ethertap.hh @@ -89,7 +89,7 @@ class EtherTap : public EtherInt TxEvent(EtherTap *_tap) : Event(&mainEventQueue), tap(_tap) {} void process() { tap->retransmit(); } - virtual const char *description() { return "retransmit event"; } + virtual const char *description() { return "EtherTap retransmit"; } }; friend class TxEvent; diff --git a/src/dev/uart8250.cc b/src/dev/uart8250.cc index 0ad80e077..358dda0d8 100644 --- a/src/dev/uart8250.cc +++ b/src/dev/uart8250.cc @@ -58,7 +58,7 @@ Uart8250::IntrEvent::IntrEvent(Uart8250 *u, int bit) const char * Uart8250::IntrEvent::description() { - return "uart interrupt delay event"; + return "uart interrupt delay"; } void diff --git a/src/mem/bridge.hh b/src/mem/bridge.hh index 7af764437..acae2f126 100644 --- a/src/mem/bridge.hh +++ b/src/mem/bridge.hh @@ -146,7 +146,7 @@ class Bridge : public MemObject virtual void process() { port->trySend(); } - virtual const char *description() { return "bridge send event"; } + virtual const char *description() { return "bridge send"; } }; SendEvent sendEvent; -- cgit v1.2.3 From d10a843723009ddee79cdbf94a46704df1e5cee6 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Sat, 30 Jun 2007 17:51:29 -0700 Subject: Get rid of obsolete fixPacket() functions. Handled by Packet::checkFunctional() now. --HG-- extra : convert_revision : 63642254e2789c80a369ac269f317ec054ffe3c0 --- src/mem/packet.cc | 25 ------------------------- src/mem/packet.hh | 16 ---------------- src/mem/tport.cc | 7 ++----- 3 files changed, 2 insertions(+), 46 deletions(-) diff --git a/src/mem/packet.cc b/src/mem/packet.cc index 55fe13f3c..8de02f533 100644 --- a/src/mem/packet.cc +++ b/src/mem/packet.cc @@ -164,31 +164,6 @@ Packet::intersect(PacketPtr p) return !(s1 > e2 || e1 < s2); } -bool -fixDelayedResponsePacket(PacketPtr func, PacketPtr timing) -{ - bool result; - - if (timing->isRead() || timing->isWrite()) { - // Ugly hack to deal with the fact that we queue the requests - // and don't convert them to responses until we issue them on - // the bus. I tried to avoid this by converting packets to - // responses right away, but this breaks during snoops where a - // responder may do the conversion before other caches have - // done the snoop. Would work if we copied the packet instead - // of just hanging on to a pointer. - MemCmd oldCmd = timing->cmd; - timing->cmd = timing->cmd.responseCommand(); - result = fixPacket(func, timing); - timing->cmd = oldCmd; - } - else { - //Don't toggle if it isn't a read/write response - result = fixPacket(func, timing); - } - - return result; -} bool Packet::checkFunctional(Addr addr, int size, uint8_t *data) diff --git a/src/mem/packet.hh b/src/mem/packet.hh index 10b9f490c..16bc6f458 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -574,22 +574,6 @@ class Packet : public FastAlloc } }; - - -/** Temporary for backwards compatibility. - */ -inline -bool fixPacket(PacketPtr func, PacketPtr timing) { - return !func->checkFunctional(timing); -} - -/** This function is a wrapper for the fixPacket field that toggles - * the hasData bit it is used when a response is waiting in the - * caches, but hasn't been marked as a response yet (so the fixPacket - * needs to get the correct value for the hasData) - */ -bool fixDelayedResponsePacket(PacketPtr func, PacketPtr timing); - std::ostream & operator<<(std::ostream &o, const Packet &p); #endif //__MEM_PACKET_HH diff --git a/src/mem/tport.cc b/src/mem/tport.cc index d6ff64608..a4f791048 100644 --- a/src/mem/tport.cc +++ b/src/mem/tport.cc @@ -40,11 +40,8 @@ SimpleTimingPort::checkFunctional(PacketPtr pkt) PacketPtr target = i->pkt; // If the target contains data, and it overlaps the // probed request, need to update data - if (target->intersect(pkt)) { - if (!fixPacket(pkt, target)) { - // fixPacket returns true for continue, false for done - return; - } + if (pkt->checkFunctional(target)) { + return; } } } -- cgit v1.2.3 From 2447abe5ce6c40e61eb09430c95a592aa2445349 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Sat, 30 Jun 2007 17:56:30 -0700 Subject: Can only call makeAtomicResponse() once... --HG-- extra : convert_revision : c49aade46aa64f979da35eb653b544ee5bd82f01 --- src/dev/ide_ctrl.cc | 9 +++++---- src/dev/ns_gige.cc | 2 +- src/dev/pcidev.cc | 1 - 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/dev/ide_ctrl.cc b/src/dev/ide_ctrl.cc index 01243ae73..07764aaba 100644 --- a/src/dev/ide_ctrl.cc +++ b/src/dev/ide_ctrl.cc @@ -232,8 +232,10 @@ Tick IdeController::readConfig(PacketPtr pkt) { int offset = pkt->getAddr() & PCI_CONFIG_SIZE; - if (offset < PCI_DEVICE_SPECIFIC) - return PciDev::readConfig(pkt); + if (offset < PCI_DEVICE_SPECIFIC) { + return PciDev::readConfig(pkt); + } + assert(offset >= IDE_CTRL_CONF_START && (offset + 1) <= IDE_CTRL_CONF_END); pkt->allocate(); @@ -297,7 +299,6 @@ IdeController::readConfig(PacketPtr pkt) } pkt->makeAtomicResponse(); return configDelay; - } @@ -361,6 +362,7 @@ IdeController::writeConfig(PacketPtr pkt) default: panic("invalid access size(?) for PCI configspace!\n"); } + pkt->makeAtomicResponse(); } /* Trap command register writes and enable IO/BM as appropriate as well as @@ -403,7 +405,6 @@ IdeController::writeConfig(PacketPtr pkt) bm_enabled = false; break; } - pkt->makeAtomicResponse(); return configDelay; } diff --git a/src/dev/ns_gige.cc b/src/dev/ns_gige.cc index 86f664238..17f7b433b 100644 --- a/src/dev/ns_gige.cc +++ b/src/dev/ns_gige.cc @@ -487,7 +487,7 @@ NSGigE::writeConfig(PacketPtr pkt) ioEnable = false; break; } - pkt->makeAtomicResponse(); + return configDelay; } diff --git a/src/dev/pcidev.cc b/src/dev/pcidev.cc index 85337c841..06806f841 100644 --- a/src/dev/pcidev.cc +++ b/src/dev/pcidev.cc @@ -284,7 +284,6 @@ PciDev::writeConfig(PacketPtr pkt) } pkt->makeAtomicResponse(); return configDelay; - } void -- cgit v1.2.3 From 07f091d6ed63d9b54c0415eacc070c3ea67566fc Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Sat, 30 Jun 2007 17:59:45 -0700 Subject: Get rid of remaining traces of obsolete CoherenceProtocol object. --HG-- extra : convert_revision : c5555b00bef1b304a84886188ad2c0dcb4d7c5b9 --- configs/common/Caches.py | 1 - configs/splash2/cluster.py | 6 +----- configs/splash2/run.py | 4 ---- src/mem/cache/BaseCache.py | 1 - tests/configs/memtest.py | 1 - tests/configs/o3-timing-mp.py | 1 - tests/configs/simple-atomic-mp.py | 1 - tests/configs/simple-timing-mp.py | 1 - tests/configs/tsunami-simple-atomic-dual.py | 1 - tests/configs/tsunami-simple-atomic.py | 1 - tests/configs/tsunami-simple-timing-dual.py | 1 - tests/configs/tsunami-simple-timing.py | 1 - 12 files changed, 1 insertion(+), 19 deletions(-) diff --git a/configs/common/Caches.py b/configs/common/Caches.py index 4bff2c8a4..43a1c6378 100644 --- a/configs/common/Caches.py +++ b/configs/common/Caches.py @@ -35,7 +35,6 @@ class L1Cache(BaseCache): latency = '1ns' mshrs = 10 tgts_per_mshr = 5 - protocol = CoherenceProtocol(protocol='moesi') class L2Cache(BaseCache): assoc = 8 diff --git a/configs/splash2/cluster.py b/configs/splash2/cluster.py index 799b85e6c..769bdcf5a 100644 --- a/configs/splash2/cluster.py +++ b/configs/splash2/cluster.py @@ -1,4 +1,4 @@ -# Copyright (c) 2006 The Regents of The University of Michigan +# Copyright (c) 2006-2007 The Regents of The University of Michigan # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -51,9 +51,6 @@ parser.add_option("-n", "--numcpus", parser.add_option("-f", "--frequency", default = "1GHz", help="Frequency of each CPU") -parser.add_option("-p", "--protocol", - default="moesi", - help="The coherence protocol to use for the L1'a (i.e. MOESI, MOSI)") parser.add_option("--l1size", default = "32kB") parser.add_option("--l1latency", @@ -141,7 +138,6 @@ class L1(BaseCache): block_size = 64 mshrs = 12 tgts_per_mshr = 8 - protocol = CoherenceProtocol(protocol=options.protocol) # ---------------------- # Base L2 Cache Definition diff --git a/configs/splash2/run.py b/configs/splash2/run.py index d051f1f1b..ff0a9448c 100644 --- a/configs/splash2/run.py +++ b/configs/splash2/run.py @@ -48,9 +48,6 @@ parser.add_option("-n", "--numcpus", parser.add_option("-f", "--frequency", default = "1GHz", help="Frequency of each CPU") -parser.add_option("-p", "--protocol", - default="moesi", - help="The coherence protocol to use for the L1'a (i.e. MOESI, MOSI)") parser.add_option("--l1size", default = "32kB") parser.add_option("--l1latency", @@ -162,7 +159,6 @@ class L1(BaseCache): block_size = 64 mshrs = 12 tgts_per_mshr = 8 - protocol = CoherenceProtocol(protocol=options.protocol) # ---------------------- # Base L2 Cache Definition diff --git a/src/mem/cache/BaseCache.py b/src/mem/cache/BaseCache.py index 55b68f81f..86148f821 100644 --- a/src/mem/cache/BaseCache.py +++ b/src/mem/cache/BaseCache.py @@ -51,7 +51,6 @@ class BaseCache(MemObject): mshrs = Param.Int("number of MSHRs (max outstanding requests)") prioritizeRequests = Param.Bool(False, "always service demand misses first") - protocol = Param.CoherenceProtocol(NULL, "coherence protocol to use") repl = Param.Repl(NULL, "replacement policy") size = Param.MemorySize("capacity in bytes") split = Param.Bool(False, "whether or not this cache is split") diff --git a/tests/configs/memtest.py b/tests/configs/memtest.py index 6fe244acf..93ea4cc0e 100644 --- a/tests/configs/memtest.py +++ b/tests/configs/memtest.py @@ -38,7 +38,6 @@ class L1(BaseCache): block_size = 64 mshrs = 12 tgts_per_mshr = 8 - protocol = CoherenceProtocol(protocol='moesi') # ---------------------- # Base L2 Cache diff --git a/tests/configs/o3-timing-mp.py b/tests/configs/o3-timing-mp.py index 1ac9bd2e4..fc6a72a82 100644 --- a/tests/configs/o3-timing-mp.py +++ b/tests/configs/o3-timing-mp.py @@ -39,7 +39,6 @@ class L1(BaseCache): block_size = 64 mshrs = 4 tgts_per_mshr = 8 - protocol = CoherenceProtocol(protocol='moesi') # ---------------------- # Base L2 Cache diff --git a/tests/configs/simple-atomic-mp.py b/tests/configs/simple-atomic-mp.py index de0793d1c..bc0ced250 100644 --- a/tests/configs/simple-atomic-mp.py +++ b/tests/configs/simple-atomic-mp.py @@ -38,7 +38,6 @@ class L1(BaseCache): block_size = 64 mshrs = 4 tgts_per_mshr = 8 - protocol = CoherenceProtocol(protocol='moesi') # ---------------------- # Base L2 Cache diff --git a/tests/configs/simple-timing-mp.py b/tests/configs/simple-timing-mp.py index 1fd0e8c3c..0b400e6b7 100644 --- a/tests/configs/simple-timing-mp.py +++ b/tests/configs/simple-timing-mp.py @@ -38,7 +38,6 @@ class L1(BaseCache): block_size = 64 mshrs = 4 tgts_per_mshr = 8 - protocol = CoherenceProtocol(protocol='moesi') # ---------------------- # Base L2 Cache diff --git a/tests/configs/tsunami-simple-atomic-dual.py b/tests/configs/tsunami-simple-atomic-dual.py index 131095055..de8fe2474 100644 --- a/tests/configs/tsunami-simple-atomic-dual.py +++ b/tests/configs/tsunami-simple-atomic-dual.py @@ -40,7 +40,6 @@ class L1(BaseCache): block_size = 64 mshrs = 4 tgts_per_mshr = 8 - protocol = CoherenceProtocol(protocol='moesi') # ---------------------- # Base L2 Cache diff --git a/tests/configs/tsunami-simple-atomic.py b/tests/configs/tsunami-simple-atomic.py index 595b1aeda..2ba50273a 100644 --- a/tests/configs/tsunami-simple-atomic.py +++ b/tests/configs/tsunami-simple-atomic.py @@ -40,7 +40,6 @@ class L1(BaseCache): block_size = 64 mshrs = 4 tgts_per_mshr = 8 - protocol = CoherenceProtocol(protocol='moesi') # ---------------------- # Base L2 Cache diff --git a/tests/configs/tsunami-simple-timing-dual.py b/tests/configs/tsunami-simple-timing-dual.py index 47fba30ff..3b1a4f5cf 100644 --- a/tests/configs/tsunami-simple-timing-dual.py +++ b/tests/configs/tsunami-simple-timing-dual.py @@ -40,7 +40,6 @@ class L1(BaseCache): block_size = 64 mshrs = 4 tgts_per_mshr = 8 - protocol = CoherenceProtocol(protocol='moesi') # ---------------------- # Base L2 Cache diff --git a/tests/configs/tsunami-simple-timing.py b/tests/configs/tsunami-simple-timing.py index 999bde087..3f18c6848 100644 --- a/tests/configs/tsunami-simple-timing.py +++ b/tests/configs/tsunami-simple-timing.py @@ -41,7 +41,6 @@ class L1(BaseCache): block_size = 64 mshrs = 4 tgts_per_mshr = 8 - protocol = CoherenceProtocol(protocol='moesi') # ---------------------- # Base L2 Cache -- cgit v1.2.3 From 5e59739416bf195173f4b37ba9afb1cb8ae16566 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Sat, 30 Jun 2007 18:03:17 -0700 Subject: Don't propagate snoops across bridges. Wouldn't work anyway. --HG-- extra : convert_revision : af29fc7d0c134f5e89dd2e814c819151350fcb38 --- src/mem/bridge.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mem/bridge.cc b/src/mem/bridge.cc index 77178d518..92beb3d7e 100644 --- a/src/mem/bridge.cc +++ b/src/mem/bridge.cc @@ -360,6 +360,8 @@ Bridge::BridgePort::getDeviceAddressRanges(AddrRangeList &resp, bool &snoop) { otherPort->getPeerAddressRanges(resp, snoop); + // we don't allow snooping across bridges + snoop = false; } BEGIN_DECLARE_SIM_OBJECT_PARAMS(Bridge) -- cgit v1.2.3 From 3ad761bc8e89ff034fbf5ec6d8e9661e1025dcd7 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Sat, 30 Jun 2007 20:35:42 -0700 Subject: Make CPU models use new LoadLockedReq/StoreCondReq commands. --HG-- extra : convert_revision : ab78d9d1d88c3698edfd653d71c8882e1272b781 --- src/cpu/o3/lsq_unit.hh | 5 ++++- src/cpu/o3/lsq_unit_impl.hh | 4 +++- src/cpu/ozone/lw_lsq.hh | 6 +++++- src/cpu/ozone/lw_lsq_impl.hh | 5 ++++- src/cpu/simple/atomic.cc | 36 +++++++++++++++++++++--------------- src/cpu/simple/timing.cc | 33 +++++++++++++++++++-------------- 6 files changed, 56 insertions(+), 33 deletions(-) diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh index d964b9f9f..be9224099 100644 --- a/src/cpu/o3/lsq_unit.hh +++ b/src/cpu/o3/lsq_unit.hh @@ -643,7 +643,10 @@ LSQUnit::read(Request *req, T &data, int load_idx) // if we the cache is not blocked, do cache access if (!lsq->cacheBlocked()) { PacketPtr data_pkt = - new Packet(req, MemCmd::ReadReq, Packet::Broadcast); + new Packet(req, + (req->isLocked() ? + MemCmd::LoadLockedReq : MemCmd::ReadReq), + Packet::Broadcast); data_pkt->dataStatic(load_inst->memData); LSQSenderState *state = new LSQSenderState; diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index 810a6d29f..5ae1cc0e4 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -647,7 +647,9 @@ LSQUnit::writebackStores() memcpy(inst->memData, storeQueue[storeWBIdx].data, req->getSize()); - MemCmd command = req->isSwap() ? MemCmd::SwapReq : MemCmd::WriteReq; + MemCmd command = + req->isSwap() ? MemCmd::SwapReq : + (req->isLocked() ? MemCmd::WriteReq : MemCmd::StoreCondReq); PacketPtr data_pkt = new Packet(req, command, Packet::Broadcast); data_pkt->dataStatic(inst->memData); diff --git a/src/cpu/ozone/lw_lsq.hh b/src/cpu/ozone/lw_lsq.hh index d9e0d04ac..ba40e9ce1 100644 --- a/src/cpu/ozone/lw_lsq.hh +++ b/src/cpu/ozone/lw_lsq.hh @@ -632,7 +632,11 @@ OzoneLWLSQ::read(RequestPtr req, T &data, int load_idx) DPRINTF(OzoneLSQ, "Doing timing access for inst PC %#x\n", inst->readPC()); - PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast); + PacketPtr data_pkt = + new Packet(req, + (req->isLocked() ? + MemCmd::LoadLockedReq : Packet::ReadReq), + Packet::Broadcast); data_pkt->dataStatic(inst->memData); LSQSenderState *state = new LSQSenderState; diff --git a/src/cpu/ozone/lw_lsq_impl.hh b/src/cpu/ozone/lw_lsq_impl.hh index e3000288c..82191312a 100644 --- a/src/cpu/ozone/lw_lsq_impl.hh +++ b/src/cpu/ozone/lw_lsq_impl.hh @@ -587,7 +587,10 @@ OzoneLWLSQ::writebackStores() memcpy(inst->memData, (uint8_t *)&(*sq_it).data, req->getSize()); - PacketPtr data_pkt = new Packet(req, Packet::WriteReq, Packet::Broadcast); + MemCmd command = + req->isSwap() ? MemCmd::SwapReq : + (req->isLocked() ? MemCmd::WriteReq : MemCmd::StoreCondReq); + PacketPtr data_pkt = new Packet(req, command, Packet::Broadcast); data_pkt->dataStatic(inst->memData); LSQSenderState *state = new LSQSenderState; diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc index 8e8da2fa2..01eb4873e 100644 --- a/src/cpu/simple/atomic.cc +++ b/src/cpu/simple/atomic.cc @@ -280,7 +280,10 @@ AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags) // Now do the access. if (fault == NoFault) { - Packet pkt = Packet(req, MemCmd::ReadReq, Packet::Broadcast); + Packet pkt = + Packet(req, + req->isLocked() ? MemCmd::LoadLockedReq : MemCmd::ReadReq, + Packet::Broadcast); pkt.dataStatic(&data); if (req->isMmapedIpr()) @@ -370,23 +373,24 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) // Now do the access. if (fault == NoFault) { - Packet pkt = - Packet(req, req->isSwap() ? MemCmd::SwapReq : MemCmd::WriteReq, - Packet::Broadcast); - pkt.dataStatic(&data); - + MemCmd cmd = MemCmd::WriteReq; // default bool do_access = true; // flag to suppress cache access if (req->isLocked()) { + cmd = MemCmd::StoreCondReq; do_access = TheISA::handleLockedWrite(thread, req); + } else if (req->isSwap()) { + cmd = MemCmd::SwapReq; + if (req->isCondSwap()) { + assert(res); + req->setExtraData(*res); + } } - if (req->isCondSwap()) { - assert(res); - req->setExtraData(*res); - } - if (do_access) { + Packet pkt = Packet(req, cmd, Packet::Broadcast); + pkt.dataStatic(&data); + if (req->isMmapedIpr()) { dcache_latency = TheISA::handleIprWrite(thread->getTC(), &pkt); } else { @@ -395,12 +399,14 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) } dcache_access = true; assert(!pkt.isError()); + + if (req->isSwap()) { + assert(res); + *res = pkt.get(); + } } - if (req->isSwap()) { - assert(res); - *res = pkt.get(); - } else if (res) { + if (res && !req->isSwap()) { *res = req->getExtraData(); } } diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc index b4e4a4433..77df2c05d 100644 --- a/src/cpu/simple/timing.cc +++ b/src/cpu/simple/timing.cc @@ -260,7 +260,10 @@ TimingSimpleCPU::read(Addr addr, T &data, unsigned flags) // Now do the access. if (fault == NoFault) { PacketPtr pkt = - new Packet(req, MemCmd::ReadReq, Packet::Broadcast); + new Packet(req, + (req->isLocked() ? + MemCmd::LoadLockedReq : MemCmd::ReadReq), + Packet::Broadcast); pkt->dataDynamic(new T); if (!dcachePort.sendTiming(pkt)) { @@ -350,25 +353,27 @@ TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) // Now do the access. if (fault == NoFault) { - assert(dcache_pkt == NULL); - if (req->isSwap()) - dcache_pkt = new Packet(req, MemCmd::SwapReq, Packet::Broadcast); - else - dcache_pkt = new Packet(req, MemCmd::WriteReq, Packet::Broadcast); - dcache_pkt->allocate(); - dcache_pkt->set(data); - + MemCmd cmd = MemCmd::WriteReq; // default bool do_access = true; // flag to suppress cache access + assert(dcache_pkt == NULL); + if (req->isLocked()) { + cmd = MemCmd::StoreCondReq; do_access = TheISA::handleLockedWrite(thread, req); - } - if (req->isCondSwap()) { - assert(res); - req->setExtraData(*res); + } else if (req->isSwap()) { + cmd = MemCmd::SwapReq; + if (req->isCondSwap()) { + assert(res); + req->setExtraData(*res); + } } if (do_access) { + dcache_pkt = new Packet(req, MemCmd::WriteReq, Packet::Broadcast); + dcache_pkt->allocate(); + dcache_pkt->set(data); + if (!dcachePort.sendTiming(dcache_pkt)) { _status = DcacheRetry; } else { @@ -609,7 +614,7 @@ TimingSimpleCPU::completeDataAccess(PacketPtr pkt) Fault fault = curStaticInst->completeAcc(pkt, this, traceData); - if (pkt->isRead() && pkt->req->isLocked()) { + if (pkt->isRead() && pkt->isLocked()) { TheISA::handleLockedRead(thread, pkt->req); } -- cgit v1.2.3 From ffd697e14933b3012aaaa0fb93168b2fda59ea4a Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Mon, 2 Jul 2007 01:02:35 -0700 Subject: bus.cc: Fix atomic timing issue. src/mem/bus.cc: Fix atomic timing issue. --HG-- extra : convert_revision : a22ff80cd75f83c785b0604c2a4fde2e2e9f71ef --- src/mem/bus.cc | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/mem/bus.cc b/src/mem/bus.cc index 83ce0f87d..34f7f4fd0 100644 --- a/src/mem/bus.cc +++ b/src/mem/bus.cc @@ -377,7 +377,8 @@ Bus::recvAtomic(PacketPtr pkt) // original command so that additional snoops can take place // properly MemCmd orig_cmd = pkt->cmd; - MemCmd response_cmd = MemCmd::InvalidCmd; + MemCmd snoop_response_cmd = MemCmd::InvalidCmd; + Tick snoop_response_latency = 0; int orig_src = pkt->getSrc(); Port *target_port = findPort(pkt->getAddr(), pkt->getSrc()); @@ -388,15 +389,16 @@ Bus::recvAtomic(PacketPtr pkt) // same port should not have both target addresses and snooping assert(p != target_port); if (p->getId() != pkt->getSrc()) { - p->sendAtomic(pkt); + Tick latency = p->sendAtomic(pkt); if (pkt->isResponse()) { // response from snoop agent assert(pkt->cmd != orig_cmd); assert(pkt->memInhibitAsserted()); // should only happen once - assert(response_cmd == MemCmd::InvalidCmd); + assert(snoop_response_cmd == MemCmd::InvalidCmd); // save response state - response_cmd = pkt->cmd; + snoop_response_cmd = pkt->cmd; + snoop_response_latency = latency; // restore original packet state for remaining snoopers pkt->cmd = orig_cmd; pkt->setSrc(orig_src); @@ -405,19 +407,20 @@ Bus::recvAtomic(PacketPtr pkt) } } - Tick response_time = target_port->sendAtomic(pkt); + Tick response_latency = target_port->sendAtomic(pkt); // if we got a response from a snooper, restore it here - if (response_cmd != MemCmd::InvalidCmd) { + if (snoop_response_cmd != MemCmd::InvalidCmd) { // no one else should have responded assert(!pkt->isResponse()); assert(pkt->cmd == orig_cmd); - pkt->cmd = response_cmd; + pkt->cmd = snoop_response_cmd; + response_latency = snoop_response_latency; } // why do we have this packet field and the return value both??? - pkt->finishTime = std::max(response_time, curTick + clock); - return pkt->finishTime; + pkt->finishTime = curTick + response_latency; + return response_latency; } /** Function called by the port when the bus is receiving a Functional -- cgit v1.2.3 From e9c04dad60f7a382fe94ca587fa505926dbd925c Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Mon, 2 Jul 2007 09:26:36 -0700 Subject: Fix a couple LL/SC bugs that only affected timing mode. src/cpu/simple/timing.cc: Fix swap/stq_c command bug. src/mem/packet.cc: Fix incorrect LoadLockedReq command response field. --HG-- extra : convert_revision : 7a4523be900bc2c9b1bdf2d372ce55f89ae58ae5 --- src/cpu/simple/timing.cc | 2 +- src/mem/packet.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc index 77df2c05d..492a669b8 100644 --- a/src/cpu/simple/timing.cc +++ b/src/cpu/simple/timing.cc @@ -370,7 +370,7 @@ TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) } if (do_access) { - dcache_pkt = new Packet(req, MemCmd::WriteReq, Packet::Broadcast); + dcache_pkt = new Packet(req, cmd, Packet::Broadcast); dcache_pkt->allocate(); dcache_pkt->set(data); diff --git a/src/mem/packet.cc b/src/mem/packet.cc index 8de02f533..8cd356768 100644 --- a/src/mem/packet.cc +++ b/src/mem/packet.cc @@ -99,7 +99,7 @@ MemCmd::commandInfo[] = InvalidCmd, "ReadExResp" }, /* LoadLockedReq */ { SET4(IsRead, IsLocked, IsRequest, NeedsResponse), - ReadResp, "LoadLockedReq" }, + LoadLockedResp, "LoadLockedReq" }, /* LoadLockedResp */ { SET4(IsRead, IsLocked, IsResponse, HasData), InvalidCmd, "LoadLockedResp" }, -- cgit v1.2.3 From 4b68652c87f61fe0a2fd4040b79130de0846df85 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Mon, 2 Jul 2007 13:57:45 -0700 Subject: Couple more minor bug fixes for FS timing mode. src/cpu/simple/timing.cc: Fix another SC problem. src/mem/cache/cache_impl.hh: Forgot to call makeTimingResponse() on uncached timing responses. --HG-- extra : convert_revision : 5a5a58ca2053e4e8de2133205bfd37de15eb4209 --- src/cpu/simple/timing.cc | 13 +++++++------ src/mem/cache/cache_impl.hh | 1 + 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc index 492a669b8..0c03815b5 100644 --- a/src/cpu/simple/timing.cc +++ b/src/cpu/simple/timing.cc @@ -356,8 +356,6 @@ TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) MemCmd cmd = MemCmd::WriteReq; // default bool do_access = true; // flag to suppress cache access - assert(dcache_pkt == NULL); - if (req->isLocked()) { cmd = MemCmd::StoreCondReq; do_access = TheISA::handleLockedWrite(thread, req); @@ -369,11 +367,14 @@ TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) } } - if (do_access) { - dcache_pkt = new Packet(req, cmd, Packet::Broadcast); - dcache_pkt->allocate(); - dcache_pkt->set(data); + // Note: need to allocate dcache_pkt even if do_access is + // false, as it's used unconditionally to call completeAcc(). + assert(dcache_pkt == NULL); + dcache_pkt = new Packet(req, cmd, Packet::Broadcast); + dcache_pkt->allocate(); + dcache_pkt->set(data); + if (do_access) { if (!dcachePort.sendTiming(dcache_pkt)) { _status = DcacheRetry; } else { diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index b4c3c6359..0d76b6bec 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -698,6 +698,7 @@ Cache::handleResponse(PacketPtr pkt) if (pkt->isRead()) { target->pkt->setData(pkt->getPtr()); } + target->pkt->makeTimingResponse(); cpuSidePort->respond(target->pkt, time); } assert(!mshr->hasTargets()); -- cgit v1.2.3 From 4738649e32d06d92e6792b7ce80fcbd05627fc06 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Tue, 3 Jul 2007 00:40:31 -0400 Subject: Delete packets when we're done with them. --HG-- extra : convert_revision : b8894d26e1ca7a6c9b736500accdaa53bfb09558 --- src/mem/cache/cache_impl.hh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index 0d76b6bec..320e0be81 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -705,6 +705,8 @@ Cache::handleResponse(PacketPtr pkt) deallocate = true; } + delete pkt; + if (deallocate) { mq->deallocate(mshr); if (wasFull && !mq->isFull()) { @@ -1242,6 +1244,9 @@ Cache::MemSidePort::sendPacket() waitingOnRetry = !success; if (waitingOnRetry) { DPRINTF(CachePort, "now waiting on a retry\n"); + if (!mshr->isSimpleForward()) { + delete pkt; + } } else { myCache()->markInService(mshr); } -- cgit v1.2.3 From 3b4ff759398371ac14b7d694de1c87af245f7d42 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Sat, 14 Jul 2007 13:14:53 -0700 Subject: Fix bug in copying packet with static data pointer. --HG-- extra : convert_revision : 2fcf99f050d73e007433c1db2475f2893c5961a0 --- src/mem/packet.hh | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/mem/packet.hh b/src/mem/packet.hh index 16bc6f458..c90842dee 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -368,14 +368,15 @@ class Packet : public FastAlloc } /** Alternate constructor for copying a packet. Copy all fields - * *except* set data allocation as static... even if the original - * packet's data was dynamic, we don't want to free it when the - * new packet is deallocated. Note that if original packet used - * dynamic data, user must guarantee that the new packet's - * lifetime is less than that of the original packet. */ + * *except* if the original packet's data was dynamic, don't copy + * that, as we can't guarantee that the new packet's lifetime is + * less than that of the original packet. In this case the new + * packet should allocate its own data. */ Packet(Packet *origPkt) : cmd(origPkt->cmd), req(origPkt->req), - data(NULL), staticData(false), dynamicData(false), arrayData(false), + data(origPkt->staticData ? origPkt->data : NULL), + staticData(origPkt->staticData), + dynamicData(false), arrayData(false), addr(origPkt->addr), size(origPkt->size), src(origPkt->src), dest(origPkt->dest), addrSizeValid(origPkt->addrSizeValid), -- cgit v1.2.3 From abd194df5c2dfd0ebf608c5d59196a08ca0ef630 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Sat, 14 Jul 2007 13:16:58 -0700 Subject: Move a couple of DPRINTFs from Cache to CachePort. --HG-- extra : convert_revision : 55a0d26660aeb8f63b41897d53e6b2d1f0a163be --- src/mem/cache/base_cache.hh | 2 +- src/mem/cache/cache_impl.hh | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh index fcc040bd9..46414974b 100644 --- a/src/mem/cache/base_cache.hh +++ b/src/mem/cache/base_cache.hh @@ -126,7 +126,7 @@ class BaseCache : public MemObject void requestBus(RequestCause cause, Tick time) { - DPRINTF(Cache, "Asserting bus request for cause %d\n", cause); + DPRINTF(CachePort, "Asserting bus request for cause %d\n", cause); if (!waitingOnRetry) { schedSendEvent(time); } diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index 320e0be81..b159df84a 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -1238,7 +1238,8 @@ Cache::MemSidePort::sendPacket() MSHR *mshr = dynamic_cast(pkt->senderState); bool success = sendTiming(pkt); - DPRINTF(Cache, "Address %x was %s in sending the timing request\n", + DPRINTF(CachePort, + "Address %x was %s in sending the timing request\n", pkt->getAddr(), success ? "successful" : "unsuccessful"); waitingOnRetry = !success; -- cgit v1.2.3 From 15a51d0cae01defc116c9a937bfa8c4577f72826 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Sat, 14 Jul 2007 13:28:52 -0700 Subject: Add CacheRepl trace flag and move a couple DPRINTFs to it. --HG-- extra : convert_revision : 31724d19ebdf2cdc2a2bafff83d17845b3a0b183 --- src/base/traceflags.py | 1 + src/mem/cache/tags/lru.cc | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/base/traceflags.py b/src/base/traceflags.py index 70fadb210..8573eb9bf 100644 --- a/src/base/traceflags.py +++ b/src/base/traceflags.py @@ -47,6 +47,7 @@ baseFlags = [ 'BusBridge', 'Cache', 'CachePort', + 'CacheRepl', 'Chains', 'Checker', 'Clock', diff --git a/src/mem/cache/tags/lru.cc b/src/mem/cache/tags/lru.cc index 3269aa4db..0a8587c20 100644 --- a/src/mem/cache/tags/lru.cc +++ b/src/mem/cache/tags/lru.cc @@ -173,7 +173,7 @@ LRU::findBlock(Addr addr, int &lat) if (blk != NULL) { // move this block to head of the MRU list sets[set].moveToHead(blk); - DPRINTF(Cache, "set %x: moving blk %x to MRU\n", + DPRINTF(CacheRepl, "set %x: moving blk %x to MRU\n", set, regenerateBlkAddr(tag, set)); if (blk->whenReady > curTick && blk->whenReady - curTick > hitLatency) { @@ -208,7 +208,7 @@ LRU::findReplacement(Addr addr, PacketList &writebacks) ++sampledRefs; blk->refCount = 0; - DPRINTF(Cache, "set %x: selecting blk %x for replacement\n", + DPRINTF(CacheRepl, "set %x: selecting blk %x for replacement\n", set, regenerateBlkAddr(blk->tag, set)); } else if (!blk->isTouched) { tagsInUse++; -- cgit v1.2.3 From 4bcfa916f1f12e8cda253ca7154e75fa1f71ca44 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Sat, 14 Jul 2007 23:49:24 -0700 Subject: New tree-based algorithm for creating more complex cache hierarchies. --HG-- extra : convert_revision : de8dd4ef5dae0f3e084461e8ef7c549653e61d3f --- configs/example/memtest.py | 143 +++++++++++++++++++++++---------------------- 1 file changed, 73 insertions(+), 70 deletions(-) diff --git a/configs/example/memtest.py b/configs/example/memtest.py index 0e6260b5d..47853ffab 100644 --- a/configs/example/memtest.py +++ b/configs/example/memtest.py @@ -49,6 +49,10 @@ parser.add_option("-n", "--numtesters", type="int", default=8, metavar="N", help="Number of tester pseudo-CPUs [default: %default]") +parser.add_option("-t", "--treespec", type="string", + help="Colon-separated multilevel tree specification") + + parser.add_option("-f", "--functional", type="int", default=0, metavar="PCT", help="Target percentage of functional accesses " @@ -69,84 +73,83 @@ if args: print "Error: script doesn't take any positional arguments" sys.exit(1) -# Should generalize this someday... would be cool to have a loop that -# just iterates, adding a level of caching each time. -#if options.cache_levels != 2 and options.cache_levels != 0: -# print "Error: number of cache levels must be 0 or 2" -# sys.exit(1) - -if options.blocking: - num_l1_mshrs = 1 - num_l2_mshrs = 1 -else: - num_l1_mshrs = 12 - num_l2_mshrs = 92 - block_size = 64 -# -------------------- -# Base L1 Cache -# ==================== - -class L1(BaseCache): - latency = '1ns' - block_size = block_size - mshrs = num_l1_mshrs - tgts_per_mshr = 8 - -# ---------------------- -# Base L2 Cache -# ---------------------- - -class L2(BaseCache): - block_size = block_size - latency = '10ns' - mshrs = num_l2_mshrs - tgts_per_mshr = 16 - write_buffers = 8 - -if options.numtesters > block_size: +if not options.treespec: + # convert simple cache_levels option to treespec + treespec = [options.numtesters, 1] + numtesters = options.numtesters +else: + try: + treespec = [int(x) for x in options.treespec.split(':')] + numtesters = reduce(lambda x,y: x*y, treespec) + except: + print "Error parsing treespec option" + sys.exit(1) + +if numtesters > block_size: print "Error: Number of testers limited to %s because of false sharing" \ % (block_size) sys.exit(1) -cpus = [ MemTest(atomic=options.atomic, max_loads=options.maxloads, - percent_functional=options.functional, - percent_uncacheable=options.uncacheable, - progress_interval=options.progress) - for i in xrange(options.numtesters) ] +if len(treespec) < 1: + print "Error parsing treespec" + sys.exit(1) -# system simulated -system = System(cpu = cpus, funcmem = PhysicalMemory(), - physmem = PhysicalMemory(latency = "100ns"), - membus = Bus(clock="500MHz", width=16)) - -# l2cache & bus -if options.cache_levels == 2: - system.toL2Bus = Bus(clock="500MHz", width=16) - system.l2c = L2(size='64kB', assoc=8) - system.l2c.cpu_side = system.toL2Bus.port - - # connect l2c to membus - system.l2c.mem_side = system.membus.port - -# add L1 caches -for cpu in cpus: - if options.cache_levels == 2: - cpu.l1c = L1(size = '32kB', assoc = 4) - cpu.test = cpu.l1c.cpu_side - cpu.l1c.mem_side = system.toL2Bus.port - elif options.cache_levels == 1: - cpu.l1c = L1(size = '32kB', assoc = 4) - cpu.test = cpu.l1c.cpu_side - cpu.l1c.mem_side = system.membus.port - else: - cpu.test = system.membus.port - system.funcmem.port = cpu.functional - -# connect memory to membus -system.physmem.port = system.membus.port +# define prototype L1 cache +proto_l1 = BaseCache(size = '32kB', assoc = 4, block_size = block_size, + latency = '1ns', tgts_per_mshr = 8) +if options.blocking: + proto_l1.mshrs = 1 +else: + proto_l1.mshrs = 8 + +# build a list of prototypes, one for each cache level (L1 is at end, +# followed by the tester pseudo-cpu objects) +prototypes = [ proto_l1, + MemTest(atomic=options.atomic, max_loads=options.maxloads, + percent_functional=options.functional, + percent_uncacheable=options.uncacheable, + progress_interval=options.progress) ] + +while len(prototypes) < len(treespec): + # clone previous level and update params + prev = prototypes[0] + next = prev() + next.size = prev.size * 4 + next.latency = prev.latency * 10 + next.assoc = prev.assoc * 2 + prototypes.insert(0, next) + +# system simulated +system = System(funcmem = PhysicalMemory(), + physmem = PhysicalMemory(latency = "100ns")) + +def make_level(spec, prototypes, attach_obj, attach_port): + fanout = spec[0] + parent = attach_obj # use attach obj as config parent too + if fanout > 1: + new_bus = Bus(clock="500MHz", width=16) + new_bus.port = getattr(attach_obj, attach_port) + parent.cpu_side_bus = new_bus + attach_obj = new_bus + attach_port = "port" + objs = [prototypes[0]() for i in xrange(fanout)] + if len(spec) > 1: + # we just built caches, more levels to go + parent.cache = objs + for cache in objs: + cache.mem_side = getattr(attach_obj, attach_port) + make_level(spec[1:], prototypes[1:], cache, "cpu_side") + else: + # we just built the MemTest objects + parent.cpu = objs + for t in objs: + t.test = getattr(attach_obj, attach_port) + t.functional = system.funcmem.port + +make_level(treespec, prototypes, system.physmem, "port") # ----------------------- # run simulation -- cgit v1.2.3 From ad560a6642fbb752e608c02048fc2103e60093b3 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Sun, 15 Jul 2007 13:22:49 -0700 Subject: Add --force-bus option to memtest.py. --HG-- extra : convert_revision : 101735cca426903704ff2edaff051fa7c5bfc46c --- configs/example/memtest.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/configs/example/memtest.py b/configs/example/memtest.py index 47853ffab..c9149865a 100644 --- a/configs/example/memtest.py +++ b/configs/example/memtest.py @@ -52,6 +52,8 @@ parser.add_option("-n", "--numtesters", type="int", default=8, parser.add_option("-t", "--treespec", type="string", help="Colon-separated multilevel tree specification") +parser.add_option("--force-bus", action="store_true", + help="Use bus between levels even with single cache") parser.add_option("-f", "--functional", type="int", default=0, metavar="PCT", @@ -129,7 +131,7 @@ system = System(funcmem = PhysicalMemory(), def make_level(spec, prototypes, attach_obj, attach_port): fanout = spec[0] parent = attach_obj # use attach obj as config parent too - if fanout > 1: + if fanout > 1 or options.force_bus: new_bus = Bus(clock="500MHz", width=16) new_bus.port = getattr(attach_obj, attach_port) parent.cpu_side_bus = new_bus -- cgit v1.2.3 From b1bdc3b3d9de40387a209777aa972f96792c8d6a Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Sun, 15 Jul 2007 14:07:31 -0700 Subject: Punt on old -n/-c memtest args. Also added comments to document treespec format. --HG-- extra : convert_revision : fa9e8f66b68b96a4efca8a7fe6e7c37367382d9d --- configs/example/memtest.py | 51 +++++++++++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 21 deletions(-) diff --git a/configs/example/memtest.py b/configs/example/memtest.py index c9149865a..e7f39d8bd 100644 --- a/configs/example/memtest.py +++ b/configs/example/memtest.py @@ -33,24 +33,38 @@ m5.AddToPath('../common') parser = optparse.OptionParser() -parser.add_option("-c", "--cache-levels", type="int", default=2, - metavar="LEVELS", - help="Number of cache levels [default: %default]") parser.add_option("-a", "--atomic", action="store_true", help="Use atomic (non-timing) mode") parser.add_option("-b", "--blocking", action="store_true", help="Use blocking caches") -parser.add_option("-l", "--maxloads", default="1G", metavar="N", - help="Stop after N loads [default: %default]") +parser.add_option("-l", "--maxloads", metavar="N", + help="Stop after N loads") parser.add_option("-m", "--maxtick", type="int", default=m5.MaxTick, metavar="T", help="Stop after T ticks") -parser.add_option("-n", "--numtesters", type="int", default=8, - metavar="N", - help="Number of tester pseudo-CPUs [default: %default]") -parser.add_option("-t", "--treespec", type="string", - help="Colon-separated multilevel tree specification") +# +# The "tree" specification is a colon-separated list of one or more +# integers. The first integer is the number of caches/testers +# connected directly to main memory. The last integer in the list is +# the number of testers associated with the uppermost level of memory +# (L1 cache, if there are caches, or main memory if no caches). Thus +# if there is only one integer, there are no caches, and the integer +# specifies the number of testers connected directly to main memory. +# The other integers (if any) specify the number of caches at each +# level of the hierarchy between. +# +# Examples: +# +# "2:1" Two caches connected to memory with a single tester behind each +# (single-level hierarchy, two testers total) +# +# "2:2:1" Two-level hierarchy, 2 L1s behind each of 2 L2s, 4 testers total +# +parser.add_option("-t", "--treespec", type="string", default="8:1", + help="Colon-separated multilevel tree specification, " + "see script comments for details " + "[default: %default]") parser.add_option("--force-bus", action="store_true", help="Use bus between levels even with single cache") @@ -77,17 +91,12 @@ if args: block_size = 64 -if not options.treespec: - # convert simple cache_levels option to treespec - treespec = [options.numtesters, 1] - numtesters = options.numtesters -else: - try: - treespec = [int(x) for x in options.treespec.split(':')] - numtesters = reduce(lambda x,y: x*y, treespec) - except: - print "Error parsing treespec option" - sys.exit(1) +try: + treespec = [int(x) for x in options.treespec.split(':')] + numtesters = reduce(lambda x,y: x*y, treespec) +except: + print "Error parsing treespec option" + sys.exit(1) if numtesters > block_size: print "Error: Number of testers limited to %s because of false sharing" \ -- cgit v1.2.3 From 9172876dd7ba4877c586ced30904548539451f37 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Sun, 15 Jul 2007 14:32:55 -0700 Subject: Fix problem with unset max_loads in memtest. Also make default 0, and make that mean run forever. --HG-- extra : convert_revision : 3e60a52b1c5e334a9ef3d744cf7ee1d851ba4aa9 --- configs/example/memtest.py | 14 +++++++++----- src/cpu/memtest/MemTest.py | 2 +- src/cpu/memtest/memtest.cc | 2 +- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/configs/example/memtest.py b/configs/example/memtest.py index e7f39d8bd..af100c9a9 100644 --- a/configs/example/memtest.py +++ b/configs/example/memtest.py @@ -37,7 +37,7 @@ parser.add_option("-a", "--atomic", action="store_true", help="Use atomic (non-timing) mode") parser.add_option("-b", "--blocking", action="store_true", help="Use blocking caches") -parser.add_option("-l", "--maxloads", metavar="N", +parser.add_option("-l", "--maxloads", metavar="N", default=0, help="Stop after N loads") parser.add_option("-m", "--maxtick", type="int", default=m5.MaxTick, metavar="T", @@ -116,14 +116,18 @@ if options.blocking: else: proto_l1.mshrs = 8 -# build a list of prototypes, one for each cache level (L1 is at end, -# followed by the tester pseudo-cpu objects) -prototypes = [ proto_l1, - MemTest(atomic=options.atomic, max_loads=options.maxloads, +# build a list of prototypes, one for each level of treespec, starting +# at the end (last entry is tester objects) +prototypes = [ MemTest(atomic=options.atomic, max_loads=options.maxloads, percent_functional=options.functional, percent_uncacheable=options.uncacheable, progress_interval=options.progress) ] +# next comes L1 cache, if any +if len(treespec) > 1: + prototypes.insert(0, proto_l1) + +# now add additional cache levels (if any) by scaling L1 params while len(prototypes) < len(treespec): # clone previous level and update params prev = prototypes[0] diff --git a/src/cpu/memtest/MemTest.py b/src/cpu/memtest/MemTest.py index 381519972..a328f4734 100644 --- a/src/cpu/memtest/MemTest.py +++ b/src/cpu/memtest/MemTest.py @@ -33,7 +33,7 @@ from m5 import build_env class MemTest(SimObject): type = 'MemTest' - max_loads = Param.Counter("number of loads to execute") + max_loads = Param.Counter(0, "number of loads to execute") atomic = Param.Bool(False, "Execute tester in atomic mode? (or timing)\n") memory_size = Param.Int(65536, "memory size") percent_dest_unaligned = Param.Percent(50, diff --git a/src/cpu/memtest/memtest.cc b/src/cpu/memtest/memtest.cc index 019b4328c..db3ca282a 100644 --- a/src/cpu/memtest/memtest.cc +++ b/src/cpu/memtest/memtest.cc @@ -232,7 +232,7 @@ MemTest::completeRequest(PacketPtr pkt) nextProgressMessage += progressInterval; } - if (numReads >= maxLoads) + if (maxLoads != 0 && numReads >= maxLoads) exitSimLoop("maximum number of loads reached"); break; -- cgit v1.2.3 From f790f34fe30aaca22b829104a8cf3f547624132a Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Sun, 15 Jul 2007 20:09:03 -0700 Subject: Make Bus::findPort() a little more useful. Move check for loops outside, since half the call sites end up working around it anyway. Return integer port ID instead of port object pointer. --HG-- extra : convert_revision : 4c31fe9930f4d1aa4919e764efb7c50d43792ea3 --- src/mem/bus.cc | 47 ++++++++++++++++++++++++----------------------- src/mem/bus.hh | 6 ++---- 2 files changed, 26 insertions(+), 27 deletions(-) diff --git a/src/mem/bus.cc b/src/mem/bus.cc index 0cb1240f3..24a0c6f02 100644 --- a/src/mem/bus.cc +++ b/src/mem/bus.cc @@ -172,7 +172,7 @@ void Bus::occupyBus(PacketPtr pkt) bool Bus::recvTiming(PacketPtr pkt) { - Port *port; + int port_id; DPRINTF(Bus, "recvTiming: packet src %d dest %d addr 0x%x cmd %s\n", pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString()); @@ -196,8 +196,8 @@ Bus::recvTiming(PacketPtr pkt) // Make sure to clear the snoop commit flag so it doesn't think an // access has been handled twice. if (dest == Packet::Broadcast) { - port = findPort(pkt->getAddr(), pkt->getSrc()); - timingSnoop(pkt, port ? port : interfaces[pkt->getSrc()]); + port_id = findPort(pkt->getAddr()); + timingSnoop(pkt, interfaces[port_id]); if (pkt->memInhibitAsserted()) { //Cache-Cache transfer occuring @@ -213,13 +213,13 @@ Bus::recvTiming(PacketPtr pkt) } else { assert(dest >= 0 && dest < maxId); assert(dest != pkt->getSrc()); // catch infinite loops - port = interfaces[dest]; + port_id = dest; } occupyBus(pkt); - if (port) { - if (port->sendTiming(pkt)) { + if (port_id != pkt->getSrc()) { + if (interfaces[port_id]->sendTiming(pkt)) { // Packet was successfully sent. Return true. // Also take care of retries if (inRetry) { @@ -279,8 +279,8 @@ Bus::recvRetry(int id) } } -Port * -Bus::findPort(Addr addr, int id) +int +Bus::findPort(Addr addr) { /* An interval tree would be a better way to do this. --ali. */ int dest_id = -1; @@ -295,7 +295,7 @@ Bus::findPort(Addr addr, int id) iter != defaultRange.end(); iter++) { if (*iter == addr) { DPRINTF(Bus, " found addr %#llx on default\n", addr); - return defaultPort; + return defaultId; } } @@ -306,18 +306,11 @@ Bus::findPort(Addr addr, int id) DPRINTF(Bus, "Unable to find destination for addr: %#llx, will use " "default port", addr); - return defaultPort; + return defaultId; } } - - // we shouldn't be sending this back to where it came from - // do the snoop access and then we should terminate - // the cyclical call. - if (dest_id == id) - return 0; - - return interfaces[dest_id]; + return dest_id; } void @@ -380,7 +373,8 @@ Bus::recvAtomic(PacketPtr pkt) Tick snoop_response_latency = 0; int orig_src = pkt->getSrc(); - Port *target_port = findPort(pkt->getAddr(), pkt->getSrc()); + int target_port_id = findPort(pkt->getAddr()); + Port *target_port = interfaces[target_port_id]; SnoopIter s_end = snoopPorts.end(); for (SnoopIter s_iter = snoopPorts.begin(); s_iter != s_end; s_iter++) { @@ -406,7 +400,13 @@ Bus::recvAtomic(PacketPtr pkt) } } - Tick response_latency = target_port->sendAtomic(pkt); + Tick response_latency = 0; + + // we can get requests sent up from the memory side of the bus for + // snooping... don't send them back down! + if (target_port_id != pkt->getSrc()) { + response_latency = target_port->sendAtomic(pkt); + } // if we got a response from a snooper, restore it here if (snoop_response_cmd != MemCmd::InvalidCmd) { @@ -431,11 +431,12 @@ Bus::recvFunctional(PacketPtr pkt) pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString()); assert(pkt->getDest() == Packet::Broadcast); - Port* port = findPort(pkt->getAddr(), pkt->getSrc()); - functionalSnoop(pkt, port ? port : interfaces[pkt->getSrc()]); + int port_id = findPort(pkt->getAddr()); + Port *port = interfaces[port_id]; + functionalSnoop(pkt, port); // If the snooping hasn't found what we were looking for, keep going. - if (!pkt->isResponse() && port) { + if (!pkt->isResponse() && port_id != pkt->getSrc()) { port->sendFunctional(pkt); } } diff --git a/src/mem/bus.hh b/src/mem/bus.hh index bd51337ed..a19420244 100644 --- a/src/mem/bus.hh +++ b/src/mem/bus.hh @@ -176,11 +176,9 @@ class Bus : public MemObject /** Find which port connected to this bus (if any) should be given a packet * with this address. * @param addr Address to find port for. - * @param id Id of the port this packet was received from (to prevent - * loops) - * @return pointer to port that the packet should be sent out of. + * @return id of port that the packet should be sent out of. */ - Port *findPort(Addr addr, int id); + int findPort(Addr addr); /** Snoop all relevant ports functionally. */ void functionalSnoop(PacketPtr pkt, Port *responder); -- cgit v1.2.3 From 884807a68ad7e4f390660b3becfe4ee094334e95 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Sun, 15 Jul 2007 20:11:06 -0700 Subject: Fix up a bunch of multilevel coherence issues. Atomic mode seems to work. Timing is closer but not there yet. --HG-- extra : convert_revision : 0dea5c3d4b973d009e9d4a4c21b9cad15961d56f --- configs/example/memtest.py | 2 +- src/cpu/memtest/memtest.cc | 4 +- src/cpu/o3/lsq_impl.hh | 7 ++-- src/mem/bus.cc | 5 ++- src/mem/cache/cache_impl.hh | 94 ++++++++++++++++++++++++++++++++++++++++----- src/mem/packet.hh | 12 ++++-- 6 files changed, 105 insertions(+), 19 deletions(-) diff --git a/configs/example/memtest.py b/configs/example/memtest.py index af100c9a9..5bb874e85 100644 --- a/configs/example/memtest.py +++ b/configs/example/memtest.py @@ -144,7 +144,7 @@ system = System(funcmem = PhysicalMemory(), def make_level(spec, prototypes, attach_obj, attach_port): fanout = spec[0] parent = attach_obj # use attach obj as config parent too - if fanout > 1 or options.force_bus: + if len(spec) > 1 and (fanout > 1 or options.force_bus): new_bus = Bus(clock="500MHz", width=16) new_bus.port = getattr(attach_obj, attach_port) parent.cpu_side_bus = new_bus diff --git a/src/cpu/memtest/memtest.cc b/src/cpu/memtest/memtest.cc index db3ca282a..f5c8bb93b 100644 --- a/src/cpu/memtest/memtest.cc +++ b/src/cpu/memtest/memtest.cc @@ -64,7 +64,9 @@ MemTest::CpuPort::recvTiming(PacketPtr pkt) Tick MemTest::CpuPort::recvAtomic(PacketPtr pkt) { - panic("MemTest doesn't expect recvAtomic callback!"); + // must be snoop upcall + assert(pkt->isRequest()); + assert(pkt->getDest() == Packet::Broadcast); return curTick; } diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh index b4a6a02da..10c0afd38 100644 --- a/src/cpu/o3/lsq_impl.hh +++ b/src/cpu/o3/lsq_impl.hh @@ -84,9 +84,10 @@ LSQ::DcachePort::recvTiming(PacketPtr pkt) lsq->thread[pkt->req->getThreadNum()].completeDataAccess(pkt); } else { - //else it is a coherence request, maybe you need to do something - warn("Recieved a coherence request (Invalidate?), 03CPU doesn't" - "update LSQ for these\n"); + // must be a snoop + + // @TODO someday may need to process invalidations in LSQ here + // to provide stronger consistency model } return true; } diff --git a/src/mem/bus.cc b/src/mem/bus.cc index 24a0c6f02..e70558bd6 100644 --- a/src/mem/bus.cc +++ b/src/mem/bus.cc @@ -183,8 +183,9 @@ Bus::recvTiming(PacketPtr pkt) // If the bus is busy, or other devices are in line ahead of the current // one, put this device on the retry list. - if (tickNextIdle > curTick || - (retryList.size() && (!inRetry || pktPort != retryList.front()))) + if (!pkt->isExpressSnoop() && + (tickNextIdle > curTick || + (retryList.size() && (!inRetry || pktPort != retryList.front())))) { addToRetryList(pktPort); DPRINTF(Bus, "recvTiming: Bus is busy, returning false\n"); diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index b159df84a..59571dd6f 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -165,11 +165,25 @@ Cache::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk) blk->trackLoadLocked(pkt); } pkt->setDataFromBlock(blk->data, blkSize); + if (pkt->getSize() == blkSize) { + // special handling for coherent block requests from + // upper-level caches + if (pkt->needsExclusive()) { + // on ReadExReq we give up our copy + tags->invalidateBlk(blk); + } else { + // on ReadReq we create shareable copies here and in + // the requester + pkt->assertShared(); + blk->status &= ~BlkWritable; + } + } } else { // Not a read or write... must be an upgrade. it's OK // to just ack those as long as we have an exclusive // copy at this level. assert(pkt->cmd == MemCmd::UpgradeReq); + tags->invalidateBlk(blk); } } @@ -269,6 +283,18 @@ Cache::access(PacketPtr pkt, BlkType *&blk, int &lat) hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++; satisfied = true; satisfyCpuSideRequest(pkt, blk); + } else if (pkt->cmd == MemCmd::Writeback) { + // special case: writeback to read-only block (e.g., from + // L1 into L2). since we're really just passing ownership + // from one cache to another, we can update this cache to + // be the owner without making the block writeable + assert(!blk->isWritable() /* && !blk->isDirty() */); + assert(blkSize == pkt->getSize()); + std::memcpy(blk->data, pkt->getPtr(), blkSize); + blk->status |= BlkDirty; + satisfied = true; + // nothing else to do; writeback doesn't expect response + assert(!pkt->needsResponse()); } else { // permission violation... nothing to do here, leave unsatisfied // for statistics purposes this counts like a complete miss @@ -363,9 +389,10 @@ Cache::timingAccess(PacketPtr pkt) bool needsResponse = pkt->needsResponse(); if (satisfied) { - assert(needsResponse); - pkt->makeTimingResponse(); - cpuSidePort->respond(pkt, curTick+lat); + if (needsResponse) { + pkt->makeTimingResponse(); + cpuSidePort->respond(pkt, curTick+lat); + } } else { // miss if (prefetchMiss) @@ -456,10 +483,30 @@ Cache::atomicAccess(PacketPtr pkt) { int lat = hitLatency; + // @TODO: make this a parameter + bool last_level_cache = false; + if (pkt->memInhibitAsserted()) { - DPRINTF(Cache, "mem inhibited on 0x%x: not responding\n", - pkt->getAddr()); assert(!pkt->req->isUncacheable()); + // have to invalidate ourselves and any lower caches even if + // upper cache will be responding + if (pkt->isInvalidate()) { + BlkType *blk = tags->findBlock(pkt->getAddr()); + if (blk && blk->isValid()) { + tags->invalidateBlk(blk); + DPRINTF(Cache, "rcvd mem-inhibited %s on 0x%x: invalidating\n", + pkt->cmdString(), pkt->getAddr()); + } + if (!last_level_cache) { + DPRINTF(Cache, "forwarding mem-inhibited %s on 0x%x\n", + pkt->cmdString(), pkt->getAddr()); + lat += memSidePort->sendAtomic(pkt); + } + } else { + DPRINTF(Cache, "rcvd mem-inhibited %s on 0x%x: not responding\n", + pkt->cmdString(), pkt->getAddr()); + } + return lat; } @@ -791,9 +838,7 @@ Cache::handleFill(PacketPtr pkt, BlkType *blk, assert(pkt->isRead() || blk->isValid()); } - if (pkt->needsExclusive()) { - blk->status = BlkValid | BlkWritable | BlkDirty; - } else if (!pkt->sharedAsserted()) { + if (pkt->needsExclusive() || !pkt->sharedAsserted()) { blk->status = BlkValid | BlkWritable; } else { blk->status = BlkValid; @@ -839,6 +884,37 @@ void Cache::handleSnoop(PacketPtr pkt, BlkType *blk, bool is_timing, bool is_deferred) { + assert(pkt->isRequest()); + + // first propagate snoop upward to see if anyone above us wants to + // handle it. save & restore packet src since it will get + // rewritten to be relative to cpu-side bus (if any) + bool alreadySupplied = pkt->memInhibitAsserted(); + bool upperSupply = false; + if (is_timing) { + Packet *snoopPkt = new Packet(pkt, true); // clear flags + snoopPkt->setExpressSnoop(); + cpuSidePort->sendTiming(snoopPkt); + if (snoopPkt->memInhibitAsserted()) { + // cache-to-cache response from some upper cache + assert(!alreadySupplied); + pkt->assertMemInhibit(); + } + if (snoopPkt->sharedAsserted()) { + pkt->assertShared(); + } + delete snoopPkt; + } else { + int origSrc = pkt->getSrc(); + cpuSidePort->sendAtomic(pkt); + if (!alreadySupplied && pkt->memInhibitAsserted()) { + // cache-to-cache response from some upper cache: + // forward response to original requester + assert(pkt->isResponse()); + } + pkt->setSrc(origSrc); + } + if (!blk || !blk->isValid()) { return; } @@ -846,7 +922,7 @@ Cache::handleSnoop(PacketPtr pkt, BlkType *blk, // we may end up modifying both the block state and the packet (if // we respond in atomic mode), so just figure out what to do now // and then do it later - bool supply = blk->isDirty() && pkt->isRead(); + bool supply = blk->isDirty() && pkt->isRead() && !upperSupply; bool invalidate = pkt->isInvalidate(); if (pkt->isRead() && !pkt->isInvalidate()) { diff --git a/src/mem/packet.hh b/src/mem/packet.hh index c90842dee..036bd3fd7 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -252,9 +252,11 @@ class Packet : public FastAlloc bool destValid; enum Flag { - // Snoop flags + // Snoop response flags MemInhibit, Shared, + // Special control flags + ExpressSnoop, NUM_PACKET_FLAGS }; @@ -317,6 +319,10 @@ class Packet : public FastAlloc bool memInhibitAsserted() { return flags[MemInhibit]; } bool sharedAsserted() { return flags[Shared]; } + // Special control flags + void setExpressSnoop() { flags[ExpressSnoop] = true; } + bool isExpressSnoop() { return flags[ExpressSnoop]; } + // Network error conditions... encapsulate them as methods since // their encoding keeps changing (from result field to command // field, etc.) @@ -372,7 +378,7 @@ class Packet : public FastAlloc * that, as we can't guarantee that the new packet's lifetime is * less than that of the original packet. In this case the new * packet should allocate its own data. */ - Packet(Packet *origPkt) + Packet(Packet *origPkt, bool clearFlags = false) : cmd(origPkt->cmd), req(origPkt->req), data(origPkt->staticData ? origPkt->data : NULL), staticData(origPkt->staticData), @@ -381,7 +387,7 @@ class Packet : public FastAlloc src(origPkt->src), dest(origPkt->dest), addrSizeValid(origPkt->addrSizeValid), srcValid(origPkt->srcValid), destValid(origPkt->destValid), - flags(origPkt->flags), + flags(clearFlags ? 0 : origPkt->flags), time(curTick), senderState(origPkt->senderState) { } -- cgit v1.2.3 From e80ab26abc325db0c99b153aa9758ad0df66c77c Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Sun, 15 Jul 2007 23:30:22 -0400 Subject: Add valgrind-suppressions file. --HG-- extra : convert_revision : 8c912aa723f2532c728b47a0bd83c3f3f27d7dfe --- util/valgrind-suppressions | 113 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 util/valgrind-suppressions diff --git a/util/valgrind-suppressions b/util/valgrind-suppressions new file mode 100644 index 000000000..861eb5c3d --- /dev/null +++ b/util/valgrind-suppressions @@ -0,0 +1,113 @@ +{ + python error + Memcheck:Cond + fun:PyObject_Free +} +{ + python error + Memcheck:Addr1 + fun:PyObject_Free +} +{ + python error + Memcheck:Addr2 + fun:PyObject_Free +} +{ + python error + Memcheck:Addr4 + fun:PyObject_Free +} +{ + python error + Memcheck:Addr8 + fun:PyObject_Free +} +{ + python error + Memcheck:Addr16 + fun:PyObject_Free +} +{ + python error + Memcheck:Value1 + fun:PyObject_Free +} +{ + python error + Memcheck:Value2 + fun:PyObject_Free +} +{ + python error + Memcheck:Value4 + fun:PyObject_Free +} +{ + python error + Memcheck:Value8 + fun:PyObject_Free +} +{ + python error + Memcheck:Value16 + fun:PyObject_Free +} + +{ + python error + Memcheck:Cond + fun:PyObject_Realloc +} +{ + python error + Memcheck:Addr1 + fun:PyObject_Realloc +} +{ + python error + Memcheck:Addr2 + fun:PyObject_Realloc +} +{ + python error + Memcheck:Addr4 + fun:PyObject_Realloc +} +{ + python error + Memcheck:Addr8 + fun:PyObject_Realloc +} +{ + python error + Memcheck:Addr16 + fun:PyObject_Realloc +} +{ + python error + Memcheck:Value1 + fun:PyObject_Realloc +} +{ + python error + Memcheck:Value2 + fun:PyObject_Realloc +} +{ + python error + Memcheck:Value4 + fun:PyObject_Realloc +} +{ + python error + Memcheck:Value8 + fun:PyObject_Realloc +} +{ + python error + Memcheck:Value16 + fun:PyObject_Realloc +} + + -- cgit v1.2.3 From f67c8b33cc57d38b102154c540456ee2c0444e63 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Sun, 15 Jul 2007 21:03:12 -0700 Subject: Fix bug with timing snoop upcalls to MemTest object. --HG-- extra : convert_revision : 1940a5d231b4f856cf69578f68ea98435824dbd8 --- src/cpu/memtest/memtest.cc | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/cpu/memtest/memtest.cc b/src/cpu/memtest/memtest.cc index f5c8bb93b..77816e8d1 100644 --- a/src/cpu/memtest/memtest.cc +++ b/src/cpu/memtest/memtest.cc @@ -57,7 +57,13 @@ int TESTER_ALLOCATOR=0; bool MemTest::CpuPort::recvTiming(PacketPtr pkt) { - memtest->completeRequest(pkt); + if (pkt->isResponse()) { + memtest->completeRequest(pkt); + } else { + // must be snoop upcall + assert(pkt->isRequest()); + assert(pkt->getDest() == Packet::Broadcast); + } return true; } -- cgit v1.2.3 From ff13827ccb559890f05b2e1d97bc6ecf86f9dd16 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Tue, 17 Jul 2007 06:23:11 -0700 Subject: Assert that an mshr has a target in getTarget(). --HG-- extra : convert_revision : 08091670fc319876012ed139fcd2584c364a980c --- src/mem/cache/miss/mshr.hh | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh index 293f290b8..a27f465aa 100644 --- a/src/mem/cache/miss/mshr.hh +++ b/src/mem/cache/miss/mshr.hh @@ -181,11 +181,17 @@ public: */ TargetList* getTargetList() { return &targets; } + /** + * Returns true if there are targets left. + * @return true if there are targets + */ + bool hasTargets() { return !targets.empty(); } + /** * Returns a reference to the first target. * @return A pointer to the first target. */ - Target *getTarget() { return &targets.front(); } + Target *getTarget() { assert(hasTargets()); return &targets.front(); } /** * Pop first target. @@ -196,12 +202,6 @@ public: targets.pop_front(); } - /** - * Returns true if there are targets left. - * @return true if there are targets - */ - bool hasTargets() { return !targets.empty(); } - bool isSimpleForward() { if (getNumTargets() != 1) -- cgit v1.2.3 From a25f3ac67f9c467cf945b45fbc0f4e587e640cab Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Tue, 17 Jul 2007 06:33:28 -0700 Subject: Forward cache-to-cache responses through other caches. --HG-- extra : convert_revision : 5b6a02255bccd98b00949703cf4ba4b221553cea --- src/mem/cache/cache_impl.hh | 56 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 49 insertions(+), 7 deletions(-) diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index 59571dd6f..c069d8ba9 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -315,6 +315,29 @@ Cache::access(PacketPtr pkt, BlkType *&blk, int &lat) } +class ForwardResponseRecord : public Packet::SenderState +{ + Packet::SenderState *prevSenderState; + int prevSrc; +#ifndef NDEBUG + BaseCache *cache; +#endif + public: + ForwardResponseRecord(Packet *pkt, BaseCache *_cache) + : prevSenderState(pkt->senderState), prevSrc(pkt->getSrc()) +#ifndef NDEBUG + , cache(_cache) +#endif + {} + void restore(Packet *pkt, BaseCache *_cache) + { + assert(_cache == cache); + pkt->senderState = prevSenderState; + pkt->setDest(prevSrc); + } +}; + + template bool Cache::timingAccess(PacketPtr pkt) @@ -325,6 +348,19 @@ Cache::timingAccess(PacketPtr pkt) // we charge hitLatency for doing just about anything here Tick time = curTick + hitLatency; + if (pkt->isResponse()) { + // must be cache-to-cache response from upper to lower level + ForwardResponseRecord *rec = + dynamic_cast(pkt->senderState); + assert(rec != NULL); + rec->restore(pkt, this); + delete rec; + memSidePort->respond(pkt, time); + return true; + } + + assert(pkt->isRequest()); + if (pkt->memInhibitAsserted()) { DPRINTF(Cache, "mem inhibited on 0x%x: not responding\n", pkt->getAddr()); @@ -392,6 +428,8 @@ Cache::timingAccess(PacketPtr pkt) if (needsResponse) { pkt->makeTimingResponse(); cpuSidePort->respond(pkt, curTick+lat); + } else { + delete pkt; } } else { // miss @@ -424,12 +462,6 @@ Cache::timingAccess(PacketPtr pkt) } } - if (!needsResponse) { - // Need to clean up the packet on a writeback miss, but leave - // the request for the next level. - delete pkt; - } - return true; } @@ -872,7 +904,14 @@ Cache::doTimingSupplyResponse(PacketPtr req_pkt, { // timing-mode snoop responses require a new packet, unless we // already made a copy... - PacketPtr pkt = already_copied ? req_pkt : new Packet(req_pkt); + PacketPtr pkt = already_copied ? req_pkt : new Packet(req_pkt, true); + if (!req_pkt->isInvalidate()) { + // note that we're ignoring the shared flag on req_pkt... it's + // basically irrelveant, as we'll always assert shared unless + // it's an exclusive request, in which case the shared line + // should never be asserted1 + pkt->assertShared(); + } pkt->allocate(); pkt->makeTimingResponse(); pkt->setDataFromBlock(blk_data, blkSize); @@ -894,11 +933,14 @@ Cache::handleSnoop(PacketPtr pkt, BlkType *blk, if (is_timing) { Packet *snoopPkt = new Packet(pkt, true); // clear flags snoopPkt->setExpressSnoop(); + snoopPkt->senderState = new ForwardResponseRecord(pkt, this); cpuSidePort->sendTiming(snoopPkt); if (snoopPkt->memInhibitAsserted()) { // cache-to-cache response from some upper cache assert(!alreadySupplied); pkt->assertMemInhibit(); + } else { + delete snoopPkt->senderState; } if (snoopPkt->sharedAsserted()) { pkt->assertShared(); -- cgit v1.2.3 From a67a0025b3da9605f1cd41c75bff5dba2175a0dd Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Tue, 17 Jul 2007 08:15:23 -0700 Subject: Make sure responses never get blocked. --HG-- extra : convert_revision : 29f359d743994a94dc403aa0621ba72cd137d1a1 --- src/mem/bus.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mem/bus.cc b/src/mem/bus.cc index e70558bd6..da8df06ea 100644 --- a/src/mem/bus.cc +++ b/src/mem/bus.cc @@ -183,7 +183,7 @@ Bus::recvTiming(PacketPtr pkt) // If the bus is busy, or other devices are in line ahead of the current // one, put this device on the retry list. - if (!pkt->isExpressSnoop() && + if (!(pkt->isResponse() || pkt->isExpressSnoop()) && (tickNextIdle > curTick || (retryList.size() && (!inRetry || pktPort != retryList.front())))) { @@ -194,8 +194,6 @@ Bus::recvTiming(PacketPtr pkt) short dest = pkt->getDest(); - // Make sure to clear the snoop commit flag so it doesn't think an - // access has been handled twice. if (dest == Packet::Broadcast) { port_id = findPort(pkt->getAddr()); timingSnoop(pkt, interfaces[port_id]); @@ -234,6 +232,8 @@ Bus::recvTiming(PacketPtr pkt) } // Packet not successfully sent. Leave or put it on the retry list. + // illegal to block responses... can lead to deadlock + assert(!pkt->isResponse()); DPRINTF(Bus, "Adding2 a retry to RETRY list %d\n", pktPort->getId()); addToRetryList(pktPort); -- cgit v1.2.3 From 91178600947e174041f46f54e4241cedd01bbb34 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Sat, 21 Jul 2007 13:45:17 -0700 Subject: Several more fixes for multi-level timing coherence. - Add "deferred snoop" flag to Packet so upper-level caches can distinguish whether lower-level cache request was in-service or not at the time of the original snoop. - Revamp response handling to properly handle deferred snoops on non-cache-fill requests (i.e. upgrades). - Make sure forwarded writebacks are kept in write buffer at lower-level caches so they get snooped properly. --HG-- extra : convert_revision : 17f8a3772a1ae31a16991a53f8225ddf54d31fc9 --- src/mem/cache/base_cache.hh | 26 +++--- src/mem/cache/cache_impl.hh | 195 ++++++++++++++++++++++---------------------- src/mem/cache/miss/mshr.cc | 18 ++-- src/mem/cache/miss/mshr.hh | 2 +- src/mem/packet.hh | 3 + 5 files changed, 122 insertions(+), 122 deletions(-) diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh index 46414974b..719ab0245 100644 --- a/src/mem/cache/base_cache.hh +++ b/src/mem/cache/base_cache.hh @@ -410,28 +410,28 @@ class BaseCache : public MemObject MSHR *allocateMissBuffer(PacketPtr pkt, Tick time, bool requestBus) { + assert(!pkt->req->isUncacheable()); return allocateBufferInternal(&mshrQueue, blockAlign(pkt->getAddr()), blkSize, pkt, time, requestBus); } - MSHR *allocateBuffer(PacketPtr pkt, Tick time, bool requestBus) + MSHR *allocateWriteBuffer(PacketPtr pkt, Tick time, bool requestBus) { - MSHRQueue *mq = NULL; - - if (pkt->isWrite() && !pkt->isRead()) { - /** - * @todo Add write merging here. - */ - mq = &writeBuffer; - } else { - mq = &mshrQueue; - } - - return allocateBufferInternal(mq, pkt->getAddr(), pkt->getSize(), + assert(pkt->isWrite() && !pkt->isRead()); + return allocateBufferInternal(&writeBuffer, + pkt->getAddr(), pkt->getSize(), pkt, time, requestBus); } + MSHR *allocateUncachedReadBuffer(PacketPtr pkt, Tick time, bool requestBus) + { + assert(pkt->req->isUncacheable()); + assert(pkt->isRead()); + return allocateBufferInternal(&mshrQueue, + pkt->getAddr(), pkt->getSize(), + pkt, time, requestBus); + } /** * Returns true if the cache is blocked for accesses. diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index c069d8ba9..b78360d4a 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -369,7 +369,12 @@ Cache::timingAccess(PacketPtr pkt) } if (pkt->req->isUncacheable()) { - allocateBuffer(pkt, time, true); + // writes go in write buffer, reads use MSHR + if (pkt->isWrite() && !pkt->isRead()) { + allocateWriteBuffer(pkt, time, true); + } else { + allocateUncachedReadBuffer(pkt, time, true); + } assert(pkt->needsResponse()); // else we should delete it here?? return true; } @@ -417,7 +422,7 @@ Cache::timingAccess(PacketPtr pkt) // copy writebacks to write buffer while (!writebacks.empty()) { PacketPtr wbPkt = writebacks.front(); - allocateBuffer(wbPkt, time, true); + allocateWriteBuffer(wbPkt, time, true); writebacks.pop_front(); } #endif @@ -458,7 +463,11 @@ Cache::timingAccess(PacketPtr pkt) // always mark as cache fill for now... if we implement // no-write-allocate or bypass accesses this will have to // be changed. - allocateMissBuffer(pkt, time, true); + if (pkt->cmd == MemCmd::Writeback) { + allocateWriteBuffer(pkt, time, true); + } else { + allocateMissBuffer(pkt, time, true); + } } } @@ -492,6 +501,10 @@ Cache::getBusPacket(PacketPtr cpu_pkt, BlkType *blk, assert(cpu_pkt->needsResponse()); MemCmd cmd; + // @TODO make useUpgrades a parameter. + // Note that ownership protocols require upgrade, otherwise a + // write miss on a shared owned block will generate a ReadExcl, + // which will clobber the owned copy. const bool useUpgrades = true; if (blkValid && useUpgrades) { // only reason to be here is that blk is shared @@ -648,62 +661,6 @@ Cache::functionalAccess(PacketPtr pkt, ///////////////////////////////////////////////////// -template -bool -Cache::satisfyMSHR(MSHR *mshr, PacketPtr pkt, - BlkType *blk) -{ - // respond to MSHR targets, if any - - // First offset for critical word first calculations - int initial_offset = 0; - - if (mshr->hasTargets()) { - initial_offset = mshr->getTarget()->pkt->getOffset(blkSize); - } - - while (mshr->hasTargets()) { - MSHR::Target *target = mshr->getTarget(); - - if (target->isCpuSide()) { - satisfyCpuSideRequest(target->pkt, blk); - // How many bytes pass the first request is this one - int transfer_offset = - target->pkt->getOffset(blkSize) - initial_offset; - if (transfer_offset < 0) { - transfer_offset += blkSize; - } - - // If critical word (no offset) return first word time - Tick completion_time = tags->getHitLatency() + - transfer_offset ? pkt->finishTime : pkt->firstWordTime; - - if (!target->pkt->req->isUncacheable()) { - missLatency[target->pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] += - completion_time - target->recvTime; - } - target->pkt->makeTimingResponse(); - cpuSidePort->respond(target->pkt, completion_time); - } else { - // response to snoop request - DPRINTF(Cache, "processing deferred snoop...\n"); - handleSnoop(target->pkt, blk, true, true); - } - - mshr->popTarget(); - } - - if (mshr->promoteDeferredTargets()) { - MSHRQueue *mq = mshr->queue; - mq->markPending(mshr); - requestMemSideBus((RequestCause)mq->index, pkt->finishTime); - return false; - } - - return true; -} - - template void Cache::handleResponse(PacketPtr pkt) @@ -730,68 +687,105 @@ Cache::handleResponse(PacketPtr pkt) noTargetMSHR = NULL; } - // Can we deallocate MSHR when done? - bool deallocate = false; - // Initial target is used just for stats MSHR::Target *initial_tgt = mshr->getTarget(); + BlkType *blk = tags->findBlock(pkt->getAddr()); int stats_cmd_idx = initial_tgt->pkt->cmdToIndex(); Tick miss_latency = curTick - initial_tgt->recvTime; + PacketList writebacks; - if (mshr->isCacheFill) { + if (pkt->req->isUncacheable()) { + mshr_uncacheable_lat[stats_cmd_idx][0/*pkt->req->getThreadNum()*/] += + miss_latency; + } else { mshr_miss_latency[stats_cmd_idx][0/*pkt->req->getThreadNum()*/] += miss_latency; + } + + if (mshr->isCacheFill) { DPRINTF(Cache, "Block for addr %x being updated in Cache\n", pkt->getAddr()); - BlkType *blk = tags->findBlock(pkt->getAddr()); // give mshr a chance to do some dirty work mshr->handleFill(pkt, blk); - PacketList writebacks; blk = handleFill(pkt, blk, writebacks); - deallocate = satisfyMSHR(mshr, pkt, blk); - // copy writebacks to write buffer - while (!writebacks.empty()) { - PacketPtr wbPkt = writebacks.front(); - allocateBuffer(wbPkt, time, true); - writebacks.pop_front(); - } - // if we used temp block, clear it out - if (blk == tempBlock) { - if (blk->isDirty()) { - allocateBuffer(writebackBlk(blk), time, true); - } - tags->invalidateBlk(blk); - } - } else { - if (pkt->req->isUncacheable()) { - mshr_uncacheable_lat[stats_cmd_idx][0/*pkt->req->getThreadNum()*/] += - miss_latency; - } + assert(blk != NULL); + } - while (mshr->hasTargets()) { - MSHR::Target *target = mshr->getTarget(); - assert(target->isCpuSide()); - mshr->popTarget(); - if (pkt->isRead()) { - target->pkt->setData(pkt->getPtr()); + // First offset for critical word first calculations + int initial_offset = 0; + + if (mshr->hasTargets()) { + initial_offset = mshr->getTarget()->pkt->getOffset(blkSize); + } + + while (mshr->hasTargets()) { + MSHR::Target *target = mshr->getTarget(); + + if (target->isCpuSide()) { + Tick completion_time; + if (blk != NULL) { + satisfyCpuSideRequest(target->pkt, blk); + // How many bytes pass the first request is this one + int transfer_offset = + target->pkt->getOffset(blkSize) - initial_offset; + if (transfer_offset < 0) { + transfer_offset += blkSize; + } + + // If critical word (no offset) return first word time + completion_time = tags->getHitLatency() + + transfer_offset ? pkt->finishTime : pkt->firstWordTime; + + if (!target->pkt->req->isUncacheable()) { + missLatency[target->pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] += + completion_time - target->recvTime; + } + } else { + // not a cache fill, just forwarding response + completion_time = tags->getHitLatency() + pkt->finishTime; + if (pkt->isRead()) { + target->pkt->setData(pkt->getPtr()); + } } target->pkt->makeTimingResponse(); - cpuSidePort->respond(target->pkt, time); + cpuSidePort->respond(target->pkt, completion_time); + } else { + // response to snoop request + DPRINTF(Cache, "processing deferred snoop...\n"); + handleSnoop(target->pkt, blk, true, true); } - assert(!mshr->hasTargets()); - deallocate = true; - } - delete pkt; + mshr->popTarget(); + } - if (deallocate) { + if (mshr->promoteDeferredTargets()) { + MSHRQueue *mq = mshr->queue; + mq->markPending(mshr); + requestMemSideBus((RequestCause)mq->index, pkt->finishTime); + } else { mq->deallocate(mshr); if (wasFull && !mq->isFull()) { clearBlocked((BlockedCause)mq->index); } } + + // copy writebacks to write buffer + while (!writebacks.empty()) { + PacketPtr wbPkt = writebacks.front(); + allocateWriteBuffer(wbPkt, time, true); + writebacks.pop_front(); + } + // if we used temp block, clear it out + if (blk == tempBlock) { + if (blk->isDirty()) { + allocateWriteBuffer(writebackBlk(blk), time, true); + } + tags->invalidateBlk(blk); + } + + delete pkt; } @@ -933,6 +927,9 @@ Cache::handleSnoop(PacketPtr pkt, BlkType *blk, if (is_timing) { Packet *snoopPkt = new Packet(pkt, true); // clear flags snoopPkt->setExpressSnoop(); + if (is_deferred) { + snoopPkt->setDeferredSnoop(); + } snoopPkt->senderState = new ForwardResponseRecord(pkt, this); cpuSidePort->sendTiming(snoopPkt); if (snoopPkt->memInhibitAsserted()) { @@ -1020,12 +1017,11 @@ Cache::snoopTiming(PacketPtr pkt) MSHR *mshr = mshrQueue.findMatch(blk_addr); // better not be snooping a request that conflicts with something // we have outstanding... - if (mshr && mshr->inService) { + if (mshr && mshr->handleSnoop(pkt, order++)) { DPRINTF(Cache, "Deferring snoop on in-service MSHR to blk %x\n", blk_addr); - mshr->allocateSnoopTarget(pkt, curTick, order++); if (mshr->getNumTargets() > numTarget) - warn("allocating bonus target for snoop"); //handle later + warn("allocating bonus target for snoop"); //handle later return; } @@ -1226,6 +1222,7 @@ template bool Cache::CpuSidePort::recvTiming(PacketPtr pkt) { + // illegal to block responses... can lead to deadlock if (pkt->isRequest() && blocked) { DPRINTF(Cache,"Scheduling a retry while blocked\n"); mustSendRetry = true; diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc index 5d5e63f90..7ba3789fe 100644 --- a/src/mem/cache/miss/mshr.cc +++ b/src/mem/cache/miss/mshr.cc @@ -119,25 +119,23 @@ MSHR::allocateTarget(PacketPtr target, Tick whenReady, Counter _order) ++ntargets; } -void -MSHR::allocateSnoopTarget(PacketPtr pkt, Tick whenReady, Counter _order) +bool +MSHR::handleSnoop(PacketPtr pkt, Counter _order) { - assert(inService); // don't bother to call otherwise + if (!inService || (pkt->isExpressSnoop() && !pkt->isDeferredSnoop())) { + return false; + } if (pendingInvalidate) { // a prior snoop has already appended an invalidation, so // logically we don't have the block anymore... - return; + return true; } - DPRINTF(Cache, "deferred snoop on %x: %s %s\n", addr, - needsExclusive ? "needsExclusive" : "", - pkt->needsExclusive() ? "pkt->needsExclusive()" : ""); - if (needsExclusive || pkt->needsExclusive()) { // actual target device (typ. PhysicalMemory) will delete the // packet on reception, so we need to save a copy here - targets.push_back(Target(new Packet(pkt), whenReady, _order, false)); + targets.push_back(Target(new Packet(pkt), curTick, _order, false)); ++ntargets; if (needsExclusive) { @@ -157,6 +155,8 @@ MSHR::allocateSnoopTarget(PacketPtr pkt, Tick whenReady, Counter _order) pendingShared = true; pkt->assertShared(); } + + return true; } diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh index a27f465aa..9c6a8cf33 100644 --- a/src/mem/cache/miss/mshr.hh +++ b/src/mem/cache/miss/mshr.hh @@ -162,7 +162,7 @@ public: * @param target The target. */ void allocateTarget(PacketPtr target, Tick when, Counter order); - void allocateSnoopTarget(PacketPtr target, Tick when, Counter order); + bool handleSnoop(PacketPtr target, Counter order); /** A simple constructor. */ MSHR(); diff --git a/src/mem/packet.hh b/src/mem/packet.hh index 036bd3fd7..8063c7ae7 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -257,6 +257,7 @@ class Packet : public FastAlloc Shared, // Special control flags ExpressSnoop, + DeferredSnoop, NUM_PACKET_FLAGS }; @@ -322,6 +323,8 @@ class Packet : public FastAlloc // Special control flags void setExpressSnoop() { flags[ExpressSnoop] = true; } bool isExpressSnoop() { return flags[ExpressSnoop]; } + void setDeferredSnoop() { flags[DeferredSnoop] = true; } + bool isDeferredSnoop() { return flags[DeferredSnoop]; } // Network error conditions... encapsulate them as methods since // their encoding keeps changing (from result field to command -- cgit v1.2.3 From 92ce2b59743c8cace420147e276d7376a4b905f1 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Sat, 21 Jul 2007 18:18:42 -0700 Subject: Deal with invalidations intersecting outstanding upgrades. If the invalidation beats the upgrade at a lower level then the upgrade must be converted to a read exclusive "in the field". Restructure target list & deferred target list to factor out some common code. --HG-- extra : convert_revision : 7bab4482dd6c48efdb619610f0d3778c60ff777a --- src/mem/cache/cache_impl.hh | 4 +- src/mem/cache/miss/mshr.cc | 153 +++++++++++++++++++++++++++++++++----------- src/mem/cache/miss/mshr.hh | 35 ++++++---- 3 files changed, 139 insertions(+), 53 deletions(-) diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index b78360d4a..9fb5cdbde 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -836,7 +836,7 @@ Cache::handleFill(PacketPtr pkt, BlkType *blk, // must be an outstanding upgrade request on block // we're about to replace... assert(!blk->isWritable()); - assert(repl_mshr->needsExclusive); + assert(repl_mshr->needsExclusive()); // too hard to replace block with transient state; // just use temporary storage to complete the current // request and then get rid of it @@ -1177,7 +1177,7 @@ Cache::getTimingPacket() pkt = tgt_pkt; } else { BlkType *blk = tags->findBlock(mshr->addr); - pkt = getBusPacket(tgt_pkt, blk, mshr->needsExclusive); + pkt = getBusPacket(tgt_pkt, blk, mshr->needsExclusive()); mshr->isCacheFill = (pkt != NULL); diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc index 7ba3789fe..856819c10 100644 --- a/src/mem/cache/miss/mshr.cc +++ b/src/mem/cache/miss/mshr.cc @@ -52,8 +52,51 @@ MSHR::MSHR() inService = false; ntargets = 0; threadNum = -1; + targets = new TargetList(); + deferredTargets = new TargetList(); } + +MSHR::TargetList::TargetList() + : needsExclusive(false), hasUpgrade(false) +{} + + +inline void +MSHR::TargetList::add(PacketPtr pkt, Tick readyTime, Counter order, bool cpuSide) +{ + if (cpuSide) { + if (pkt->needsExclusive()) { + needsExclusive = true; + } + + if (pkt->cmd == MemCmd::UpgradeReq) { + hasUpgrade = true; + } + } + + push_back(Target(pkt, readyTime, order, cpuSide)); +} + + +void +MSHR::TargetList::replaceUpgrades() +{ + if (!hasUpgrade) + return; + + Iterator end_i = end(); + for (Iterator i = begin(); i != end_i; ++i) { + if (i->pkt->cmd == MemCmd::UpgradeReq) { + i->pkt->cmd = MemCmd::ReadExReq; + DPRINTF(Cache, "Replacing UpgradeReq with ReadExReq\n"); + } + } + + hasUpgrade = false; +} + + void MSHR::allocate(Addr _addr, int _size, PacketPtr target, Tick whenReady, Counter _order) @@ -64,16 +107,15 @@ MSHR::allocate(Addr _addr, int _size, PacketPtr target, order = _order; assert(target); isCacheFill = false; - needsExclusive = target->needsExclusive(); _isUncacheable = target->req->isUncacheable(); inService = false; threadNum = 0; ntargets = 1; // Don't know of a case where we would allocate a new MSHR for a // snoop (mem-side request), so set cpuSide to true here. - targets.push_back(Target(target, whenReady, _order, true)); - assert(deferredTargets.empty()); - deferredNeedsExclusive = false; + assert(targets->isReset()); + targets->add(target, whenReady, _order, true); + assert(deferredTargets->isReset()); pendingInvalidate = false; pendingShared = false; data = NULL; @@ -82,8 +124,9 @@ MSHR::allocate(Addr _addr, int _size, PacketPtr target, void MSHR::deallocate() { - assert(targets.empty()); - assert(deferredTargets.empty()); + assert(targets->empty()); + targets->resetFlags(); + assert(deferredTargets->isReset()); assert(ntargets == 0); inService = false; //allocIter = NULL; @@ -94,26 +137,25 @@ MSHR::deallocate() * Adds a target to an MSHR */ void -MSHR::allocateTarget(PacketPtr target, Tick whenReady, Counter _order) +MSHR::allocateTarget(PacketPtr pkt, Tick whenReady, Counter _order) { - if (inService) { - if (!deferredTargets.empty() || pendingInvalidate || - (!needsExclusive && target->needsExclusive())) { - // need to put on deferred list - deferredTargets.push_back(Target(target, whenReady, _order, true)); - if (target->needsExclusive()) { - deferredNeedsExclusive = true; - } - } else { - // still OK to append to outstanding request - targets.push_back(Target(target, whenReady, _order, true)); - } + // if there's a request already in service for this MSHR, we will + // have to defer the new target until after the response if any of + // the following are true: + // - there are other targets already deferred + // - there's a pending invalidate to be applied after the response + // comes back (but before this target is processed) + // - the outstanding request is for a non-exclusive block and this + // target requires an exclusive block + if (inService && + (!deferredTargets->empty() || pendingInvalidate || + (!targets->needsExclusive && pkt->needsExclusive()))) { + // need to put on deferred list + deferredTargets->add(pkt, whenReady, _order, true); } else { - if (target->needsExclusive()) { - needsExclusive = true; - } - - targets.push_back(Target(target, whenReady, _order, true)); + // no request outstanding, or still OK to append to + // outstanding request + targets->add(pkt, whenReady, _order, true); } ++ntargets; @@ -123,22 +165,50 @@ bool MSHR::handleSnoop(PacketPtr pkt, Counter _order) { if (!inService || (pkt->isExpressSnoop() && !pkt->isDeferredSnoop())) { + // Request has not been issued yet, or it's been issued + // locally but is buffered unissued at some downstream cache + // which is forwarding us this snoop. Either way, the packet + // we're snooping logically precedes this MSHR's request, so + // the snoop has no impact on the MSHR, but must be processed + // in the standard way by the cache. The only exception is + // that if we're an L2+ cache buffering an UpgradeReq from a + // higher-level cache, and the snoop is invalidating, then our + // buffered upgrades must be converted to read exclusives, + // since the upper-level cache no longer has a valid copy. + // That is, even though the upper-level cache got out on its + // local bus first, some other invalidating transaction + // reached the global bus before the upgrade did. + if (pkt->needsExclusive()) { + targets->replaceUpgrades(); + deferredTargets->replaceUpgrades(); + } + return false; } + // From here on down, the request issued by this MSHR logically + // precedes the request we're snooping. + + if (pkt->needsExclusive()) { + // snooped request still precedes the re-request we'll have to + // issue for deferred targets, if any... + deferredTargets->replaceUpgrades(); + } + if (pendingInvalidate) { // a prior snoop has already appended an invalidation, so - // logically we don't have the block anymore... + // logically we don't have the block anymore; no need for + // further snooping. return true; } - if (needsExclusive || pkt->needsExclusive()) { + if (targets->needsExclusive || pkt->needsExclusive()) { // actual target device (typ. PhysicalMemory) will delete the // packet on reception, so we need to save a copy here - targets.push_back(Target(new Packet(pkt), curTick, _order, false)); + targets->add(new Packet(pkt), curTick, _order, false); ++ntargets; - if (needsExclusive) { + if (targets->needsExclusive) { // We're awaiting an exclusive copy, so ownership is pending. // It's up to us to respond once the data arrives. pkt->assertMemInhibit(); @@ -163,21 +233,25 @@ MSHR::handleSnoop(PacketPtr pkt, Counter _order) bool MSHR::promoteDeferredTargets() { - if (deferredTargets.empty()) { + assert(targets->empty()); + if (deferredTargets->empty()) { return false; } - assert(targets.empty()); + // swap targets & deferredTargets lists + TargetList *tmp = targets; targets = deferredTargets; - deferredTargets.clear(); - assert(targets.size() == ntargets); + deferredTargets = tmp; + + assert(targets->size() == ntargets); + + // clear deferredTargets flags + deferredTargets->resetFlags(); - needsExclusive = deferredNeedsExclusive; pendingInvalidate = false; pendingShared = false; - deferredNeedsExclusive = false; - order = targets.front().order; - readyTime = std::max(curTick, targets.front().readyTime); + order = targets->front().order; + readyTime = std::max(curTick, targets->front().readyTime); return true; } @@ -202,16 +276,17 @@ MSHR::dump() "Addr: %x ntargets %d\n" "Targets:\n", inService, threadNum, addr, ntargets); - - TargetListIterator tar_it = targets.begin(); +#if 0 + TargetListIterator tar_it = targets->begin(); for (int i = 0; i < ntargets; i++) { - assert(tar_it != targets.end()); + assert(tar_it != targets->end()); ccprintf(cerr, "\t%d: Addr: %x cmd: %s\n", i, tar_it->pkt->getAddr(), tar_it->pkt->cmdString()); tar_it++; } +#endif ccprintf(cerr, "\n"); } diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh index 9c6a8cf33..06ef6e113 100644 --- a/src/mem/cache/miss/mshr.hh +++ b/src/mem/cache/miss/mshr.hh @@ -68,10 +68,21 @@ class MSHR : public Packet::SenderState {} }; - /** Defines the Data structure of the MSHR targetlist. */ - typedef std::list TargetList; - /** Target list iterator. */ - typedef std::list::iterator TargetListIterator; + class TargetList : public std::list { + /** Target list iterator. */ + typedef std::list::iterator Iterator; + + public: + bool needsExclusive; + bool hasUpgrade; + + TargetList(); + void resetFlags() { needsExclusive = hasUpgrade = false; } + bool isReset() { return !needsExclusive && !hasUpgrade; } + void add(PacketPtr pkt, Tick readyTime, Counter order, bool cpuSide); + void replaceUpgrades(); + }; + /** A list of MSHRs. */ typedef std::list List; /** MSHR list iterator. */ @@ -99,13 +110,13 @@ class MSHR : public Packet::SenderState /** True if we will be putting the returned block in the cache */ bool isCacheFill; + /** True if we need to get an exclusive copy of the block. */ - bool needsExclusive; + bool needsExclusive() { return targets->needsExclusive; } /** True if the request is uncacheable */ bool _isUncacheable; - bool deferredNeedsExclusive; bool pendingInvalidate; bool pendingShared; @@ -133,9 +144,9 @@ class MSHR : public Packet::SenderState private: /** List of all requests that match the address */ - TargetList targets; + TargetList *targets; - TargetList deferredTargets; + TargetList *deferredTargets; public: @@ -179,19 +190,19 @@ public: * Returns a pointer to the target list. * @return a pointer to the target list. */ - TargetList* getTargetList() { return &targets; } + TargetList *getTargetList() { return targets; } /** * Returns true if there are targets left. * @return true if there are targets */ - bool hasTargets() { return !targets.empty(); } + bool hasTargets() { return !targets->empty(); } /** * Returns a reference to the first target. * @return A pointer to the first target. */ - Target *getTarget() { assert(hasTargets()); return &targets.front(); } + Target *getTarget() { assert(hasTargets()); return &targets->front(); } /** * Pop first target. @@ -199,7 +210,7 @@ public: void popTarget() { --ntargets; - targets.pop_front(); + targets->pop_front(); } bool isSimpleForward() -- cgit v1.2.3 From 1edd143b688e9bd7285344ee8d2f5bd8cb3dc12d Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Sun, 22 Jul 2007 03:07:26 -0400 Subject: A few minor non-debug compilation issues. --HG-- extra : convert_revision : d59a5cad6187a2229dddd1a48282ebd2923d1d8d --- src/mem/cache/cache_impl.hh | 2 ++ src/mem/cache/miss/mshr_queue.cc | 1 + src/mem/cache/tags/split.cc | 2 +- 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index 9fb5cdbde..c8c1a239c 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -821,7 +821,9 @@ Cache::handleFill(PacketPtr pkt, BlkType *blk, PacketList &writebacks) { Addr addr = pkt->getAddr(); +#if TRACING_ON CacheBlk::State old_state = blk ? blk->status : 0; +#endif if (blk == NULL) { // better have read new data... diff --git a/src/mem/cache/miss/mshr_queue.cc b/src/mem/cache/miss/mshr_queue.cc index 56ec62a7d..4d3cf30e1 100644 --- a/src/mem/cache/miss/mshr_queue.cc +++ b/src/mem/cache/miss/mshr_queue.cc @@ -123,6 +123,7 @@ MSHRQueue::addToReadyList(MSHR *mshr) } } assert(false); + return end; // keep stupid compilers happy } diff --git a/src/mem/cache/tags/split.cc b/src/mem/cache/tags/split.cc index e22ccbb96..ae284766d 100644 --- a/src/mem/cache/tags/split.cc +++ b/src/mem/cache/tags/split.cc @@ -300,7 +300,7 @@ Split::findBlock(Addr addr) const SplitBlk* Split::findReplacement(Addr addr, PacketList &writebacks) { - SplitBlk *blk; + SplitBlk *blk = NULL; assert(0); #if 0 -- cgit v1.2.3 From 1c2d5f5e64387527efe495a59f6946e7b539a543 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Sun, 22 Jul 2007 08:09:24 -0700 Subject: Replace DeferredSnoop flag with LowerMSHRPending flag. Turns out DeferredSnoop isn't quite the right bit of info we needed... see new comment in cache_impl.hh. --HG-- extra : convert_revision : a38de8c1677a37acafb743b7074ef88b21d3b7be --- src/mem/cache/cache.hh | 3 ++- src/mem/cache/cache_impl.hh | 28 ++++++++++++++++++++-------- src/mem/cache/miss/mshr.cc | 2 +- src/mem/packet.hh | 6 +++--- 4 files changed, 26 insertions(+), 13 deletions(-) diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh index 57028a05e..7dfe9e8f1 100644 --- a/src/mem/cache/cache.hh +++ b/src/mem/cache/cache.hh @@ -190,7 +190,8 @@ class Cache : public BaseCache * @param new_state The new coherence state for the block. */ void handleSnoop(PacketPtr ptk, BlkType *blk, - bool is_timing, bool is_deferred); + bool is_timing, bool is_deferred, + bool lower_mshr_pending); /** * Create a writeback request for the given block. diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index c8c1a239c..82410afe1 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -754,7 +754,7 @@ Cache::handleResponse(PacketPtr pkt) } else { // response to snoop request DPRINTF(Cache, "processing deferred snoop...\n"); - handleSnoop(target->pkt, blk, true, true); + handleSnoop(target->pkt, blk, true, true, false); } mshr->popTarget(); @@ -917,7 +917,8 @@ Cache::doTimingSupplyResponse(PacketPtr req_pkt, template void Cache::handleSnoop(PacketPtr pkt, BlkType *blk, - bool is_timing, bool is_deferred) + bool is_timing, bool is_deferred, + bool lower_mshr_pending) { assert(pkt->isRequest()); @@ -929,8 +930,8 @@ Cache::handleSnoop(PacketPtr pkt, BlkType *blk, if (is_timing) { Packet *snoopPkt = new Packet(pkt, true); // clear flags snoopPkt->setExpressSnoop(); - if (is_deferred) { - snoopPkt->setDeferredSnoop(); + if (lower_mshr_pending) { + snoopPkt->setLowerMSHRPending(); } snoopPkt->senderState = new ForwardResponseRecord(pkt, this); cpuSidePort->sendTiming(snoopPkt); @@ -1017,8 +1018,19 @@ Cache::snoopTiming(PacketPtr pkt) Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1)); MSHR *mshr = mshrQueue.findMatch(blk_addr); - // better not be snooping a request that conflicts with something - // we have outstanding... + + // If a lower cache has an operation on this block pending (not + // yet in service) on the MSHR, then the upper caches need to know + // about it, as this means that the pending operation logically + // succeeds the current snoop. It's not sufficient to record + // whether the MSHR *is* in service, as this misses the window + // where the lower cache has completed the request and the + // response is on its way back up the hierarchy. + bool lower_mshr_pending = + (mshr && (!mshr->inService) || pkt->lowerMSHRPending()); + + // Let the MSHR itself track the snoop and decide whether we want + // to go ahead and do the regular cache snoop if (mshr && mshr->handleSnoop(pkt, order++)) { DPRINTF(Cache, "Deferring snoop on in-service MSHR to blk %x\n", blk_addr); @@ -1063,7 +1075,7 @@ Cache::snoopTiming(PacketPtr pkt) } } - handleSnoop(pkt, blk, true, false); + handleSnoop(pkt, blk, true, false, lower_mshr_pending); } @@ -1078,7 +1090,7 @@ Cache::snoopAtomic(PacketPtr pkt) } BlkType *blk = tags->findBlock(pkt->getAddr()); - handleSnoop(pkt, blk, false, false); + handleSnoop(pkt, blk, false, false, false); return hitLatency; } diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc index 856819c10..b9dfdf729 100644 --- a/src/mem/cache/miss/mshr.cc +++ b/src/mem/cache/miss/mshr.cc @@ -164,7 +164,7 @@ MSHR::allocateTarget(PacketPtr pkt, Tick whenReady, Counter _order) bool MSHR::handleSnoop(PacketPtr pkt, Counter _order) { - if (!inService || (pkt->isExpressSnoop() && !pkt->isDeferredSnoop())) { + if (!inService || (pkt->isExpressSnoop() && pkt->lowerMSHRPending())) { // Request has not been issued yet, or it's been issued // locally but is buffered unissued at some downstream cache // which is forwarding us this snoop. Either way, the packet diff --git a/src/mem/packet.hh b/src/mem/packet.hh index 8063c7ae7..779ea49a2 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -257,7 +257,7 @@ class Packet : public FastAlloc Shared, // Special control flags ExpressSnoop, - DeferredSnoop, + LowerMSHRPending, // not yet in service NUM_PACKET_FLAGS }; @@ -323,8 +323,8 @@ class Packet : public FastAlloc // Special control flags void setExpressSnoop() { flags[ExpressSnoop] = true; } bool isExpressSnoop() { return flags[ExpressSnoop]; } - void setDeferredSnoop() { flags[DeferredSnoop] = true; } - bool isDeferredSnoop() { return flags[DeferredSnoop]; } + void setLowerMSHRPending() { flags[LowerMSHRPending] = true; } + bool lowerMSHRPending() { return flags[LowerMSHRPending]; } // Network error conditions... encapsulate them as methods since // their encoding keeps changing (from result field to command -- cgit v1.2.3 From 82e2a3557672864f0ea3ae64dad61681546aaf07 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Sun, 22 Jul 2007 21:43:38 -0700 Subject: Replace lowerMSHRPending flag with more robust scheme based on following Packet senderState links. --HG-- extra : convert_revision : 9027d59bd7242aa0e4275bf94d8b1fb27bd59d79 --- src/mem/cache/cache.hh | 3 +-- src/mem/cache/cache_impl.hh | 22 ++++-------------- src/mem/cache/miss/mshr.cc | 48 ++++++++++++++++++++++++++++++++++++++-- src/mem/cache/miss/mshr.hh | 5 +++++ src/mem/cache/miss/mshr_queue.cc | 16 ++++---------- src/mem/packet.hh | 3 --- 6 files changed, 60 insertions(+), 37 deletions(-) diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh index 7dfe9e8f1..57028a05e 100644 --- a/src/mem/cache/cache.hh +++ b/src/mem/cache/cache.hh @@ -190,8 +190,7 @@ class Cache : public BaseCache * @param new_state The new coherence state for the block. */ void handleSnoop(PacketPtr ptk, BlkType *blk, - bool is_timing, bool is_deferred, - bool lower_mshr_pending); + bool is_timing, bool is_deferred); /** * Create a writeback request for the given block. diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index 82410afe1..efd7f4588 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -754,7 +754,7 @@ Cache::handleResponse(PacketPtr pkt) } else { // response to snoop request DPRINTF(Cache, "processing deferred snoop...\n"); - handleSnoop(target->pkt, blk, true, true, false); + handleSnoop(target->pkt, blk, true, true); } mshr->popTarget(); @@ -917,8 +917,7 @@ Cache::doTimingSupplyResponse(PacketPtr req_pkt, template void Cache::handleSnoop(PacketPtr pkt, BlkType *blk, - bool is_timing, bool is_deferred, - bool lower_mshr_pending) + bool is_timing, bool is_deferred) { assert(pkt->isRequest()); @@ -930,9 +929,6 @@ Cache::handleSnoop(PacketPtr pkt, BlkType *blk, if (is_timing) { Packet *snoopPkt = new Packet(pkt, true); // clear flags snoopPkt->setExpressSnoop(); - if (lower_mshr_pending) { - snoopPkt->setLowerMSHRPending(); - } snoopPkt->senderState = new ForwardResponseRecord(pkt, this); cpuSidePort->sendTiming(snoopPkt); if (snoopPkt->memInhibitAsserted()) { @@ -1019,16 +1015,6 @@ Cache::snoopTiming(PacketPtr pkt) Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1)); MSHR *mshr = mshrQueue.findMatch(blk_addr); - // If a lower cache has an operation on this block pending (not - // yet in service) on the MSHR, then the upper caches need to know - // about it, as this means that the pending operation logically - // succeeds the current snoop. It's not sufficient to record - // whether the MSHR *is* in service, as this misses the window - // where the lower cache has completed the request and the - // response is on its way back up the hierarchy. - bool lower_mshr_pending = - (mshr && (!mshr->inService) || pkt->lowerMSHRPending()); - // Let the MSHR itself track the snoop and decide whether we want // to go ahead and do the regular cache snoop if (mshr && mshr->handleSnoop(pkt, order++)) { @@ -1075,7 +1061,7 @@ Cache::snoopTiming(PacketPtr pkt) } } - handleSnoop(pkt, blk, true, false, lower_mshr_pending); + handleSnoop(pkt, blk, true, false); } @@ -1090,7 +1076,7 @@ Cache::snoopAtomic(PacketPtr pkt) } BlkType *blk = tags->findBlock(pkt->getAddr()); - handleSnoop(pkt, blk, false, false, false); + handleSnoop(pkt, blk, false, false); return hitLatency; } diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc index b9dfdf729..9b05aea3f 100644 --- a/src/mem/cache/miss/mshr.cc +++ b/src/mem/cache/miss/mshr.cc @@ -63,7 +63,8 @@ MSHR::TargetList::TargetList() inline void -MSHR::TargetList::add(PacketPtr pkt, Tick readyTime, Counter order, bool cpuSide) +MSHR::TargetList::add(PacketPtr pkt, Tick readyTime, + Counter order, bool cpuSide) { if (cpuSide) { if (pkt->needsExclusive()) { @@ -73,6 +74,12 @@ MSHR::TargetList::add(PacketPtr pkt, Tick readyTime, Counter order, bool cpuSide if (pkt->cmd == MemCmd::UpgradeReq) { hasUpgrade = true; } + + MSHR *mshr = dynamic_cast(pkt->senderState); + if (mshr != NULL) { + assert(!mshr->downstreamPending); + mshr->downstreamPending = true; + } } push_back(Target(pkt, readyTime, order, cpuSide)); @@ -97,6 +104,20 @@ MSHR::TargetList::replaceUpgrades() } +void +MSHR::TargetList::clearDownstreamPending() +{ + Iterator end_i = end(); + for (Iterator i = begin(); i != end_i; ++i) { + MSHR *mshr = dynamic_cast(i->pkt->senderState); + if (mshr != NULL) { + assert(mshr->downstreamPending); + mshr->downstreamPending = false; + } + } +} + + void MSHR::allocate(Addr _addr, int _size, PacketPtr target, Tick whenReady, Counter _order) @@ -109,6 +130,7 @@ MSHR::allocate(Addr _addr, int _size, PacketPtr target, isCacheFill = false; _isUncacheable = target->req->isUncacheable(); inService = false; + downstreamPending = false; threadNum = 0; ntargets = 1; // Don't know of a case where we would allocate a new MSHR for a @@ -121,6 +143,28 @@ MSHR::allocate(Addr _addr, int _size, PacketPtr target, data = NULL; } + +bool +MSHR::markInService() +{ + assert(!inService); + if (isSimpleForward()) { + // we just forwarded the request packet & don't expect a + // response, so get rid of it + assert(getNumTargets() == 1); + popTarget(); + return true; + } + inService = true; + if (!downstreamPending) { + // let upstream caches know that the request has made it to a + // level where it's going to get a response + targets->clearDownstreamPending(); + } + return false; +} + + void MSHR::deallocate() { @@ -164,7 +208,7 @@ MSHR::allocateTarget(PacketPtr pkt, Tick whenReady, Counter _order) bool MSHR::handleSnoop(PacketPtr pkt, Counter _order) { - if (!inService || (pkt->isExpressSnoop() && pkt->lowerMSHRPending())) { + if (!inService || downstreamPending) { // Request has not been issued yet, or it's been issued // locally but is buffered unissued at some downstream cache // which is forwarding us this snoop. Either way, the packet diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh index 06ef6e113..e850a8633 100644 --- a/src/mem/cache/miss/mshr.hh +++ b/src/mem/cache/miss/mshr.hh @@ -81,6 +81,7 @@ class MSHR : public Packet::SenderState bool isReset() { return !needsExclusive && !hasUpgrade; } void add(PacketPtr pkt, Tick readyTime, Counter order, bool cpuSide); void replaceUpgrades(); + void clearDownstreamPending(); }; /** A list of MSHRs. */ @@ -117,6 +118,8 @@ class MSHR : public Packet::SenderState /** True if the request is uncacheable */ bool _isUncacheable; + bool downstreamPending; + bool pendingInvalidate; bool pendingShared; @@ -163,6 +166,8 @@ public: void allocate(Addr addr, int size, PacketPtr pkt, Tick when, Counter _order); + bool markInService(); + /** * Mark this MSHR as free. */ diff --git a/src/mem/cache/miss/mshr_queue.cc b/src/mem/cache/miss/mshr_queue.cc index 4d3cf30e1..50a28fb3c 100644 --- a/src/mem/cache/miss/mshr_queue.cc +++ b/src/mem/cache/miss/mshr_queue.cc @@ -179,20 +179,12 @@ MSHRQueue::moveToFront(MSHR *mshr) void MSHRQueue::markInService(MSHR *mshr) { - assert(!mshr->inService); - if (mshr->isSimpleForward()) { - // we just forwarded the request packet & don't expect a - // response, so get rid of it - assert(mshr->getNumTargets() == 1); - mshr->popTarget(); + if (mshr->markInService()) { deallocate(mshr); - return; + } else { + readyList.erase(mshr->readyIter); + inServiceEntries += 1; } - mshr->inService = true; - readyList.erase(mshr->readyIter); - //mshr->readyIter = NULL; - inServiceEntries += 1; - //readyList.pop_front(); } void diff --git a/src/mem/packet.hh b/src/mem/packet.hh index 779ea49a2..036bd3fd7 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -257,7 +257,6 @@ class Packet : public FastAlloc Shared, // Special control flags ExpressSnoop, - LowerMSHRPending, // not yet in service NUM_PACKET_FLAGS }; @@ -323,8 +322,6 @@ class Packet : public FastAlloc // Special control flags void setExpressSnoop() { flags[ExpressSnoop] = true; } bool isExpressSnoop() { return flags[ExpressSnoop]; } - void setLowerMSHRPending() { flags[LowerMSHRPending] = true; } - bool lowerMSHRPending() { return flags[LowerMSHRPending]; } // Network error conditions... encapsulate them as methods since // their encoding keeps changing (from result field to command -- cgit v1.2.3 From 97f7ee2e507733eb9dd1802c16900fd14ae6b7f3 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Mon, 23 Jul 2007 08:18:51 -0700 Subject: Fix WriteReq/StoreCondReq setting in O3. --HG-- extra : convert_revision : b41571535f3d1f78df3cb6e48c16de5c7549d87f --- src/cpu/o3/lsq_unit_impl.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index 5ae1cc0e4..8b2e82d8e 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -649,7 +649,7 @@ LSQUnit::writebackStores() MemCmd command = req->isSwap() ? MemCmd::SwapReq : - (req->isLocked() ? MemCmd::WriteReq : MemCmd::StoreCondReq); + (req->isLocked() ? MemCmd::StoreCondReq : MemCmd::WriteReq); PacketPtr data_pkt = new Packet(req, command, Packet::Broadcast); data_pkt->dataStatic(inst->memData); -- cgit v1.2.3 From 1f9ea6e122f6a39d936aec2f8f5ce72d267799a8 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Mon, 23 Jul 2007 22:28:40 -0700 Subject: A couple more minor bug fixes for multilevel coherence. --HG-- extra : convert_revision : 370f9e34911157765be6fd49e826fa1af589b466 --- src/mem/cache/cache_impl.hh | 21 ++++++++++++++------- src/mem/cache/miss/mshr.cc | 10 ++++++++-- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index efd7f4588..412d10599 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -148,7 +148,13 @@ void Cache::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk) { assert(blk); - assert(pkt->needsExclusive() ? blk->isWritable() : blk->isValid()); + // Occasionally this is not true... if we are a lower-level cache + // satisfying a string of Read and ReadEx requests from + // upper-level caches, a Read will mark the block as shared but we + // can satisfy a following ReadEx anyway since we can rely on the + // Read requester(s) to have buffered the ReadEx snoop and to + // invalidate their blocks after receiving them. + // assert(pkt->needsExclusive() ? blk->isWritable() : blk->isValid()); assert(pkt->getOffset(blkSize) + pkt->getSize() <= blkSize); // Check RMW operations first since both isRead() and @@ -727,7 +733,7 @@ Cache::handleResponse(PacketPtr pkt) Tick completion_time; if (blk != NULL) { satisfyCpuSideRequest(target->pkt, blk); - // How many bytes pass the first request is this one + // How many bytes past the first request is this one int transfer_offset = target->pkt->getOffset(blkSize) - initial_offset; if (transfer_offset < 0) { @@ -738,10 +744,9 @@ Cache::handleResponse(PacketPtr pkt) completion_time = tags->getHitLatency() + transfer_offset ? pkt->finishTime : pkt->firstWordTime; - if (!target->pkt->req->isUncacheable()) { - missLatency[target->pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] += - completion_time - target->recvTime; - } + assert(!target->pkt->req->isUncacheable()); + missLatency[target->pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] += + completion_time - target->recvTime; } else { // not a cache fill, just forwarding response completion_time = tags->getHitLatency() + pkt->finishTime; @@ -1004,7 +1009,9 @@ template void Cache::snoopTiming(PacketPtr pkt) { - if (pkt->req->isUncacheable()) { + // Note that some deferred snoops don't have requests, since the + // original access may have already completed + if (pkt->req && pkt->req->isUncacheable()) { //Can't get a hit on an uncacheable address //Revisit this for multi level coherence return; diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc index 9b05aea3f..7f216ad39 100644 --- a/src/mem/cache/miss/mshr.cc +++ b/src/mem/cache/miss/mshr.cc @@ -208,7 +208,7 @@ MSHR::allocateTarget(PacketPtr pkt, Tick whenReady, Counter _order) bool MSHR::handleSnoop(PacketPtr pkt, Counter _order) { - if (!inService || downstreamPending) { + if (!inService || (pkt->isExpressSnoop() && downstreamPending)) { // Request has not been issued yet, or it's been issued // locally but is buffered unissued at some downstream cache // which is forwarding us this snoop. Either way, the packet @@ -249,13 +249,19 @@ MSHR::handleSnoop(PacketPtr pkt, Counter _order) if (targets->needsExclusive || pkt->needsExclusive()) { // actual target device (typ. PhysicalMemory) will delete the // packet on reception, so we need to save a copy here - targets->add(new Packet(pkt), curTick, _order, false); + PacketPtr cp_pkt = new Packet(pkt); + targets->add(cp_pkt, curTick, _order, false); ++ntargets; if (targets->needsExclusive) { // We're awaiting an exclusive copy, so ownership is pending. // It's up to us to respond once the data arrives. pkt->assertMemInhibit(); + } else { + // Someone else may respond before we get around to + // processing this snoop, which means the copied request + // pointer will no longer be valid + cp_pkt->req = NULL; } if (pkt->needsExclusive()) { -- cgit v1.2.3 From d094f9cf277bf08e57057a68ac6beb295bee5ce1 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Tue, 24 Jul 2007 22:36:10 -0700 Subject: Don't delete request at target... requester still needs it. --HG-- extra : convert_revision : 76377ca2e4d7ea70d1d54d325a63ce710e260b93 --- src/mem/tport.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mem/tport.cc b/src/mem/tport.cc index a4f791048..e4b8d70e9 100644 --- a/src/mem/tport.cc +++ b/src/mem/tport.cc @@ -69,7 +69,6 @@ SimpleTimingPort::recvTiming(PacketPtr pkt) if (pkt->memInhibitAsserted()) { // snooper will supply based on copy of packet // still target's responsibility to delete packet - delete pkt->req; delete pkt; return true; } -- cgit v1.2.3 From de52eebd3bbdeebadd5d7e7ce5f16dd68efb8301 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Tue, 24 Jul 2007 22:37:41 -0700 Subject: Integrate snoop loop functions into their respective call sites. Also some additional cleanup of Bus::recvTiming(). --HG-- extra : convert_revision : 156814119f75d04c2e954aec2d7ed6fdc186c26f --- src/mem/bus.cc | 156 +++++++++++++++++++++++++-------------------------------- src/mem/bus.hh | 9 ---- 2 files changed, 67 insertions(+), 98 deletions(-) diff --git a/src/mem/bus.cc b/src/mem/bus.cc index da8df06ea..8243d40f1 100644 --- a/src/mem/bus.cc +++ b/src/mem/bus.cc @@ -172,78 +172,79 @@ void Bus::occupyBus(PacketPtr pkt) bool Bus::recvTiming(PacketPtr pkt) { - int port_id; + short src = pkt->getSrc(); DPRINTF(Bus, "recvTiming: packet src %d dest %d addr 0x%x cmd %s\n", - pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString()); + src, pkt->getDest(), pkt->getAddr(), pkt->cmdString()); - BusPort *pktPort; - if (pkt->getSrc() == defaultId) - pktPort = defaultPort; - else pktPort = interfaces[pkt->getSrc()]; + BusPort *src_port = (src == defaultId) ? defaultPort : interfaces[src]; // If the bus is busy, or other devices are in line ahead of the current // one, put this device on the retry list. if (!(pkt->isResponse() || pkt->isExpressSnoop()) && (tickNextIdle > curTick || - (retryList.size() && (!inRetry || pktPort != retryList.front())))) + (retryList.size() && (!inRetry || src_port != retryList.front())))) { - addToRetryList(pktPort); + addToRetryList(src_port); DPRINTF(Bus, "recvTiming: Bus is busy, returning false\n"); return false; } + occupyBus(pkt); + short dest = pkt->getDest(); + int dest_port_id; + Port *dest_port; if (dest == Packet::Broadcast) { - port_id = findPort(pkt->getAddr()); - timingSnoop(pkt, interfaces[port_id]); - - if (pkt->memInhibitAsserted()) { - //Cache-Cache transfer occuring - if (inRetry) { - retryList.front()->onRetryList(false); - retryList.pop_front(); - inRetry = false; + dest_port_id = findPort(pkt->getAddr()); + dest_port = interfaces[dest_port_id]; + for (SnoopIter s_iter = snoopPorts.begin(); + s_iter != snoopPorts.end(); + s_iter++) { + BusPort *p = *s_iter; + if (p != dest_port && p != src_port) { +#ifndef NDEBUG + // cache is not allowed to refuse snoop + bool success = p->sendTiming(pkt); + assert(success); +#else + // avoid unused variable warning + p->sendTiming(pkt); +#endif } - occupyBus(pkt); - DPRINTF(Bus, "recvTiming: Packet sucessfully sent\n"); - return true; } } else { assert(dest >= 0 && dest < maxId); - assert(dest != pkt->getSrc()); // catch infinite loops - port_id = dest; + assert(dest != src); // catch infinite loops + dest_port_id = dest; + dest_port = interfaces[dest_port_id]; } - occupyBus(pkt); - - if (port_id != pkt->getSrc()) { - if (interfaces[port_id]->sendTiming(pkt)) { - // Packet was successfully sent. Return true. - // Also take care of retries - if (inRetry) { - DPRINTF(Bus, "Remove retry from list %d\n", - retryList.front()->getId()); - retryList.front()->onRetryList(false); - retryList.pop_front(); - inRetry = false; - } - return true; + if (dest_port_id == src) { + // Must be forwarded snoop up from below... + assert(dest == Packet::Broadcast); + } else { + // send to actual target + if (!dest_port->sendTiming(pkt)) { + // Packet not successfully sent. Leave or put it on the retry list. + // illegal to block responses... can lead to deadlock + assert(!pkt->isResponse()); + DPRINTF(Bus, "Adding2 a retry to RETRY list %d\n", src); + addToRetryList(src_port); + return false; } - - // Packet not successfully sent. Leave or put it on the retry list. - // illegal to block responses... can lead to deadlock - assert(!pkt->isResponse()); - DPRINTF(Bus, "Adding2 a retry to RETRY list %d\n", - pktPort->getId()); - addToRetryList(pktPort); - return false; + // send OK, fall through } - else { - //Forwarding up from responder, just return true; - DPRINTF(Bus, "recvTiming: can we be here?\n"); - return true; + + // Packet was successfully sent. + // Also take care of retries + if (inRetry) { + DPRINTF(Bus, "Remove retry from list %d\n", src); + retryList.front()->onRetryList(false); + retryList.pop_front(); + inRetry = false; } + return true; } void @@ -314,46 +315,6 @@ Bus::findPort(Addr addr) return dest_id; } -void -Bus::functionalSnoop(PacketPtr pkt, Port *responder) -{ - // The packet may be changed by another bus on snoops, restore the - // id after each - int src_id = pkt->getSrc(); - - assert(pkt->isRequest()); // hasn't already been satisfied - - for (SnoopIter s_iter = snoopPorts.begin(); - s_iter != snoopPorts.end(); - s_iter++) { - BusPort *p = *s_iter; - if (p != responder && p->getId() != src_id) { - p->sendFunctional(pkt); - } - if (pkt->isResponse()) { - break; - } - pkt->setSrc(src_id); - } -} - -bool -Bus::timingSnoop(PacketPtr pkt, Port* responder) -{ - for (SnoopIter s_iter = snoopPorts.begin(); - s_iter != snoopPorts.end(); - s_iter++) { - BusPort *p = *s_iter; - if (p != responder && p->getId() != pkt->getSrc()) { - bool success = p->sendTiming(pkt); - if (!success) - return false; - } - } - - return true; -} - /** Function called by the port when the bus is receiving a Atomic * transaction.*/ @@ -434,7 +395,24 @@ Bus::recvFunctional(PacketPtr pkt) int port_id = findPort(pkt->getAddr()); Port *port = interfaces[port_id]; - functionalSnoop(pkt, port); + // The packet may be changed by another bus on snoops, restore the + // id after each + int src_id = pkt->getSrc(); + + assert(pkt->isRequest()); // hasn't already been satisfied + + for (SnoopIter s_iter = snoopPorts.begin(); + s_iter != snoopPorts.end(); + s_iter++) { + BusPort *p = *s_iter; + if (p != port && p->getId() != src_id) { + p->sendFunctional(pkt); + } + if (pkt->isResponse()) { + break; + } + pkt->setSrc(src_id); + } // If the snooping hasn't found what we were looking for, keep going. if (!pkt->isResponse() && port_id != pkt->getSrc()) { diff --git a/src/mem/bus.hh b/src/mem/bus.hh index a19420244..06ccd4ac0 100644 --- a/src/mem/bus.hh +++ b/src/mem/bus.hh @@ -180,15 +180,6 @@ class Bus : public MemObject */ int findPort(Addr addr); - /** Snoop all relevant ports functionally. */ - void functionalSnoop(PacketPtr pkt, Port *responder); - - /** Call snoop on caches, be sure to set SNOOP_COMMIT bit if you want - * the snoop to happen - * @return True if succeds. - */ - bool timingSnoop(PacketPtr pkt, Port *responder); - /** Process address range request. * @param resp addresses that we can respond to * @param snoop addresses that we would like to snoop -- cgit v1.2.3 From c1097d06f7b27f4dd6ecaa47d1685e015725b5f5 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Wed, 25 Jul 2007 07:47:37 -0700 Subject: Can't block on memInhibit packets (now that bus no longer filters them for us). --HG-- extra : convert_revision : 34e7eaf5ee1e739f5557a2d417e569ed2ceb14b3 --- src/mem/cache/cache_impl.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index 412d10599..fa2f45632 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -1230,7 +1230,7 @@ bool Cache::CpuSidePort::recvTiming(PacketPtr pkt) { // illegal to block responses... can lead to deadlock - if (pkt->isRequest() && blocked) { + if (pkt->isRequest() && !pkt->memInhibitAsserted() && blocked) { DPRINTF(Cache,"Scheduling a retry while blocked\n"); mustSendRetry = true; return false; -- cgit v1.2.3 From 58250b8e5fd8aba9ed99b7aff6ce67b05b379fa0 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Thu, 26 Jul 2007 17:04:12 -0700 Subject: bus: Fix default port handling. --HG-- extra : convert_revision : 121b6e31cddff17c51fc4f3df20e7e2bde87d04f --- src/mem/bus.cc | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/mem/bus.cc b/src/mem/bus.cc index 8243d40f1..1fad13c5a 100644 --- a/src/mem/bus.cc +++ b/src/mem/bus.cc @@ -197,7 +197,8 @@ Bus::recvTiming(PacketPtr pkt) if (dest == Packet::Broadcast) { dest_port_id = findPort(pkt->getAddr()); - dest_port = interfaces[dest_port_id]; + dest_port = (dest_port_id == defaultId) ? + defaultPort : interfaces[dest_port_id]; for (SnoopIter s_iter = snoopPorts.begin(); s_iter != snoopPorts.end(); s_iter++) { @@ -217,7 +218,8 @@ Bus::recvTiming(PacketPtr pkt) assert(dest >= 0 && dest < maxId); assert(dest != src); // catch infinite loops dest_port_id = dest; - dest_port = interfaces[dest_port_id]; + dest_port = (dest_port_id == defaultId) ? + defaultPort : interfaces[dest_port_id]; } if (dest_port_id == src) { @@ -336,7 +338,8 @@ Bus::recvAtomic(PacketPtr pkt) int orig_src = pkt->getSrc(); int target_port_id = findPort(pkt->getAddr()); - Port *target_port = interfaces[target_port_id]; + Port *target_port = (target_port_id == defaultId) ? + defaultPort : interfaces[target_port_id]; SnoopIter s_end = snoopPorts.end(); for (SnoopIter s_iter = snoopPorts.begin(); s_iter != s_end; s_iter++) { @@ -394,7 +397,7 @@ Bus::recvFunctional(PacketPtr pkt) assert(pkt->getDest() == Packet::Broadcast); int port_id = findPort(pkt->getAddr()); - Port *port = interfaces[port_id]; + Port *port = (port_id == defaultId) ? defaultPort : interfaces[port_id]; // The packet may be changed by another bus on snoops, restore the // id after each int src_id = pkt->getSrc(); -- cgit v1.2.3 From f1b5c8fb57ee656df3a1b9d021de723279f66b2f Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Thu, 26 Jul 2007 17:04:16 -0700 Subject: Continue snooping after a writeback is encountered. --HG-- extra : convert_revision : 8411338a6c0fdd7072dd32bdffacdace62d5de90 --- src/mem/cache/cache_impl.hh | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index fa2f45632..6db40b609 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -1064,7 +1064,13 @@ Cache::snoopTiming(PacketPtr pkt) // Invalidation trumps our writeback... discard here markInService(mshr); } - return; + + // If this was a shared writeback, there may still be + // other shared copies above that require invalidation. + // We could be more selective and return here if the + // request is non-exclusive or if the writeback is + // exclusive. + break; } } -- cgit v1.2.3 From c3bf59dcb9dddd64d5ad603f7af6589b87e7afad Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Thu, 26 Jul 2007 17:04:17 -0700 Subject: Add downward express snoops for invalidations. --HG-- extra : convert_revision : 4916fa9721d727d8416ad8c07df3a8171d02b2b4 --- src/mem/cache/cache_impl.hh | 15 +++++++++++++++ src/mem/cache/miss/mshr.cc | 1 + src/mem/packet.hh | 8 +++++++- 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index 6db40b609..2ead54ba6 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -371,6 +371,17 @@ Cache::timingAccess(PacketPtr pkt) DPRINTF(Cache, "mem inhibited on 0x%x: not responding\n", pkt->getAddr()); assert(!pkt->req->isUncacheable()); + // Special tweak for multilevel coherence: snoop downward here + // on invalidates since there may be other caches below here + // that have shared copies. Not necessary if we know that + // supplier had exclusive copy to begin with. + if (pkt->needsExclusive() && !pkt->isSupplyExclusive()) { + Packet *snoopPkt = new Packet(pkt, true); // clear flags + snoopPkt->setExpressSnoop(); + snoopPkt->assertMemInhibit(); + memSidePort->sendTiming(snoopPkt); + // main memory will delete snoopPkt + } return true; } @@ -966,6 +977,7 @@ Cache::handleSnoop(PacketPtr pkt, BlkType *blk, // we respond in atomic mode), so just figure out what to do now // and then do it later bool supply = blk->isDirty() && pkt->isRead() && !upperSupply; + bool have_exclusive = blk->isWritable(); bool invalidate = pkt->isInvalidate(); if (pkt->isRead() && !pkt->isInvalidate()) { @@ -985,6 +997,9 @@ Cache::handleSnoop(PacketPtr pkt, BlkType *blk, if (supply) { assert(!pkt->memInhibitAsserted()); pkt->assertMemInhibit(); + if (have_exclusive) { + pkt->setSupplyExclusive(); + } if (is_timing) { doTimingSupplyResponse(pkt, blk->data, is_deferred); } else { diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc index 7f216ad39..5ba3d1ec5 100644 --- a/src/mem/cache/miss/mshr.cc +++ b/src/mem/cache/miss/mshr.cc @@ -257,6 +257,7 @@ MSHR::handleSnoop(PacketPtr pkt, Counter _order) // We're awaiting an exclusive copy, so ownership is pending. // It's up to us to respond once the data arrives. pkt->assertMemInhibit(); + pkt->setSupplyExclusive(); } else { // Someone else may respond before we get around to // processing this snoop, which means the copied request diff --git a/src/mem/packet.hh b/src/mem/packet.hh index 036bd3fd7..c6534d6c9 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -256,7 +256,11 @@ class Packet : public FastAlloc MemInhibit, Shared, // Special control flags + /// Special timing-mode atomic snoop for multi-level coherence. ExpressSnoop, + /// Does supplier have exclusive copy? + /// Useful for multi-level coherence. + SupplyExclusive, NUM_PACKET_FLAGS }; @@ -315,13 +319,15 @@ class Packet : public FastAlloc // Snoop flags void assertMemInhibit() { flags[MemInhibit] = true; } - void assertShared() { flags[Shared] = true; } bool memInhibitAsserted() { return flags[MemInhibit]; } + void assertShared() { flags[Shared] = true; } bool sharedAsserted() { return flags[Shared]; } // Special control flags void setExpressSnoop() { flags[ExpressSnoop] = true; } bool isExpressSnoop() { return flags[ExpressSnoop]; } + void setSupplyExclusive() { flags[SupplyExclusive] = true; } + bool isSupplyExclusive() { return flags[SupplyExclusive]; } // Network error conditions... encapsulate them as methods since // their encoding keeps changing (from result field to command -- cgit v1.2.3 From 6b73ff43ff58502c80050c7aeff5a08a4ce61f87 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Thu, 26 Jul 2007 17:04:17 -0700 Subject: Have owner respond to UpgradeReq to avoid race. --HG-- extra : convert_revision : 30916fca6978c73d8a14558f2d7288c1eab54ad4 --- src/mem/cache/cache_impl.hh | 46 +++++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index 2ead54ba6..a35d7b2a6 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -926,7 +926,9 @@ Cache::doTimingSupplyResponse(PacketPtr req_pkt, } pkt->allocate(); pkt->makeTimingResponse(); - pkt->setDataFromBlock(blk_data, blkSize); + if (pkt->isRead()) { + pkt->setDataFromBlock(blk_data, blkSize); + } memSidePort->respond(pkt, curTick + hitLatency); } @@ -940,8 +942,8 @@ Cache::handleSnoop(PacketPtr pkt, BlkType *blk, // first propagate snoop upward to see if anyone above us wants to // handle it. save & restore packet src since it will get // rewritten to be relative to cpu-side bus (if any) - bool alreadySupplied = pkt->memInhibitAsserted(); - bool upperSupply = false; + bool alreadyResponded = pkt->memInhibitAsserted(); + bool upperResponse = false; if (is_timing) { Packet *snoopPkt = new Packet(pkt, true); // clear flags snoopPkt->setExpressSnoop(); @@ -949,7 +951,7 @@ Cache::handleSnoop(PacketPtr pkt, BlkType *blk, cpuSidePort->sendTiming(snoopPkt); if (snoopPkt->memInhibitAsserted()) { // cache-to-cache response from some upper cache - assert(!alreadySupplied); + assert(!alreadyResponded); pkt->assertMemInhibit(); } else { delete snoopPkt->senderState; @@ -961,7 +963,7 @@ Cache::handleSnoop(PacketPtr pkt, BlkType *blk, } else { int origSrc = pkt->getSrc(); cpuSidePort->sendAtomic(pkt); - if (!alreadySupplied && pkt->memInhibitAsserted()) { + if (!alreadyResponded && pkt->memInhibitAsserted()) { // cache-to-cache response from some upper cache: // forward response to original requester assert(pkt->isResponse()); @@ -976,7 +978,8 @@ Cache::handleSnoop(PacketPtr pkt, BlkType *blk, // we may end up modifying both the block state and the packet (if // we respond in atomic mode), so just figure out what to do now // and then do it later - bool supply = blk->isDirty() && pkt->isRead() && !upperSupply; + assert(!(blk->isDirty() && upperResponse)); + bool respond = blk->isDirty() && pkt->needsResponse(); bool have_exclusive = blk->isWritable(); bool invalidate = pkt->isInvalidate(); @@ -994,7 +997,7 @@ Cache::handleSnoop(PacketPtr pkt, BlkType *blk, blk->status &= ~bits_to_clear; } - if (supply) { + if (respond) { assert(!pkt->memInhibitAsserted()); pkt->assertMemInhibit(); if (have_exclusive) { @@ -1016,7 +1019,7 @@ Cache::handleSnoop(PacketPtr pkt, BlkType *blk, DPRINTF(Cache, "snooped a %s request for addr %x, %snew state is %i\n", pkt->cmdString(), blockAlign(pkt->getAddr()), - supply ? "supplying data, " : "", blk->status); + respond ? "responding, " : "", blk->status); } @@ -1026,7 +1029,8 @@ Cache::snoopTiming(PacketPtr pkt) { // Note that some deferred snoops don't have requests, since the // original access may have already completed - if (pkt->req && pkt->req->isUncacheable()) { + if ((pkt->req && pkt->req->isUncacheable()) || + pkt->cmd == MemCmd::Writeback) { //Can't get a hit on an uncacheable address //Revisit this for multi level coherence return; @@ -1061,19 +1065,17 @@ Cache::snoopTiming(PacketPtr pkt) PacketPtr wb_pkt = mshr->getTarget()->pkt; assert(wb_pkt->cmd == MemCmd::Writeback); - if (pkt->isRead()) { - assert(!pkt->memInhibitAsserted()); - pkt->assertMemInhibit(); - if (!pkt->needsExclusive()) { - pkt->assertShared(); - } else { - // if we're not asserting the shared line, we need to - // invalidate our copy. we'll do that below as long as - // the packet's invalidate flag is set... - assert(pkt->isInvalidate()); - } - doTimingSupplyResponse(pkt, wb_pkt->getPtr(), false); + assert(!pkt->memInhibitAsserted()); + pkt->assertMemInhibit(); + if (!pkt->needsExclusive()) { + pkt->assertShared(); + } else { + // if we're not asserting the shared line, we need to + // invalidate our copy. we'll do that below as long as + // the packet's invalidate flag is set... + assert(pkt->isInvalidate()); } + doTimingSupplyResponse(pkt, wb_pkt->getPtr(), false); if (pkt->isInvalidate()) { // Invalidation trumps our writeback... discard here @@ -1097,7 +1099,7 @@ template Tick Cache::snoopAtomic(PacketPtr pkt) { - if (pkt->req->isUncacheable()) { + if (pkt->req->isUncacheable() || pkt->cmd == MemCmd::Writeback) { // Can't get a hit on an uncacheable address // Revisit this for multi level coherence return hitLatency; -- cgit v1.2.3 From 01c9d34a0b4bcef3d8cca12eaeb7753e376378a8 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Fri, 27 Jul 2007 03:51:15 -0400 Subject: cache: Get rid of unused variable. --HG-- extra : convert_revision : 394adc12fbd7ea10280a1b8d6bc3cb15ee019f27 --- src/mem/cache/cache_impl.hh | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index a35d7b2a6..150cf80b7 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -943,7 +943,6 @@ Cache::handleSnoop(PacketPtr pkt, BlkType *blk, // handle it. save & restore packet src since it will get // rewritten to be relative to cpu-side bus (if any) bool alreadyResponded = pkt->memInhibitAsserted(); - bool upperResponse = false; if (is_timing) { Packet *snoopPkt = new Packet(pkt, true); // clear flags snoopPkt->setExpressSnoop(); @@ -978,7 +977,6 @@ Cache::handleSnoop(PacketPtr pkt, BlkType *blk, // we may end up modifying both the block state and the packet (if // we respond in atomic mode), so just figure out what to do now // and then do it later - assert(!(blk->isDirty() && upperResponse)); bool respond = blk->isDirty() && pkt->needsResponse(); bool have_exclusive = blk->isWritable(); bool invalidate = pkt->isInvalidate(); -- cgit v1.2.3 From 0cbcb715e0f6f2f7b1338d37e641ef931247748f Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Fri, 27 Jul 2007 12:46:45 -0700 Subject: cache/memtest: fixes for functional accesses. --HG-- extra : convert_revision : 688ba4d882cad2c96cf44c9e46999f74266e02ee --- src/cpu/memtest/memtest.cc | 2 -- src/mem/cache/cache_impl.hh | 31 +++++-------------------------- src/mem/cache/miss/mshr.cc | 14 ++++++++++++++ src/mem/cache/miss/mshr.hh | 6 ++++++ src/mem/cache/miss/mshr_queue.cc | 15 +++++++++++++++ src/mem/cache/miss/mshr_queue.hh | 2 ++ 6 files changed, 42 insertions(+), 28 deletions(-) diff --git a/src/cpu/memtest/memtest.cc b/src/cpu/memtest/memtest.cc index 86a33f44b..83417c514 100644 --- a/src/cpu/memtest/memtest.cc +++ b/src/cpu/memtest/memtest.cc @@ -359,7 +359,6 @@ MemTest::tick() if (probe) { cachePort.sendFunctional(pkt); - pkt->makeAtomicResponse(); completeRequest(pkt); } else { sendPkt(pkt); @@ -393,7 +392,6 @@ MemTest::tick() if (probe) { cachePort.sendFunctional(pkt); - pkt->makeAtomicResponse(); completeRequest(pkt); } else { sendPkt(pkt); diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index 150cf80b7..c1b01d676 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -641,33 +641,12 @@ Cache::functionalAccess(PacketPtr pkt, return; } - // Need to check for outstanding misses and writes - - // There can only be one matching outstanding miss. - MSHR *mshr = mshrQueue.findMatch(blk_addr); - if (mshr) { - MSHR::TargetList *targets = mshr->getTargetList(); - MSHR::TargetList::iterator i = targets->begin(); - MSHR::TargetList::iterator end = targets->end(); - for (; i != end; ++i) { - PacketPtr targetPkt = i->pkt; - if (pkt->checkFunctional(targetPkt)) - return; - } + // Need to check for outstanding misses and writes; if neither one + // satisfies, then forward to other side of cache. + if (!(mshrQueue.checkFunctional(pkt, blk_addr) || + writeBuffer.checkFunctional(pkt, blk_addr))) { + otherSidePort->checkAndSendFunctional(pkt); } - - // There can be many matching outstanding writes. - std::vector writes; - assert(!writeBuffer.findMatches(blk_addr, writes)); -/* Need to change this to iterate through targets in mshr?? - for (int i = 0; i < writes.size(); ++i) { - MSHR *mshr = writes[i]; - if (pkt->checkFunctional(mshr->addr, mshr->size, mshr->writeData)) - return; - } -*/ - - otherSidePort->checkAndSendFunctional(pkt); } diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc index 5ba3d1ec5..7796773a3 100644 --- a/src/mem/cache/miss/mshr.cc +++ b/src/mem/cache/miss/mshr.cc @@ -118,6 +118,20 @@ MSHR::TargetList::clearDownstreamPending() } +bool +MSHR::TargetList::checkFunctional(PacketPtr pkt) +{ + Iterator end_i = end(); + for (Iterator i = begin(); i != end_i; ++i) { + if (pkt->checkFunctional(i->pkt)) { + return true; + } + } + + return false; +} + + void MSHR::allocate(Addr _addr, int _size, PacketPtr target, Tick whenReady, Counter _order) diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh index e850a8633..c865ca3ac 100644 --- a/src/mem/cache/miss/mshr.hh +++ b/src/mem/cache/miss/mshr.hh @@ -82,6 +82,7 @@ class MSHR : public Packet::SenderState void add(PacketPtr pkt, Tick readyTime, Counter order, bool cpuSide); void replaceUpgrades(); void clearDownstreamPending(); + bool checkFunctional(PacketPtr pkt); }; /** A list of MSHRs. */ @@ -230,6 +231,11 @@ public: void handleFill(Packet *pkt, CacheBlk *blk); + bool checkFunctional(PacketPtr pkt) { + return (targets->checkFunctional(pkt) || + deferredTargets->checkFunctional(pkt)); + } + /** * Prints the contents of this MSHR to stderr. */ diff --git a/src/mem/cache/miss/mshr_queue.cc b/src/mem/cache/miss/mshr_queue.cc index 50a28fb3c..911329e0c 100644 --- a/src/mem/cache/miss/mshr_queue.cc +++ b/src/mem/cache/miss/mshr_queue.cc @@ -84,9 +84,24 @@ MSHRQueue::findMatches(Addr addr, vector& matches) const } } return retval; +} + +bool +MSHRQueue::checkFunctional(PacketPtr pkt, Addr blk_addr) +{ + MSHR::ConstIterator i = allocatedList.begin(); + MSHR::ConstIterator end = allocatedList.end(); + for (; i != end; ++i) { + MSHR *mshr = *i; + if (mshr->addr == blk_addr && mshr->checkFunctional(pkt)) { + return true; + } + } + return false; } + MSHR * MSHRQueue::findPending(Addr addr, int size) const { diff --git a/src/mem/cache/miss/mshr_queue.hh b/src/mem/cache/miss/mshr_queue.hh index 1f1d59e98..447ebfc5a 100644 --- a/src/mem/cache/miss/mshr_queue.hh +++ b/src/mem/cache/miss/mshr_queue.hh @@ -115,6 +115,8 @@ class MSHRQueue */ MSHR *findPending(Addr addr, int size) const; + bool checkFunctional(PacketPtr pkt, Addr blk_addr); + /** * Allocates a new MSHR for the request and size. This places the request * as the first target in the MSHR. -- cgit v1.2.3 From 8705b0799bddef95d9957a03ee7ffb8fbb1bdec7 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Fri, 27 Jul 2007 12:46:55 -0700 Subject: packet: get rid of unused intersect() function. --HG-- extra : convert_revision : f0a2947ccc49e0d18bc17a59371fa396d9ebd6c0 --- src/mem/packet.cc | 12 ------------ src/mem/packet.hh | 3 --- 2 files changed, 15 deletions(-) diff --git a/src/mem/packet.cc b/src/mem/packet.cc index 8cd356768..c7c6ec083 100644 --- a/src/mem/packet.cc +++ b/src/mem/packet.cc @@ -152,18 +152,6 @@ Packet::allocate() data = new uint8_t[getSize()]; } -/** Do the packet modify the same addresses. */ -bool -Packet::intersect(PacketPtr p) -{ - Addr s1 = getAddr(); - Addr e1 = getAddr() + getSize() - 1; - Addr s2 = p->getAddr(); - Addr e2 = p->getAddr() + p->getSize() - 1; - - return !(s1 > e2 || e1 < s2); -} - bool Packet::checkFunctional(Addr addr, int size, uint8_t *data) diff --git a/src/mem/packet.hh b/src/mem/packet.hh index c6534d6c9..2b650a51e 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -564,9 +564,6 @@ class Packet : public FastAlloc /** If there isn't data in the packet, allocate some. */ void allocate(); - /** Do the packet modify the same addresses. */ - bool intersect(PacketPtr p); - /** * Check a functional request against a memory value represented * by a base/size pair and an associated data array. If the -- cgit v1.2.3 -- cgit v1.2.3 From aaf59949e58ceb617aa4efd04597a63c88638b9d Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Sat, 28 Jul 2007 18:00:05 -0700 Subject: AtomicSimpleCPU: fix inadvertent loss of endian conversion on read. --HG-- extra : convert_revision : 367bf2431bf4f4eb7c4d5723816e5db6f7233aed --- src/cpu/simple/atomic.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc index 054f67d69..b830cbf3a 100644 --- a/src/cpu/simple/atomic.cc +++ b/src/cpu/simple/atomic.cc @@ -293,6 +293,8 @@ AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags) dcache_access = true; assert(!pkt.isError()); + data = gtoh(data); + if (req->isLocked()) { TheISA::handleLockedRead(thread, req); } -- cgit v1.2.3 From 4a7d0c4b79450e05b87da4cfc48c2361758127c1 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Sun, 29 Jul 2007 13:24:48 -0700 Subject: bus: take out response prioritization (timing was messed up). Also make express snoops not occupy bus (since they're magic). --HG-- extra : convert_revision : 75aa5211a59380026d1e3f122778425e48e2edcd --- src/mem/bus.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/mem/bus.cc b/src/mem/bus.cc index 518c9dbfa..cb359734b 100644 --- a/src/mem/bus.cc +++ b/src/mem/bus.cc @@ -180,7 +180,7 @@ Bus::recvTiming(PacketPtr pkt) // If the bus is busy, or other devices are in line ahead of the current // one, put this device on the retry list. - if (!(pkt->isResponse() || pkt->isExpressSnoop()) && + if (!pkt->isExpressSnoop() && (tickNextIdle > curTick || (retryList.size() && (!inRetry || src_port != retryList.front())))) { @@ -189,7 +189,9 @@ Bus::recvTiming(PacketPtr pkt) return false; } - occupyBus(pkt); + if (!pkt->isExpressSnoop()) { + occupyBus(pkt); + } short dest = pkt->getDest(); int dest_port_id; -- cgit v1.2.3 From 2f93db6f95b02d2bedf9571330a3185ac3fa7fa9 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Sun, 29 Jul 2007 20:17:03 -0700 Subject: memory system: fix functional access bug. Make sure not to keep processing functional accesses after they've been responded to. Also use checkFunctional() return value instead of checking packet command field where possible, mostly just for consistency. --HG-- extra : convert_revision : 29fc76bc18731bd93a4ed05a281297827028ef75 --- src/mem/cache/base_cache.cc | 4 ++-- src/mem/cache/cache_impl.hh | 8 ++++---- src/mem/physical.cc | 12 ++++++------ src/mem/tport.cc | 13 +++++++------ src/mem/tport.hh | 2 +- 5 files changed, 20 insertions(+), 19 deletions(-) diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc index ec9e1cf9b..b44468486 100644 --- a/src/mem/cache/base_cache.cc +++ b/src/mem/cache/base_cache.cc @@ -81,9 +81,9 @@ BaseCache::CachePort::deviceBlockSize() void BaseCache::CachePort::checkAndSendFunctional(PacketPtr pkt) { - checkFunctional(pkt); - if (!pkt->isResponse()) + if (!checkFunctional(pkt)) { sendFunctional(pkt); + } } diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index c1b01d676..d144266ed 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -1253,9 +1253,9 @@ template void Cache::CpuSidePort::recvFunctional(PacketPtr pkt) { - checkFunctional(pkt); - if (!pkt->isResponse()) + if (!checkFunctional(pkt)) { myCache()->functionalAccess(pkt, cache->memSidePort); + } } @@ -1327,9 +1327,9 @@ template void Cache::MemSidePort::recvFunctional(PacketPtr pkt) { - checkFunctional(pkt); - if (!pkt->isResponse()) + if (!checkFunctional(pkt)) { myCache()->functionalAccess(pkt, cache->cpuSidePort); + } } diff --git a/src/mem/physical.cc b/src/mem/physical.cc index b96fb8a56..2f358daf2 100644 --- a/src/mem/physical.cc +++ b/src/mem/physical.cc @@ -400,12 +400,12 @@ PhysicalMemory::MemoryPort::recvAtomic(PacketPtr pkt) void PhysicalMemory::MemoryPort::recvFunctional(PacketPtr pkt) { - checkFunctional(pkt); - - // Default implementation of SimpleTimingPort::recvFunctional() - // calls recvAtomic() and throws away the latency; we can save a - // little here by just not calculating the latency. - memory->doFunctionalAccess(pkt); + if (!checkFunctional(pkt)) { + // Default implementation of SimpleTimingPort::recvFunctional() + // calls recvAtomic() and throws away the latency; we can save a + // little here by just not calculating the latency. + memory->doFunctionalAccess(pkt); + } } unsigned int diff --git a/src/mem/tport.cc b/src/mem/tport.cc index e4b8d70e9..b1a6a4813 100644 --- a/src/mem/tport.cc +++ b/src/mem/tport.cc @@ -30,7 +30,7 @@ #include "mem/tport.hh" -void +bool SimpleTimingPort::checkFunctional(PacketPtr pkt) { DeferredPacketIterator i = transmitList.begin(); @@ -41,19 +41,20 @@ SimpleTimingPort::checkFunctional(PacketPtr pkt) // If the target contains data, and it overlaps the // probed request, need to update data if (pkt->checkFunctional(target)) { - return; + return true; } } + + return false; } void SimpleTimingPort::recvFunctional(PacketPtr pkt) { - checkFunctional(pkt); - - // Just do an atomic access and throw away the returned latency - if (!pkt->isResponse()) + if (!checkFunctional(pkt)) { + // Just do an atomic access and throw away the returned latency recvAtomic(pkt); + } } bool diff --git a/src/mem/tport.hh b/src/mem/tport.hh index bc9da6c44..d0f1be425 100644 --- a/src/mem/tport.hh +++ b/src/mem/tport.hh @@ -99,7 +99,7 @@ class SimpleTimingPort : public Port /** Check the list of buffered packets against the supplied * functional request. */ - void checkFunctional(PacketPtr funcPkt); + bool checkFunctional(PacketPtr funcPkt); /** Check whether we have a packet ready to go on the transmit list. */ bool deferredPacketReady() -- cgit v1.2.3 From 62aa1d7f559622dcb04b1b2fe1e2ecec375883a3 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Fri, 3 Aug 2007 03:51:13 -0400 Subject: cache: get rid of obsolete params from python. --HG-- extra : convert_revision : cd40e0ef938ef6da1cccedf7be01c3ac5b4883fb --- src/mem/cache/BaseCache.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/mem/cache/BaseCache.py b/src/mem/cache/BaseCache.py index 86148f821..2bf44cdf9 100644 --- a/src/mem/cache/BaseCache.py +++ b/src/mem/cache/BaseCache.py @@ -34,15 +34,9 @@ class Prefetch(Enum): vals = ['none', 'tagged', 'stride', 'ghb'] class BaseCache(MemObject): type = 'BaseCache' - adaptive_compression = Param.Bool(False, - "Use an adaptive compression scheme") assoc = Param.Int("associativity") block_size = Param.Int("block size in bytes") latency = Param.Latency("Latency") - compressed_bus = Param.Bool(False, - "This cache connects to a compressed memory") - compression_latency = Param.Latency('0ns', - "Latency in cycles of compression algorithm") hash_delay = Param.Int(1, "time in cycles of hash access") lifo = Param.Bool(False, "whether this NIC partition should use LIFO repl. policy") @@ -56,8 +50,6 @@ class BaseCache(MemObject): split = Param.Bool(False, "whether or not this cache is split") split_size = Param.Int(0, "How many ways of the cache belong to CPU/LRU partition") - store_compressed = Param.Bool(False, - "Store compressed data in the cache") subblock_size = Param.Int(0, "Size of subblock in IIC used for compression") tgts_per_mshr = Param.Int("max number of accesses per MSHR") -- cgit v1.2.3 From a0bf2535a80c59329c8c14bba724b90fba05da4b Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Fri, 3 Aug 2007 18:03:59 -0400 Subject: tests: config.out no longer exists, eliminate ref copy. --HG-- extra : convert_revision : e2d6aa61aa2ffd1a9d16260244512eeb1fe4d5a3 --- tests/SConscript | 2 +- .../00.gzip/ref/alpha/tru64/o3-timing/config.out | 366 ------ .../ref/alpha/tru64/simple-atomic/config.out | 58 - .../ref/alpha/tru64/simple-timing/config.out | 177 --- .../00.gzip/ref/sparc/linux/o3-timing/config.out | 366 ------ .../ref/sparc/linux/simple-atomic/config.out | 58 - .../ref/sparc/linux/simple-timing/config.out | 177 --- .../ref/sparc/linux/simple-atomic/config.out | 58 - .../ref/sparc/linux/simple-timing/config.out | 177 --- .../30.eon/ref/alpha/tru64/o3-timing/config.out | 366 ------ .../ref/alpha/tru64/simple-atomic/config.out | 58 - .../ref/alpha/tru64/simple-timing/config.out | 177 --- .../ref/alpha/tru64/simple-atomic/config.out | 58 - .../ref/alpha/tru64/simple-timing/config.out | 177 --- .../50.vortex/ref/alpha/tru64/o3-timing/config.out | 366 ------ .../ref/alpha/tru64/simple-atomic/config.out | 58 - .../ref/alpha/tru64/simple-timing/config.out | 177 --- .../ref/sparc/linux/simple-atomic/config.out | 58 - .../ref/sparc/linux/simple-timing/config.out | 177 --- .../60.bzip2/ref/alpha/tru64/o3-timing/config.out | 366 ------ .../ref/alpha/tru64/simple-atomic/config.out | 58 - .../ref/alpha/tru64/simple-timing/config.out | 177 --- .../70.twolf/ref/alpha/tru64/o3-timing/config.out | 366 ------ .../ref/alpha/tru64/simple-atomic/config.out | 58 - .../ref/alpha/tru64/simple-timing/config.out | 177 --- .../ref/sparc/linux/simple-atomic/config.out | 58 - .../ref/sparc/linux/simple-timing/config.out | 177 --- .../sparc/solaris/t1000-simple-atomic/config.out | 417 ------- .../00.hello/ref/alpha/linux/o3-timing/config.out | 366 ------ .../ref/alpha/linux/simple-atomic/config.out | 58 - .../ref/alpha/linux/simple-timing/config.out | 177 --- .../00.hello/ref/alpha/tru64/o3-timing/config.out | 366 ------ .../ref/alpha/tru64/simple-atomic/config.out | 58 - .../ref/alpha/tru64/simple-timing/config.out | 177 --- .../ref/mips/linux/simple-atomic/config.out | 58 - .../ref/mips/linux/simple-timing/config.out | 177 --- .../ref/sparc/linux/simple-atomic/config.out | 58 - .../ref/sparc/linux/simple-timing/config.out | 177 --- .../ref/alpha/linux/o3-timing/config.out | 382 ------ .../ref/sparc/linux/o3-timing/config.out | 366 ------ .../ref/sparc/linux/simple-atomic/config.out | 58 - .../ref/sparc/linux/simple-timing/config.out | 177 --- .../linux/tsunami-simple-atomic-dual/config.out | 888 -------------- .../alpha/linux/tsunami-simple-atomic/config.out | 773 ------------ .../linux/tsunami-simple-timing-dual/config.out | 888 -------------- .../alpha/linux/tsunami-simple-timing/config.out | 773 ------------ .../20.eio-short/ref/alpha/eio/detailed/config.out | 279 ----- .../ref/alpha/eio/simple-atomic/config.out | 49 - .../ref/alpha/eio/simple-timing/config.out | 168 --- .../50.memtest/ref/alpha/linux/memtest/config.out | 516 -------- .../linux/twosys-tsunami-simple-atomic/config.out | 1268 -------------------- 51 files changed, 1 insertion(+), 13221 deletions(-) delete mode 100644 tests/long/00.gzip/ref/alpha/tru64/o3-timing/config.out delete mode 100644 tests/long/00.gzip/ref/alpha/tru64/simple-atomic/config.out delete mode 100644 tests/long/00.gzip/ref/alpha/tru64/simple-timing/config.out delete mode 100644 tests/long/00.gzip/ref/sparc/linux/o3-timing/config.out delete mode 100644 tests/long/00.gzip/ref/sparc/linux/simple-atomic/config.out delete mode 100644 tests/long/00.gzip/ref/sparc/linux/simple-timing/config.out delete mode 100644 tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.out delete mode 100644 tests/long/10.mcf/ref/sparc/linux/simple-timing/config.out delete mode 100644 tests/long/30.eon/ref/alpha/tru64/o3-timing/config.out delete mode 100644 tests/long/30.eon/ref/alpha/tru64/simple-atomic/config.out delete mode 100644 tests/long/30.eon/ref/alpha/tru64/simple-timing/config.out delete mode 100644 tests/long/40.perlbmk/ref/alpha/tru64/simple-atomic/config.out delete mode 100644 tests/long/40.perlbmk/ref/alpha/tru64/simple-timing/config.out delete mode 100644 tests/long/50.vortex/ref/alpha/tru64/o3-timing/config.out delete mode 100644 tests/long/50.vortex/ref/alpha/tru64/simple-atomic/config.out delete mode 100644 tests/long/50.vortex/ref/alpha/tru64/simple-timing/config.out delete mode 100644 tests/long/50.vortex/ref/sparc/linux/simple-atomic/config.out delete mode 100644 tests/long/50.vortex/ref/sparc/linux/simple-timing/config.out delete mode 100644 tests/long/60.bzip2/ref/alpha/tru64/o3-timing/config.out delete mode 100644 tests/long/60.bzip2/ref/alpha/tru64/simple-atomic/config.out delete mode 100644 tests/long/60.bzip2/ref/alpha/tru64/simple-timing/config.out delete mode 100644 tests/long/70.twolf/ref/alpha/tru64/o3-timing/config.out delete mode 100644 tests/long/70.twolf/ref/alpha/tru64/simple-atomic/config.out delete mode 100644 tests/long/70.twolf/ref/alpha/tru64/simple-timing/config.out delete mode 100644 tests/long/70.twolf/ref/sparc/linux/simple-atomic/config.out delete mode 100644 tests/long/70.twolf/ref/sparc/linux/simple-timing/config.out delete mode 100644 tests/long/80.solaris-boot/ref/sparc/solaris/t1000-simple-atomic/config.out delete mode 100644 tests/quick/00.hello/ref/alpha/linux/o3-timing/config.out delete mode 100644 tests/quick/00.hello/ref/alpha/linux/simple-atomic/config.out delete mode 100644 tests/quick/00.hello/ref/alpha/linux/simple-timing/config.out delete mode 100644 tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.out delete mode 100644 tests/quick/00.hello/ref/alpha/tru64/simple-atomic/config.out delete mode 100644 tests/quick/00.hello/ref/alpha/tru64/simple-timing/config.out delete mode 100644 tests/quick/00.hello/ref/mips/linux/simple-atomic/config.out delete mode 100644 tests/quick/00.hello/ref/mips/linux/simple-timing/config.out delete mode 100644 tests/quick/00.hello/ref/sparc/linux/simple-atomic/config.out delete mode 100644 tests/quick/00.hello/ref/sparc/linux/simple-timing/config.out delete mode 100644 tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.out delete mode 100644 tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.out delete mode 100644 tests/quick/02.insttest/ref/sparc/linux/simple-atomic/config.out delete mode 100644 tests/quick/02.insttest/ref/sparc/linux/simple-timing/config.out delete mode 100644 tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/config.out delete mode 100644 tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/config.out delete mode 100644 tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/config.out delete mode 100644 tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/config.out delete mode 100644 tests/quick/20.eio-short/ref/alpha/eio/detailed/config.out delete mode 100644 tests/quick/20.eio-short/ref/alpha/eio/simple-atomic/config.out delete mode 100644 tests/quick/20.eio-short/ref/alpha/eio/simple-timing/config.out delete mode 100644 tests/quick/50.memtest/ref/alpha/linux/memtest/config.out delete mode 100644 tests/quick/80.netperf-stream/ref/alpha/linux/twosys-tsunami-simple-atomic/config.out diff --git a/tests/SConscript b/tests/SConscript index 472fa3f4c..812ce8c11 100644 --- a/tests/SConscript +++ b/tests/SConscript @@ -114,7 +114,7 @@ def update_test(target, source, env): src_dir = str(source[1].get_dir()) dest_files = os.listdir(dest_dir) src_files = os.listdir(src_dir) - for f in ('stdout', 'stderr', 'm5stats.txt', 'config.ini', 'config.out'): + for f in ('stdout', 'stderr', 'm5stats.txt', 'config.ini'): if f in dest_files: print " Replacing file", f dest_files.remove(f) diff --git a/tests/long/00.gzip/ref/alpha/tru64/o3-timing/config.out b/tests/long/00.gzip/ref/alpha/tru64/o3-timing/config.out deleted file mode 100644 index 24d41aaa7..000000000 --- a/tests/long/00.gzip/ref/alpha/tru64/o3-timing/config.out +++ /dev/null @@ -1,366 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system] -type=System -physmem=system.physmem -mem_mode=atomic - -[system.membus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.workload] -type=LiveProcess -cmd=gzip input.log 1 -executable=/dist/m5/cpu2000/binaries/alpha/tru64/gzip -input=cin -output=cout -env= -cwd=build/ALPHA_SE/tests/fast/long/00.gzip/alpha/tru64/o3-timing -system=system -uid=100 -euid=100 -gid=100 -egid=100 -pid=100 -ppid=99 - -[system.cpu.fuPool.FUList0.opList0] -type=OpDesc -opClass=IntAlu -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList0] -type=FUDesc -opList=system.cpu.fuPool.FUList0.opList0 -count=6 - -[system.cpu.fuPool.FUList1.opList0] -type=OpDesc -opClass=IntMult -opLat=3 -issueLat=1 - -[system.cpu.fuPool.FUList1.opList1] -type=OpDesc -opClass=IntDiv -opLat=20 -issueLat=19 - -[system.cpu.fuPool.FUList1] -type=FUDesc -opList=system.cpu.fuPool.FUList1.opList0 system.cpu.fuPool.FUList1.opList1 -count=2 - -[system.cpu.fuPool.FUList2.opList0] -type=OpDesc -opClass=FloatAdd -opLat=2 -issueLat=1 - -[system.cpu.fuPool.FUList2.opList1] -type=OpDesc -opClass=FloatCmp -opLat=2 -issueLat=1 - -[system.cpu.fuPool.FUList2.opList2] -type=OpDesc -opClass=FloatCvt -opLat=2 -issueLat=1 - -[system.cpu.fuPool.FUList2] -type=FUDesc -opList=system.cpu.fuPool.FUList2.opList0 system.cpu.fuPool.FUList2.opList1 system.cpu.fuPool.FUList2.opList2 -count=4 - -[system.cpu.fuPool.FUList3.opList0] -type=OpDesc -opClass=FloatMult -opLat=4 -issueLat=1 - -[system.cpu.fuPool.FUList3.opList1] -type=OpDesc -opClass=FloatDiv -opLat=12 -issueLat=12 - -[system.cpu.fuPool.FUList3.opList2] -type=OpDesc -opClass=FloatSqrt -opLat=24 -issueLat=24 - -[system.cpu.fuPool.FUList3] -type=FUDesc -opList=system.cpu.fuPool.FUList3.opList0 system.cpu.fuPool.FUList3.opList1 system.cpu.fuPool.FUList3.opList2 -count=2 - -[system.cpu.fuPool.FUList4.opList0] -type=OpDesc -opClass=MemRead -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList4] -type=FUDesc -opList=system.cpu.fuPool.FUList4.opList0 -count=0 - -[system.cpu.fuPool.FUList5.opList0] -type=OpDesc -opClass=MemWrite -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList5] -type=FUDesc -opList=system.cpu.fuPool.FUList5.opList0 -count=0 - -[system.cpu.fuPool.FUList6.opList0] -type=OpDesc -opClass=MemRead -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList6.opList1] -type=OpDesc -opClass=MemWrite -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList6] -type=FUDesc -opList=system.cpu.fuPool.FUList6.opList0 system.cpu.fuPool.FUList6.opList1 -count=4 - -[system.cpu.fuPool.FUList7.opList0] -type=OpDesc -opClass=IprAccess -opLat=3 -issueLat=3 - -[system.cpu.fuPool.FUList7] -type=FUDesc -opList=system.cpu.fuPool.FUList7.opList0 -count=1 - -[system.cpu.fuPool] -type=FUPool -FUList=system.cpu.fuPool.FUList0 system.cpu.fuPool.FUList1 system.cpu.fuPool.FUList2 system.cpu.fuPool.FUList3 system.cpu.fuPool.FUList4 system.cpu.fuPool.FUList5 system.cpu.fuPool.FUList6 system.cpu.fuPool.FUList7 - -[system.cpu] -type=DerivO3CPU -clock=500 -phase=0 -numThreads=1 -cpu_id=0 -activity=0 -workload=system.cpu.workload -checker=null -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -cachePorts=200 -decodeToFetchDelay=1 -renameToFetchDelay=1 -iewToFetchDelay=1 -commitToFetchDelay=1 -fetchWidth=8 -renameToDecodeDelay=1 -iewToDecodeDelay=1 -commitToDecodeDelay=1 -fetchToDecodeDelay=1 -decodeWidth=8 -iewToRenameDelay=1 -commitToRenameDelay=1 -decodeToRenameDelay=1 -renameWidth=8 -commitToIEWDelay=1 -renameToIEWDelay=2 -issueToExecuteDelay=1 -dispatchWidth=8 -issueWidth=8 -wbWidth=8 -wbDepth=1 -fuPool=system.cpu.fuPool -iewToCommitDelay=1 -renameToROBDelay=1 -commitWidth=8 -squashWidth=8 -trapLatency=13 -backComSize=5 -forwardComSize=5 -predType=tournament -localPredictorSize=2048 -localCtrBits=2 -localHistoryTableSize=2048 -localHistoryBits=11 -globalPredictorSize=8192 -globalCtrBits=2 -globalHistoryBits=13 -choicePredictorSize=8192 -choiceCtrBits=2 -BTBEntries=4096 -BTBTagSize=16 -RASSize=16 -LQEntries=32 -SQEntries=32 -LFSTSize=1024 -SSITSize=1024 -numPhysIntRegs=256 -numPhysFloatRegs=256 -numIQEntries=64 -numROBEntries=192 -smtNumFetchingThreads=1 -smtFetchPolicy=SingleThread -smtLSQPolicy=Partitioned -smtLSQThreshold=100 -smtIQPolicy=Partitioned -smtIQThreshold=100 -smtROBPolicy=Partitioned -smtROBThreshold=100 -smtCommitPolicy=RoundRobin -instShiftAmt=2 -defer_registration=false -function_trace=false -function_trace_start=0 - -[system.cpu.icache] -type=BaseCache -size=131072 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=20 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.dcache] -type=BaseCache -size=262144 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=20 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.l2cache] -type=BaseCache -size=2097152 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.toL2Bus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - diff --git a/tests/long/00.gzip/ref/alpha/tru64/simple-atomic/config.out b/tests/long/00.gzip/ref/alpha/tru64/simple-atomic/config.out deleted file mode 100644 index 589507187..000000000 --- a/tests/long/00.gzip/ref/alpha/tru64/simple-atomic/config.out +++ /dev/null @@ -1,58 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system] -type=System -physmem=system.physmem -mem_mode=atomic - -[system.membus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.workload] -type=LiveProcess -cmd=gzip input.log 1 -executable=/dist/m5/cpu2000/binaries/alpha/tru64/gzip -input=cin -output=cout -env= -cwd=build/ALPHA_SE/tests/fast/long/00.gzip/alpha/tru64/simple-atomic -system=system -uid=100 -euid=100 -gid=100 -egid=100 -pid=100 -ppid=99 - -[system.cpu] -type=AtomicSimpleCPU -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -system=system -cpu_id=0 -workload=system.cpu.workload -clock=500 -phase=0 -defer_registration=false -width=1 -function_trace=false -function_trace_start=0 -simulate_stalls=false - diff --git a/tests/long/00.gzip/ref/alpha/tru64/simple-timing/config.out b/tests/long/00.gzip/ref/alpha/tru64/simple-timing/config.out deleted file mode 100644 index 5cab10662..000000000 --- a/tests/long/00.gzip/ref/alpha/tru64/simple-timing/config.out +++ /dev/null @@ -1,177 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system] -type=System -physmem=system.physmem -mem_mode=atomic - -[system.membus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.workload] -type=LiveProcess -cmd=gzip input.log 1 -executable=/dist/m5/cpu2000/binaries/alpha/tru64/gzip -input=cin -output=cout -env= -cwd=build/ALPHA_SE/tests/fast/long/00.gzip/alpha/tru64/simple-timing -system=system -uid=100 -euid=100 -gid=100 -egid=100 -pid=100 -ppid=99 - -[system.cpu] -type=TimingSimpleCPU -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -system=system -cpu_id=0 -workload=system.cpu.workload -clock=500 -phase=0 -defer_registration=false -// width not specified -function_trace=false -function_trace_start=0 -// simulate_stalls not specified - -[system.cpu.toL2Bus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.icache] -type=BaseCache -size=131072 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.dcache] -type=BaseCache -size=262144 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.l2cache] -type=BaseCache -size=2097152 -assoc=2 -block_size=64 -latency=10000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - diff --git a/tests/long/00.gzip/ref/sparc/linux/o3-timing/config.out b/tests/long/00.gzip/ref/sparc/linux/o3-timing/config.out deleted file mode 100644 index b8a2728b3..000000000 --- a/tests/long/00.gzip/ref/sparc/linux/o3-timing/config.out +++ /dev/null @@ -1,366 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system] -type=System -physmem=system.physmem -mem_mode=atomic - -[system.membus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.workload] -type=LiveProcess -cmd=gzip input.log 1 -executable=/dist/m5/cpu2000/binaries/sparc/linux/gzip -input=cin -output=cout -env= -cwd=build/SPARC_SE/tests/fast/long/00.gzip/sparc/linux/o3-timing -system=system -uid=100 -euid=100 -gid=100 -egid=100 -pid=100 -ppid=99 - -[system.cpu.fuPool.FUList0.opList0] -type=OpDesc -opClass=IntAlu -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList0] -type=FUDesc -opList=system.cpu.fuPool.FUList0.opList0 -count=6 - -[system.cpu.fuPool.FUList1.opList0] -type=OpDesc -opClass=IntMult -opLat=3 -issueLat=1 - -[system.cpu.fuPool.FUList1.opList1] -type=OpDesc -opClass=IntDiv -opLat=20 -issueLat=19 - -[system.cpu.fuPool.FUList1] -type=FUDesc -opList=system.cpu.fuPool.FUList1.opList0 system.cpu.fuPool.FUList1.opList1 -count=2 - -[system.cpu.fuPool.FUList2.opList0] -type=OpDesc -opClass=FloatAdd -opLat=2 -issueLat=1 - -[system.cpu.fuPool.FUList2.opList1] -type=OpDesc -opClass=FloatCmp -opLat=2 -issueLat=1 - -[system.cpu.fuPool.FUList2.opList2] -type=OpDesc -opClass=FloatCvt -opLat=2 -issueLat=1 - -[system.cpu.fuPool.FUList2] -type=FUDesc -opList=system.cpu.fuPool.FUList2.opList0 system.cpu.fuPool.FUList2.opList1 system.cpu.fuPool.FUList2.opList2 -count=4 - -[system.cpu.fuPool.FUList3.opList0] -type=OpDesc -opClass=FloatMult -opLat=4 -issueLat=1 - -[system.cpu.fuPool.FUList3.opList1] -type=OpDesc -opClass=FloatDiv -opLat=12 -issueLat=12 - -[system.cpu.fuPool.FUList3.opList2] -type=OpDesc -opClass=FloatSqrt -opLat=24 -issueLat=24 - -[system.cpu.fuPool.FUList3] -type=FUDesc -opList=system.cpu.fuPool.FUList3.opList0 system.cpu.fuPool.FUList3.opList1 system.cpu.fuPool.FUList3.opList2 -count=2 - -[system.cpu.fuPool.FUList4.opList0] -type=OpDesc -opClass=MemRead -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList4] -type=FUDesc -opList=system.cpu.fuPool.FUList4.opList0 -count=0 - -[system.cpu.fuPool.FUList5.opList0] -type=OpDesc -opClass=MemWrite -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList5] -type=FUDesc -opList=system.cpu.fuPool.FUList5.opList0 -count=0 - -[system.cpu.fuPool.FUList6.opList0] -type=OpDesc -opClass=MemRead -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList6.opList1] -type=OpDesc -opClass=MemWrite -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList6] -type=FUDesc -opList=system.cpu.fuPool.FUList6.opList0 system.cpu.fuPool.FUList6.opList1 -count=4 - -[system.cpu.fuPool.FUList7.opList0] -type=OpDesc -opClass=IprAccess -opLat=3 -issueLat=3 - -[system.cpu.fuPool.FUList7] -type=FUDesc -opList=system.cpu.fuPool.FUList7.opList0 -count=1 - -[system.cpu.fuPool] -type=FUPool -FUList=system.cpu.fuPool.FUList0 system.cpu.fuPool.FUList1 system.cpu.fuPool.FUList2 system.cpu.fuPool.FUList3 system.cpu.fuPool.FUList4 system.cpu.fuPool.FUList5 system.cpu.fuPool.FUList6 system.cpu.fuPool.FUList7 - -[system.cpu] -type=DerivO3CPU -clock=500 -phase=0 -numThreads=1 -cpu_id=0 -activity=0 -workload=system.cpu.workload -checker=null -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -cachePorts=200 -decodeToFetchDelay=1 -renameToFetchDelay=1 -iewToFetchDelay=1 -commitToFetchDelay=1 -fetchWidth=8 -renameToDecodeDelay=1 -iewToDecodeDelay=1 -commitToDecodeDelay=1 -fetchToDecodeDelay=1 -decodeWidth=8 -iewToRenameDelay=1 -commitToRenameDelay=1 -decodeToRenameDelay=1 -renameWidth=8 -commitToIEWDelay=1 -renameToIEWDelay=2 -issueToExecuteDelay=1 -dispatchWidth=8 -issueWidth=8 -wbWidth=8 -wbDepth=1 -fuPool=system.cpu.fuPool -iewToCommitDelay=1 -renameToROBDelay=1 -commitWidth=8 -squashWidth=8 -trapLatency=13 -backComSize=5 -forwardComSize=5 -predType=tournament -localPredictorSize=2048 -localCtrBits=2 -localHistoryTableSize=2048 -localHistoryBits=11 -globalPredictorSize=8192 -globalCtrBits=2 -globalHistoryBits=13 -choicePredictorSize=8192 -choiceCtrBits=2 -BTBEntries=4096 -BTBTagSize=16 -RASSize=16 -LQEntries=32 -SQEntries=32 -LFSTSize=1024 -SSITSize=1024 -numPhysIntRegs=256 -numPhysFloatRegs=256 -numIQEntries=64 -numROBEntries=192 -smtNumFetchingThreads=1 -smtFetchPolicy=SingleThread -smtLSQPolicy=Partitioned -smtLSQThreshold=100 -smtIQPolicy=Partitioned -smtIQThreshold=100 -smtROBPolicy=Partitioned -smtROBThreshold=100 -smtCommitPolicy=RoundRobin -instShiftAmt=2 -defer_registration=false -function_trace=false -function_trace_start=0 - -[system.cpu.icache] -type=BaseCache -size=131072 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=20 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.dcache] -type=BaseCache -size=262144 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=20 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.l2cache] -type=BaseCache -size=2097152 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.toL2Bus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - diff --git a/tests/long/00.gzip/ref/sparc/linux/simple-atomic/config.out b/tests/long/00.gzip/ref/sparc/linux/simple-atomic/config.out deleted file mode 100644 index 9c608a7e6..000000000 --- a/tests/long/00.gzip/ref/sparc/linux/simple-atomic/config.out +++ /dev/null @@ -1,58 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system] -type=System -physmem=system.physmem -mem_mode=atomic - -[system.membus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.workload] -type=LiveProcess -cmd=gzip input.log 1 -executable=/dist/m5/cpu2000/binaries/sparc/linux/gzip -input=cin -output=cout -env= -cwd=build/SPARC_SE/tests/fast/long/00.gzip/sparc/linux/simple-atomic -system=system -uid=100 -euid=100 -gid=100 -egid=100 -pid=100 -ppid=99 - -[system.cpu] -type=AtomicSimpleCPU -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -system=system -cpu_id=0 -workload=system.cpu.workload -clock=500 -phase=0 -defer_registration=false -width=1 -function_trace=false -function_trace_start=0 -simulate_stalls=false - diff --git a/tests/long/00.gzip/ref/sparc/linux/simple-timing/config.out b/tests/long/00.gzip/ref/sparc/linux/simple-timing/config.out deleted file mode 100644 index d8a055b90..000000000 --- a/tests/long/00.gzip/ref/sparc/linux/simple-timing/config.out +++ /dev/null @@ -1,177 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system] -type=System -physmem=system.physmem -mem_mode=atomic - -[system.membus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.workload] -type=LiveProcess -cmd=gzip input.log 1 -executable=/dist/m5/cpu2000/binaries/sparc/linux/gzip -input=cin -output=cout -env= -cwd=build/SPARC_SE/tests/fast/long/00.gzip/sparc/linux/simple-timing -system=system -uid=100 -euid=100 -gid=100 -egid=100 -pid=100 -ppid=99 - -[system.cpu] -type=TimingSimpleCPU -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -system=system -cpu_id=0 -workload=system.cpu.workload -clock=500 -phase=0 -defer_registration=false -// width not specified -function_trace=false -function_trace_start=0 -// simulate_stalls not specified - -[system.cpu.toL2Bus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.icache] -type=BaseCache -size=131072 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.dcache] -type=BaseCache -size=262144 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.l2cache] -type=BaseCache -size=2097152 -assoc=2 -block_size=64 -latency=10000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - diff --git a/tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.out b/tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.out deleted file mode 100644 index b84a9d780..000000000 --- a/tests/long/10.mcf/ref/sparc/linux/simple-atomic/config.out +++ /dev/null @@ -1,58 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system] -type=System -physmem=system.physmem -mem_mode=atomic - -[system.membus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.workload] -type=LiveProcess -cmd=mcf mcf.in -executable=/dist/m5/cpu2000/binaries/sparc/linux/mcf -input=/dist/m5/cpu2000/data/mcf/smred/input/mcf.in -output=cout -env= -cwd=build/SPARC_SE/tests/fast/long/10.mcf/sparc/linux/simple-atomic -system=system -uid=100 -euid=100 -gid=100 -egid=100 -pid=100 -ppid=99 - -[system.cpu] -type=AtomicSimpleCPU -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -system=system -cpu_id=0 -workload=system.cpu.workload -clock=500 -phase=0 -defer_registration=false -width=1 -function_trace=false -function_trace_start=0 -simulate_stalls=false - diff --git a/tests/long/10.mcf/ref/sparc/linux/simple-timing/config.out b/tests/long/10.mcf/ref/sparc/linux/simple-timing/config.out deleted file mode 100644 index 81e06c995..000000000 --- a/tests/long/10.mcf/ref/sparc/linux/simple-timing/config.out +++ /dev/null @@ -1,177 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system] -type=System -physmem=system.physmem -mem_mode=atomic - -[system.membus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.workload] -type=LiveProcess -cmd=mcf mcf.in -executable=/dist/m5/cpu2000/binaries/sparc/linux/mcf -input=/dist/m5/cpu2000/data/mcf/smred/input/mcf.in -output=cout -env= -cwd=build/SPARC_SE/tests/fast/long/10.mcf/sparc/linux/simple-timing -system=system -uid=100 -euid=100 -gid=100 -egid=100 -pid=100 -ppid=99 - -[system.cpu] -type=TimingSimpleCPU -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -system=system -cpu_id=0 -workload=system.cpu.workload -clock=500 -phase=0 -defer_registration=false -// width not specified -function_trace=false -function_trace_start=0 -// simulate_stalls not specified - -[system.cpu.toL2Bus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.icache] -type=BaseCache -size=131072 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.dcache] -type=BaseCache -size=262144 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.l2cache] -type=BaseCache -size=2097152 -assoc=2 -block_size=64 -latency=10000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=100000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - diff --git a/tests/long/30.eon/ref/alpha/tru64/o3-timing/config.out b/tests/long/30.eon/ref/alpha/tru64/o3-timing/config.out deleted file mode 100644 index cea0c0402..000000000 --- a/tests/long/30.eon/ref/alpha/tru64/o3-timing/config.out +++ /dev/null @@ -1,366 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system] -type=System -physmem=system.physmem -mem_mode=atomic - -[system.membus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.workload] -type=LiveProcess -cmd=eon chair.control.cook chair.camera chair.surfaces chair.cook.ppm ppm pixels_out.cook -executable=/dist/m5/cpu2000/binaries/alpha/tru64/eon -input=cin -output=cout -env= -cwd=build/ALPHA_SE/tests/fast/long/30.eon/alpha/tru64/o3-timing -system=system -uid=100 -euid=100 -gid=100 -egid=100 -pid=100 -ppid=99 - -[system.cpu.fuPool.FUList0.opList0] -type=OpDesc -opClass=IntAlu -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList0] -type=FUDesc -opList=system.cpu.fuPool.FUList0.opList0 -count=6 - -[system.cpu.fuPool.FUList1.opList0] -type=OpDesc -opClass=IntMult -opLat=3 -issueLat=1 - -[system.cpu.fuPool.FUList1.opList1] -type=OpDesc -opClass=IntDiv -opLat=20 -issueLat=19 - -[system.cpu.fuPool.FUList1] -type=FUDesc -opList=system.cpu.fuPool.FUList1.opList0 system.cpu.fuPool.FUList1.opList1 -count=2 - -[system.cpu.fuPool.FUList2.opList0] -type=OpDesc -opClass=FloatAdd -opLat=2 -issueLat=1 - -[system.cpu.fuPool.FUList2.opList1] -type=OpDesc -opClass=FloatCmp -opLat=2 -issueLat=1 - -[system.cpu.fuPool.FUList2.opList2] -type=OpDesc -opClass=FloatCvt -opLat=2 -issueLat=1 - -[system.cpu.fuPool.FUList2] -type=FUDesc -opList=system.cpu.fuPool.FUList2.opList0 system.cpu.fuPool.FUList2.opList1 system.cpu.fuPool.FUList2.opList2 -count=4 - -[system.cpu.fuPool.FUList3.opList0] -type=OpDesc -opClass=FloatMult -opLat=4 -issueLat=1 - -[system.cpu.fuPool.FUList3.opList1] -type=OpDesc -opClass=FloatDiv -opLat=12 -issueLat=12 - -[system.cpu.fuPool.FUList3.opList2] -type=OpDesc -opClass=FloatSqrt -opLat=24 -issueLat=24 - -[system.cpu.fuPool.FUList3] -type=FUDesc -opList=system.cpu.fuPool.FUList3.opList0 system.cpu.fuPool.FUList3.opList1 system.cpu.fuPool.FUList3.opList2 -count=2 - -[system.cpu.fuPool.FUList4.opList0] -type=OpDesc -opClass=MemRead -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList4] -type=FUDesc -opList=system.cpu.fuPool.FUList4.opList0 -count=0 - -[system.cpu.fuPool.FUList5.opList0] -type=OpDesc -opClass=MemWrite -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList5] -type=FUDesc -opList=system.cpu.fuPool.FUList5.opList0 -count=0 - -[system.cpu.fuPool.FUList6.opList0] -type=OpDesc -opClass=MemRead -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList6.opList1] -type=OpDesc -opClass=MemWrite -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList6] -type=FUDesc -opList=system.cpu.fuPool.FUList6.opList0 system.cpu.fuPool.FUList6.opList1 -count=4 - -[system.cpu.fuPool.FUList7.opList0] -type=OpDesc -opClass=IprAccess -opLat=3 -issueLat=3 - -[system.cpu.fuPool.FUList7] -type=FUDesc -opList=system.cpu.fuPool.FUList7.opList0 -count=1 - -[system.cpu.fuPool] -type=FUPool -FUList=system.cpu.fuPool.FUList0 system.cpu.fuPool.FUList1 system.cpu.fuPool.FUList2 system.cpu.fuPool.FUList3 system.cpu.fuPool.FUList4 system.cpu.fuPool.FUList5 system.cpu.fuPool.FUList6 system.cpu.fuPool.FUList7 - -[system.cpu] -type=DerivO3CPU -clock=500 -phase=0 -numThreads=1 -cpu_id=0 -activity=0 -workload=system.cpu.workload -checker=null -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -cachePorts=200 -decodeToFetchDelay=1 -renameToFetchDelay=1 -iewToFetchDelay=1 -commitToFetchDelay=1 -fetchWidth=8 -renameToDecodeDelay=1 -iewToDecodeDelay=1 -commitToDecodeDelay=1 -fetchToDecodeDelay=1 -decodeWidth=8 -iewToRenameDelay=1 -commitToRenameDelay=1 -decodeToRenameDelay=1 -renameWidth=8 -commitToIEWDelay=1 -renameToIEWDelay=2 -issueToExecuteDelay=1 -dispatchWidth=8 -issueWidth=8 -wbWidth=8 -wbDepth=1 -fuPool=system.cpu.fuPool -iewToCommitDelay=1 -renameToROBDelay=1 -commitWidth=8 -squashWidth=8 -trapLatency=13 -backComSize=5 -forwardComSize=5 -predType=tournament -localPredictorSize=2048 -localCtrBits=2 -localHistoryTableSize=2048 -localHistoryBits=11 -globalPredictorSize=8192 -globalCtrBits=2 -globalHistoryBits=13 -choicePredictorSize=8192 -choiceCtrBits=2 -BTBEntries=4096 -BTBTagSize=16 -RASSize=16 -LQEntries=32 -SQEntries=32 -LFSTSize=1024 -SSITSize=1024 -numPhysIntRegs=256 -numPhysFloatRegs=256 -numIQEntries=64 -numROBEntries=192 -smtNumFetchingThreads=1 -smtFetchPolicy=SingleThread -smtLSQPolicy=Partitioned -smtLSQThreshold=100 -smtIQPolicy=Partitioned -smtIQThreshold=100 -smtROBPolicy=Partitioned -smtROBThreshold=100 -smtCommitPolicy=RoundRobin -instShiftAmt=2 -defer_registration=false -function_trace=false -function_trace_start=0 - -[system.cpu.icache] -type=BaseCache -size=131072 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=20 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.dcache] -type=BaseCache -size=262144 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=20 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.l2cache] -type=BaseCache -size=2097152 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.toL2Bus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - diff --git a/tests/long/30.eon/ref/alpha/tru64/simple-atomic/config.out b/tests/long/30.eon/ref/alpha/tru64/simple-atomic/config.out deleted file mode 100644 index b7319250f..000000000 --- a/tests/long/30.eon/ref/alpha/tru64/simple-atomic/config.out +++ /dev/null @@ -1,58 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system] -type=System -physmem=system.physmem -mem_mode=atomic - -[system.membus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.workload] -type=LiveProcess -cmd=eon chair.control.cook chair.camera chair.surfaces chair.cook.ppm ppm pixels_out.cook -executable=/dist/m5/cpu2000/binaries/alpha/tru64/eon -input=cin -output=cout -env= -cwd=build/ALPHA_SE/tests/fast/long/30.eon/alpha/tru64/simple-atomic -system=system -uid=100 -euid=100 -gid=100 -egid=100 -pid=100 -ppid=99 - -[system.cpu] -type=AtomicSimpleCPU -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -system=system -cpu_id=0 -workload=system.cpu.workload -clock=500 -phase=0 -defer_registration=false -width=1 -function_trace=false -function_trace_start=0 -simulate_stalls=false - diff --git a/tests/long/30.eon/ref/alpha/tru64/simple-timing/config.out b/tests/long/30.eon/ref/alpha/tru64/simple-timing/config.out deleted file mode 100644 index c3af4f4b3..000000000 --- a/tests/long/30.eon/ref/alpha/tru64/simple-timing/config.out +++ /dev/null @@ -1,177 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system] -type=System -physmem=system.physmem -mem_mode=atomic - -[system.membus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.workload] -type=LiveProcess -cmd=eon chair.control.cook chair.camera chair.surfaces chair.cook.ppm ppm pixels_out.cook -executable=/dist/m5/cpu2000/binaries/alpha/tru64/eon -input=cin -output=cout -env= -cwd=build/ALPHA_SE/tests/fast/long/30.eon/alpha/tru64/simple-timing -system=system -uid=100 -euid=100 -gid=100 -egid=100 -pid=100 -ppid=99 - -[system.cpu] -type=TimingSimpleCPU -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -system=system -cpu_id=0 -workload=system.cpu.workload -clock=500 -phase=0 -defer_registration=false -// width not specified -function_trace=false -function_trace_start=0 -// simulate_stalls not specified - -[system.cpu.toL2Bus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.icache] -type=BaseCache -size=131072 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.dcache] -type=BaseCache -size=262144 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.l2cache] -type=BaseCache -size=2097152 -assoc=2 -block_size=64 -latency=10000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - diff --git a/tests/long/40.perlbmk/ref/alpha/tru64/simple-atomic/config.out b/tests/long/40.perlbmk/ref/alpha/tru64/simple-atomic/config.out deleted file mode 100644 index 0e4ea1cb5..000000000 --- a/tests/long/40.perlbmk/ref/alpha/tru64/simple-atomic/config.out +++ /dev/null @@ -1,58 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system] -type=System -physmem=system.physmem -mem_mode=atomic - -[system.membus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.workload] -type=LiveProcess -cmd=perlbmk -I. -I lib lgred.makerand.pl -executable=/dist/m5/cpu2000/binaries/alpha/tru64/perlbmk -input=cin -output=cout -env= -cwd=build/ALPHA_SE/tests/fast/long/40.perlbmk/alpha/tru64/simple-atomic -system=system -uid=100 -euid=100 -gid=100 -egid=100 -pid=100 -ppid=99 - -[system.cpu] -type=AtomicSimpleCPU -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -system=system -cpu_id=0 -workload=system.cpu.workload -clock=500 -phase=0 -defer_registration=false -width=1 -function_trace=false -function_trace_start=0 -simulate_stalls=false - diff --git a/tests/long/40.perlbmk/ref/alpha/tru64/simple-timing/config.out b/tests/long/40.perlbmk/ref/alpha/tru64/simple-timing/config.out deleted file mode 100644 index 676b65128..000000000 --- a/tests/long/40.perlbmk/ref/alpha/tru64/simple-timing/config.out +++ /dev/null @@ -1,177 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system] -type=System -physmem=system.physmem -mem_mode=atomic - -[system.membus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.workload] -type=LiveProcess -cmd=perlbmk -I. -I lib lgred.makerand.pl -executable=/dist/m5/cpu2000/binaries/alpha/tru64/perlbmk -input=cin -output=cout -env= -cwd=build/ALPHA_SE/tests/fast/long/40.perlbmk/alpha/tru64/simple-timing -system=system -uid=100 -euid=100 -gid=100 -egid=100 -pid=100 -ppid=99 - -[system.cpu] -type=TimingSimpleCPU -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -system=system -cpu_id=0 -workload=system.cpu.workload -clock=500 -phase=0 -defer_registration=false -// width not specified -function_trace=false -function_trace_start=0 -// simulate_stalls not specified - -[system.cpu.toL2Bus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.icache] -type=BaseCache -size=131072 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.dcache] -type=BaseCache -size=262144 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.l2cache] -type=BaseCache -size=2097152 -assoc=2 -block_size=64 -latency=10000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - diff --git a/tests/long/50.vortex/ref/alpha/tru64/o3-timing/config.out b/tests/long/50.vortex/ref/alpha/tru64/o3-timing/config.out deleted file mode 100644 index 071b401c0..000000000 --- a/tests/long/50.vortex/ref/alpha/tru64/o3-timing/config.out +++ /dev/null @@ -1,366 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system] -type=System -physmem=system.physmem -mem_mode=atomic - -[system.membus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.workload] -type=LiveProcess -cmd=vortex lendian.raw -executable=/dist/m5/cpu2000/binaries/alpha/tru64/vortex -input=cin -output=cout -env= -cwd=build/ALPHA_SE/tests/fast/long/50.vortex/alpha/tru64/o3-timing -system=system -uid=100 -euid=100 -gid=100 -egid=100 -pid=100 -ppid=99 - -[system.cpu.fuPool.FUList0.opList0] -type=OpDesc -opClass=IntAlu -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList0] -type=FUDesc -opList=system.cpu.fuPool.FUList0.opList0 -count=6 - -[system.cpu.fuPool.FUList1.opList0] -type=OpDesc -opClass=IntMult -opLat=3 -issueLat=1 - -[system.cpu.fuPool.FUList1.opList1] -type=OpDesc -opClass=IntDiv -opLat=20 -issueLat=19 - -[system.cpu.fuPool.FUList1] -type=FUDesc -opList=system.cpu.fuPool.FUList1.opList0 system.cpu.fuPool.FUList1.opList1 -count=2 - -[system.cpu.fuPool.FUList2.opList0] -type=OpDesc -opClass=FloatAdd -opLat=2 -issueLat=1 - -[system.cpu.fuPool.FUList2.opList1] -type=OpDesc -opClass=FloatCmp -opLat=2 -issueLat=1 - -[system.cpu.fuPool.FUList2.opList2] -type=OpDesc -opClass=FloatCvt -opLat=2 -issueLat=1 - -[system.cpu.fuPool.FUList2] -type=FUDesc -opList=system.cpu.fuPool.FUList2.opList0 system.cpu.fuPool.FUList2.opList1 system.cpu.fuPool.FUList2.opList2 -count=4 - -[system.cpu.fuPool.FUList3.opList0] -type=OpDesc -opClass=FloatMult -opLat=4 -issueLat=1 - -[system.cpu.fuPool.FUList3.opList1] -type=OpDesc -opClass=FloatDiv -opLat=12 -issueLat=12 - -[system.cpu.fuPool.FUList3.opList2] -type=OpDesc -opClass=FloatSqrt -opLat=24 -issueLat=24 - -[system.cpu.fuPool.FUList3] -type=FUDesc -opList=system.cpu.fuPool.FUList3.opList0 system.cpu.fuPool.FUList3.opList1 system.cpu.fuPool.FUList3.opList2 -count=2 - -[system.cpu.fuPool.FUList4.opList0] -type=OpDesc -opClass=MemRead -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList4] -type=FUDesc -opList=system.cpu.fuPool.FUList4.opList0 -count=0 - -[system.cpu.fuPool.FUList5.opList0] -type=OpDesc -opClass=MemWrite -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList5] -type=FUDesc -opList=system.cpu.fuPool.FUList5.opList0 -count=0 - -[system.cpu.fuPool.FUList6.opList0] -type=OpDesc -opClass=MemRead -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList6.opList1] -type=OpDesc -opClass=MemWrite -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList6] -type=FUDesc -opList=system.cpu.fuPool.FUList6.opList0 system.cpu.fuPool.FUList6.opList1 -count=4 - -[system.cpu.fuPool.FUList7.opList0] -type=OpDesc -opClass=IprAccess -opLat=3 -issueLat=3 - -[system.cpu.fuPool.FUList7] -type=FUDesc -opList=system.cpu.fuPool.FUList7.opList0 -count=1 - -[system.cpu.fuPool] -type=FUPool -FUList=system.cpu.fuPool.FUList0 system.cpu.fuPool.FUList1 system.cpu.fuPool.FUList2 system.cpu.fuPool.FUList3 system.cpu.fuPool.FUList4 system.cpu.fuPool.FUList5 system.cpu.fuPool.FUList6 system.cpu.fuPool.FUList7 - -[system.cpu] -type=DerivO3CPU -clock=500 -phase=0 -numThreads=1 -cpu_id=0 -activity=0 -workload=system.cpu.workload -checker=null -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -cachePorts=200 -decodeToFetchDelay=1 -renameToFetchDelay=1 -iewToFetchDelay=1 -commitToFetchDelay=1 -fetchWidth=8 -renameToDecodeDelay=1 -iewToDecodeDelay=1 -commitToDecodeDelay=1 -fetchToDecodeDelay=1 -decodeWidth=8 -iewToRenameDelay=1 -commitToRenameDelay=1 -decodeToRenameDelay=1 -renameWidth=8 -commitToIEWDelay=1 -renameToIEWDelay=2 -issueToExecuteDelay=1 -dispatchWidth=8 -issueWidth=8 -wbWidth=8 -wbDepth=1 -fuPool=system.cpu.fuPool -iewToCommitDelay=1 -renameToROBDelay=1 -commitWidth=8 -squashWidth=8 -trapLatency=13 -backComSize=5 -forwardComSize=5 -predType=tournament -localPredictorSize=2048 -localCtrBits=2 -localHistoryTableSize=2048 -localHistoryBits=11 -globalPredictorSize=8192 -globalCtrBits=2 -globalHistoryBits=13 -choicePredictorSize=8192 -choiceCtrBits=2 -BTBEntries=4096 -BTBTagSize=16 -RASSize=16 -LQEntries=32 -SQEntries=32 -LFSTSize=1024 -SSITSize=1024 -numPhysIntRegs=256 -numPhysFloatRegs=256 -numIQEntries=64 -numROBEntries=192 -smtNumFetchingThreads=1 -smtFetchPolicy=SingleThread -smtLSQPolicy=Partitioned -smtLSQThreshold=100 -smtIQPolicy=Partitioned -smtIQThreshold=100 -smtROBPolicy=Partitioned -smtROBThreshold=100 -smtCommitPolicy=RoundRobin -instShiftAmt=2 -defer_registration=false -function_trace=false -function_trace_start=0 - -[system.cpu.icache] -type=BaseCache -size=131072 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=20 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.dcache] -type=BaseCache -size=262144 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=20 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.l2cache] -type=BaseCache -size=2097152 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.toL2Bus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - diff --git a/tests/long/50.vortex/ref/alpha/tru64/simple-atomic/config.out b/tests/long/50.vortex/ref/alpha/tru64/simple-atomic/config.out deleted file mode 100644 index bf2c5c795..000000000 --- a/tests/long/50.vortex/ref/alpha/tru64/simple-atomic/config.out +++ /dev/null @@ -1,58 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system] -type=System -physmem=system.physmem -mem_mode=atomic - -[system.membus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.workload] -type=LiveProcess -cmd=vortex lendian.raw -executable=/dist/m5/cpu2000/binaries/alpha/tru64/vortex -input=cin -output=cout -env= -cwd=build/ALPHA_SE/tests/fast/long/50.vortex/alpha/tru64/simple-atomic -system=system -uid=100 -euid=100 -gid=100 -egid=100 -pid=100 -ppid=99 - -[system.cpu] -type=AtomicSimpleCPU -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -system=system -cpu_id=0 -workload=system.cpu.workload -clock=500 -phase=0 -defer_registration=false -width=1 -function_trace=false -function_trace_start=0 -simulate_stalls=false - diff --git a/tests/long/50.vortex/ref/alpha/tru64/simple-timing/config.out b/tests/long/50.vortex/ref/alpha/tru64/simple-timing/config.out deleted file mode 100644 index c0cb264bc..000000000 --- a/tests/long/50.vortex/ref/alpha/tru64/simple-timing/config.out +++ /dev/null @@ -1,177 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system] -type=System -physmem=system.physmem -mem_mode=atomic - -[system.membus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.workload] -type=LiveProcess -cmd=vortex lendian.raw -executable=/dist/m5/cpu2000/binaries/alpha/tru64/vortex -input=cin -output=cout -env= -cwd=build/ALPHA_SE/tests/fast/long/50.vortex/alpha/tru64/simple-timing -system=system -uid=100 -euid=100 -gid=100 -egid=100 -pid=100 -ppid=99 - -[system.cpu] -type=TimingSimpleCPU -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -system=system -cpu_id=0 -workload=system.cpu.workload -clock=500 -phase=0 -defer_registration=false -// width not specified -function_trace=false -function_trace_start=0 -// simulate_stalls not specified - -[system.cpu.toL2Bus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.icache] -type=BaseCache -size=131072 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.dcache] -type=BaseCache -size=262144 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.l2cache] -type=BaseCache -size=2097152 -assoc=2 -block_size=64 -latency=10000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - diff --git a/tests/long/50.vortex/ref/sparc/linux/simple-atomic/config.out b/tests/long/50.vortex/ref/sparc/linux/simple-atomic/config.out deleted file mode 100644 index 4d97fe26f..000000000 --- a/tests/long/50.vortex/ref/sparc/linux/simple-atomic/config.out +++ /dev/null @@ -1,58 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system] -type=System -physmem=system.physmem -mem_mode=atomic - -[system.membus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.workload] -type=LiveProcess -cmd=vortex bendian.raw -executable=/dist/m5/cpu2000/binaries/sparc/linux/vortex -input=cin -output=cout -env= -cwd=build/SPARC_SE/tests/fast/long/50.vortex/sparc/linux/simple-atomic -system=system -uid=100 -euid=100 -gid=100 -egid=100 -pid=100 -ppid=99 - -[system.cpu] -type=AtomicSimpleCPU -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -system=system -cpu_id=0 -workload=system.cpu.workload -clock=500 -phase=0 -defer_registration=false -width=1 -function_trace=false -function_trace_start=0 -simulate_stalls=false - diff --git a/tests/long/50.vortex/ref/sparc/linux/simple-timing/config.out b/tests/long/50.vortex/ref/sparc/linux/simple-timing/config.out deleted file mode 100644 index c2fb507ae..000000000 --- a/tests/long/50.vortex/ref/sparc/linux/simple-timing/config.out +++ /dev/null @@ -1,177 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system] -type=System -physmem=system.physmem -mem_mode=atomic - -[system.membus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.workload] -type=LiveProcess -cmd=vortex bendian.raw -executable=/dist/m5/cpu2000/binaries/sparc/linux/vortex -input=cin -output=cout -env= -cwd=build/SPARC_SE/tests/fast/long/50.vortex/sparc/linux/simple-timing -system=system -uid=100 -euid=100 -gid=100 -egid=100 -pid=100 -ppid=99 - -[system.cpu] -type=TimingSimpleCPU -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -system=system -cpu_id=0 -workload=system.cpu.workload -clock=500 -phase=0 -defer_registration=false -// width not specified -function_trace=false -function_trace_start=0 -// simulate_stalls not specified - -[system.cpu.toL2Bus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.icache] -type=BaseCache -size=131072 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.dcache] -type=BaseCache -size=262144 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.l2cache] -type=BaseCache -size=2097152 -assoc=2 -block_size=64 -latency=10000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - diff --git a/tests/long/60.bzip2/ref/alpha/tru64/o3-timing/config.out b/tests/long/60.bzip2/ref/alpha/tru64/o3-timing/config.out deleted file mode 100644 index ea4848b9b..000000000 --- a/tests/long/60.bzip2/ref/alpha/tru64/o3-timing/config.out +++ /dev/null @@ -1,366 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system] -type=System -physmem=system.physmem -mem_mode=atomic - -[system.membus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.workload] -type=LiveProcess -cmd=bzip2 input.source 1 -executable=/dist/m5/cpu2000/binaries/alpha/tru64/bzip2 -input=cin -output=cout -env= -cwd=build/ALPHA_SE/tests/fast/long/60.bzip2/alpha/tru64/o3-timing -system=system -uid=100 -euid=100 -gid=100 -egid=100 -pid=100 -ppid=99 - -[system.cpu.fuPool.FUList0.opList0] -type=OpDesc -opClass=IntAlu -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList0] -type=FUDesc -opList=system.cpu.fuPool.FUList0.opList0 -count=6 - -[system.cpu.fuPool.FUList1.opList0] -type=OpDesc -opClass=IntMult -opLat=3 -issueLat=1 - -[system.cpu.fuPool.FUList1.opList1] -type=OpDesc -opClass=IntDiv -opLat=20 -issueLat=19 - -[system.cpu.fuPool.FUList1] -type=FUDesc -opList=system.cpu.fuPool.FUList1.opList0 system.cpu.fuPool.FUList1.opList1 -count=2 - -[system.cpu.fuPool.FUList2.opList0] -type=OpDesc -opClass=FloatAdd -opLat=2 -issueLat=1 - -[system.cpu.fuPool.FUList2.opList1] -type=OpDesc -opClass=FloatCmp -opLat=2 -issueLat=1 - -[system.cpu.fuPool.FUList2.opList2] -type=OpDesc -opClass=FloatCvt -opLat=2 -issueLat=1 - -[system.cpu.fuPool.FUList2] -type=FUDesc -opList=system.cpu.fuPool.FUList2.opList0 system.cpu.fuPool.FUList2.opList1 system.cpu.fuPool.FUList2.opList2 -count=4 - -[system.cpu.fuPool.FUList3.opList0] -type=OpDesc -opClass=FloatMult -opLat=4 -issueLat=1 - -[system.cpu.fuPool.FUList3.opList1] -type=OpDesc -opClass=FloatDiv -opLat=12 -issueLat=12 - -[system.cpu.fuPool.FUList3.opList2] -type=OpDesc -opClass=FloatSqrt -opLat=24 -issueLat=24 - -[system.cpu.fuPool.FUList3] -type=FUDesc -opList=system.cpu.fuPool.FUList3.opList0 system.cpu.fuPool.FUList3.opList1 system.cpu.fuPool.FUList3.opList2 -count=2 - -[system.cpu.fuPool.FUList4.opList0] -type=OpDesc -opClass=MemRead -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList4] -type=FUDesc -opList=system.cpu.fuPool.FUList4.opList0 -count=0 - -[system.cpu.fuPool.FUList5.opList0] -type=OpDesc -opClass=MemWrite -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList5] -type=FUDesc -opList=system.cpu.fuPool.FUList5.opList0 -count=0 - -[system.cpu.fuPool.FUList6.opList0] -type=OpDesc -opClass=MemRead -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList6.opList1] -type=OpDesc -opClass=MemWrite -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList6] -type=FUDesc -opList=system.cpu.fuPool.FUList6.opList0 system.cpu.fuPool.FUList6.opList1 -count=4 - -[system.cpu.fuPool.FUList7.opList0] -type=OpDesc -opClass=IprAccess -opLat=3 -issueLat=3 - -[system.cpu.fuPool.FUList7] -type=FUDesc -opList=system.cpu.fuPool.FUList7.opList0 -count=1 - -[system.cpu.fuPool] -type=FUPool -FUList=system.cpu.fuPool.FUList0 system.cpu.fuPool.FUList1 system.cpu.fuPool.FUList2 system.cpu.fuPool.FUList3 system.cpu.fuPool.FUList4 system.cpu.fuPool.FUList5 system.cpu.fuPool.FUList6 system.cpu.fuPool.FUList7 - -[system.cpu] -type=DerivO3CPU -clock=500 -phase=0 -numThreads=1 -cpu_id=0 -activity=0 -workload=system.cpu.workload -checker=null -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -cachePorts=200 -decodeToFetchDelay=1 -renameToFetchDelay=1 -iewToFetchDelay=1 -commitToFetchDelay=1 -fetchWidth=8 -renameToDecodeDelay=1 -iewToDecodeDelay=1 -commitToDecodeDelay=1 -fetchToDecodeDelay=1 -decodeWidth=8 -iewToRenameDelay=1 -commitToRenameDelay=1 -decodeToRenameDelay=1 -renameWidth=8 -commitToIEWDelay=1 -renameToIEWDelay=2 -issueToExecuteDelay=1 -dispatchWidth=8 -issueWidth=8 -wbWidth=8 -wbDepth=1 -fuPool=system.cpu.fuPool -iewToCommitDelay=1 -renameToROBDelay=1 -commitWidth=8 -squashWidth=8 -trapLatency=13 -backComSize=5 -forwardComSize=5 -predType=tournament -localPredictorSize=2048 -localCtrBits=2 -localHistoryTableSize=2048 -localHistoryBits=11 -globalPredictorSize=8192 -globalCtrBits=2 -globalHistoryBits=13 -choicePredictorSize=8192 -choiceCtrBits=2 -BTBEntries=4096 -BTBTagSize=16 -RASSize=16 -LQEntries=32 -SQEntries=32 -LFSTSize=1024 -SSITSize=1024 -numPhysIntRegs=256 -numPhysFloatRegs=256 -numIQEntries=64 -numROBEntries=192 -smtNumFetchingThreads=1 -smtFetchPolicy=SingleThread -smtLSQPolicy=Partitioned -smtLSQThreshold=100 -smtIQPolicy=Partitioned -smtIQThreshold=100 -smtROBPolicy=Partitioned -smtROBThreshold=100 -smtCommitPolicy=RoundRobin -instShiftAmt=2 -defer_registration=false -function_trace=false -function_trace_start=0 - -[system.cpu.icache] -type=BaseCache -size=131072 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=20 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.dcache] -type=BaseCache -size=262144 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=20 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.l2cache] -type=BaseCache -size=2097152 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.toL2Bus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - diff --git a/tests/long/60.bzip2/ref/alpha/tru64/simple-atomic/config.out b/tests/long/60.bzip2/ref/alpha/tru64/simple-atomic/config.out deleted file mode 100644 index fc081bf5e..000000000 --- a/tests/long/60.bzip2/ref/alpha/tru64/simple-atomic/config.out +++ /dev/null @@ -1,58 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system] -type=System -physmem=system.physmem -mem_mode=atomic - -[system.membus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.workload] -type=LiveProcess -cmd=bzip2 input.source 1 -executable=/dist/m5/cpu2000/binaries/alpha/tru64/bzip2 -input=cin -output=cout -env= -cwd=build/ALPHA_SE/tests/fast/long/60.bzip2/alpha/tru64/simple-atomic -system=system -uid=100 -euid=100 -gid=100 -egid=100 -pid=100 -ppid=99 - -[system.cpu] -type=AtomicSimpleCPU -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -system=system -cpu_id=0 -workload=system.cpu.workload -clock=500 -phase=0 -defer_registration=false -width=1 -function_trace=false -function_trace_start=0 -simulate_stalls=false - diff --git a/tests/long/60.bzip2/ref/alpha/tru64/simple-timing/config.out b/tests/long/60.bzip2/ref/alpha/tru64/simple-timing/config.out deleted file mode 100644 index 55a09db2b..000000000 --- a/tests/long/60.bzip2/ref/alpha/tru64/simple-timing/config.out +++ /dev/null @@ -1,177 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system] -type=System -physmem=system.physmem -mem_mode=atomic - -[system.membus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.workload] -type=LiveProcess -cmd=bzip2 input.source 1 -executable=/dist/m5/cpu2000/binaries/alpha/tru64/bzip2 -input=cin -output=cout -env= -cwd=build/ALPHA_SE/tests/fast/long/60.bzip2/alpha/tru64/simple-timing -system=system -uid=100 -euid=100 -gid=100 -egid=100 -pid=100 -ppid=99 - -[system.cpu] -type=TimingSimpleCPU -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -system=system -cpu_id=0 -workload=system.cpu.workload -clock=500 -phase=0 -defer_registration=false -// width not specified -function_trace=false -function_trace_start=0 -// simulate_stalls not specified - -[system.cpu.toL2Bus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.icache] -type=BaseCache -size=131072 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.dcache] -type=BaseCache -size=262144 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.l2cache] -type=BaseCache -size=2097152 -assoc=2 -block_size=64 -latency=10000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - diff --git a/tests/long/70.twolf/ref/alpha/tru64/o3-timing/config.out b/tests/long/70.twolf/ref/alpha/tru64/o3-timing/config.out deleted file mode 100644 index e3bf50f10..000000000 --- a/tests/long/70.twolf/ref/alpha/tru64/o3-timing/config.out +++ /dev/null @@ -1,366 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system] -type=System -physmem=system.physmem -mem_mode=atomic - -[system.membus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.workload] -type=LiveProcess -cmd=twolf smred -executable=/dist/m5/cpu2000/binaries/alpha/tru64/twolf -input=cin -output=cout -env= -cwd=build/ALPHA_SE/tests/fast/long/70.twolf/alpha/tru64/o3-timing -system=system -uid=100 -euid=100 -gid=100 -egid=100 -pid=100 -ppid=99 - -[system.cpu.fuPool.FUList0.opList0] -type=OpDesc -opClass=IntAlu -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList0] -type=FUDesc -opList=system.cpu.fuPool.FUList0.opList0 -count=6 - -[system.cpu.fuPool.FUList1.opList0] -type=OpDesc -opClass=IntMult -opLat=3 -issueLat=1 - -[system.cpu.fuPool.FUList1.opList1] -type=OpDesc -opClass=IntDiv -opLat=20 -issueLat=19 - -[system.cpu.fuPool.FUList1] -type=FUDesc -opList=system.cpu.fuPool.FUList1.opList0 system.cpu.fuPool.FUList1.opList1 -count=2 - -[system.cpu.fuPool.FUList2.opList0] -type=OpDesc -opClass=FloatAdd -opLat=2 -issueLat=1 - -[system.cpu.fuPool.FUList2.opList1] -type=OpDesc -opClass=FloatCmp -opLat=2 -issueLat=1 - -[system.cpu.fuPool.FUList2.opList2] -type=OpDesc -opClass=FloatCvt -opLat=2 -issueLat=1 - -[system.cpu.fuPool.FUList2] -type=FUDesc -opList=system.cpu.fuPool.FUList2.opList0 system.cpu.fuPool.FUList2.opList1 system.cpu.fuPool.FUList2.opList2 -count=4 - -[system.cpu.fuPool.FUList3.opList0] -type=OpDesc -opClass=FloatMult -opLat=4 -issueLat=1 - -[system.cpu.fuPool.FUList3.opList1] -type=OpDesc -opClass=FloatDiv -opLat=12 -issueLat=12 - -[system.cpu.fuPool.FUList3.opList2] -type=OpDesc -opClass=FloatSqrt -opLat=24 -issueLat=24 - -[system.cpu.fuPool.FUList3] -type=FUDesc -opList=system.cpu.fuPool.FUList3.opList0 system.cpu.fuPool.FUList3.opList1 system.cpu.fuPool.FUList3.opList2 -count=2 - -[system.cpu.fuPool.FUList4.opList0] -type=OpDesc -opClass=MemRead -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList4] -type=FUDesc -opList=system.cpu.fuPool.FUList4.opList0 -count=0 - -[system.cpu.fuPool.FUList5.opList0] -type=OpDesc -opClass=MemWrite -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList5] -type=FUDesc -opList=system.cpu.fuPool.FUList5.opList0 -count=0 - -[system.cpu.fuPool.FUList6.opList0] -type=OpDesc -opClass=MemRead -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList6.opList1] -type=OpDesc -opClass=MemWrite -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList6] -type=FUDesc -opList=system.cpu.fuPool.FUList6.opList0 system.cpu.fuPool.FUList6.opList1 -count=4 - -[system.cpu.fuPool.FUList7.opList0] -type=OpDesc -opClass=IprAccess -opLat=3 -issueLat=3 - -[system.cpu.fuPool.FUList7] -type=FUDesc -opList=system.cpu.fuPool.FUList7.opList0 -count=1 - -[system.cpu.fuPool] -type=FUPool -FUList=system.cpu.fuPool.FUList0 system.cpu.fuPool.FUList1 system.cpu.fuPool.FUList2 system.cpu.fuPool.FUList3 system.cpu.fuPool.FUList4 system.cpu.fuPool.FUList5 system.cpu.fuPool.FUList6 system.cpu.fuPool.FUList7 - -[system.cpu] -type=DerivO3CPU -clock=500 -phase=0 -numThreads=1 -cpu_id=0 -activity=0 -workload=system.cpu.workload -checker=null -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -cachePorts=200 -decodeToFetchDelay=1 -renameToFetchDelay=1 -iewToFetchDelay=1 -commitToFetchDelay=1 -fetchWidth=8 -renameToDecodeDelay=1 -iewToDecodeDelay=1 -commitToDecodeDelay=1 -fetchToDecodeDelay=1 -decodeWidth=8 -iewToRenameDelay=1 -commitToRenameDelay=1 -decodeToRenameDelay=1 -renameWidth=8 -commitToIEWDelay=1 -renameToIEWDelay=2 -issueToExecuteDelay=1 -dispatchWidth=8 -issueWidth=8 -wbWidth=8 -wbDepth=1 -fuPool=system.cpu.fuPool -iewToCommitDelay=1 -renameToROBDelay=1 -commitWidth=8 -squashWidth=8 -trapLatency=13 -backComSize=5 -forwardComSize=5 -predType=tournament -localPredictorSize=2048 -localCtrBits=2 -localHistoryTableSize=2048 -localHistoryBits=11 -globalPredictorSize=8192 -globalCtrBits=2 -globalHistoryBits=13 -choicePredictorSize=8192 -choiceCtrBits=2 -BTBEntries=4096 -BTBTagSize=16 -RASSize=16 -LQEntries=32 -SQEntries=32 -LFSTSize=1024 -SSITSize=1024 -numPhysIntRegs=256 -numPhysFloatRegs=256 -numIQEntries=64 -numROBEntries=192 -smtNumFetchingThreads=1 -smtFetchPolicy=SingleThread -smtLSQPolicy=Partitioned -smtLSQThreshold=100 -smtIQPolicy=Partitioned -smtIQThreshold=100 -smtROBPolicy=Partitioned -smtROBThreshold=100 -smtCommitPolicy=RoundRobin -instShiftAmt=2 -defer_registration=false -function_trace=false -function_trace_start=0 - -[system.cpu.icache] -type=BaseCache -size=131072 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=20 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.dcache] -type=BaseCache -size=262144 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=20 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.l2cache] -type=BaseCache -size=2097152 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.toL2Bus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - diff --git a/tests/long/70.twolf/ref/alpha/tru64/simple-atomic/config.out b/tests/long/70.twolf/ref/alpha/tru64/simple-atomic/config.out deleted file mode 100644 index 47defa937..000000000 --- a/tests/long/70.twolf/ref/alpha/tru64/simple-atomic/config.out +++ /dev/null @@ -1,58 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system] -type=System -physmem=system.physmem -mem_mode=atomic - -[system.membus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.workload] -type=LiveProcess -cmd=twolf smred -executable=/dist/m5/cpu2000/binaries/alpha/tru64/twolf -input=cin -output=cout -env= -cwd=build/ALPHA_SE/tests/fast/long/70.twolf/alpha/tru64/simple-atomic -system=system -uid=100 -euid=100 -gid=100 -egid=100 -pid=100 -ppid=99 - -[system.cpu] -type=AtomicSimpleCPU -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -system=system -cpu_id=0 -workload=system.cpu.workload -clock=500 -phase=0 -defer_registration=false -width=1 -function_trace=false -function_trace_start=0 -simulate_stalls=false - diff --git a/tests/long/70.twolf/ref/alpha/tru64/simple-timing/config.out b/tests/long/70.twolf/ref/alpha/tru64/simple-timing/config.out deleted file mode 100644 index 3ed492885..000000000 --- a/tests/long/70.twolf/ref/alpha/tru64/simple-timing/config.out +++ /dev/null @@ -1,177 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system] -type=System -physmem=system.physmem -mem_mode=atomic - -[system.membus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.workload] -type=LiveProcess -cmd=twolf smred -executable=/dist/m5/cpu2000/binaries/alpha/tru64/twolf -input=cin -output=cout -env= -cwd=build/ALPHA_SE/tests/fast/long/70.twolf/alpha/tru64/simple-timing -system=system -uid=100 -euid=100 -gid=100 -egid=100 -pid=100 -ppid=99 - -[system.cpu] -type=TimingSimpleCPU -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -system=system -cpu_id=0 -workload=system.cpu.workload -clock=500 -phase=0 -defer_registration=false -// width not specified -function_trace=false -function_trace_start=0 -// simulate_stalls not specified - -[system.cpu.toL2Bus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.icache] -type=BaseCache -size=131072 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.dcache] -type=BaseCache -size=262144 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.l2cache] -type=BaseCache -size=2097152 -assoc=2 -block_size=64 -latency=10000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - diff --git a/tests/long/70.twolf/ref/sparc/linux/simple-atomic/config.out b/tests/long/70.twolf/ref/sparc/linux/simple-atomic/config.out deleted file mode 100644 index d448056f4..000000000 --- a/tests/long/70.twolf/ref/sparc/linux/simple-atomic/config.out +++ /dev/null @@ -1,58 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system] -type=System -physmem=system.physmem -mem_mode=atomic - -[system.membus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.workload] -type=LiveProcess -cmd=twolf smred -executable=/dist/m5/cpu2000/binaries/sparc/linux/twolf -input=cin -output=cout -env= -cwd=build/SPARC_SE/tests/fast/long/70.twolf/sparc/linux/simple-atomic -system=system -uid=100 -euid=100 -gid=100 -egid=100 -pid=100 -ppid=99 - -[system.cpu] -type=AtomicSimpleCPU -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -system=system -cpu_id=0 -workload=system.cpu.workload -clock=500 -phase=0 -defer_registration=false -width=1 -function_trace=false -function_trace_start=0 -simulate_stalls=false - diff --git a/tests/long/70.twolf/ref/sparc/linux/simple-timing/config.out b/tests/long/70.twolf/ref/sparc/linux/simple-timing/config.out deleted file mode 100644 index f79151c21..000000000 --- a/tests/long/70.twolf/ref/sparc/linux/simple-timing/config.out +++ /dev/null @@ -1,177 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system] -type=System -physmem=system.physmem -mem_mode=atomic - -[system.membus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.workload] -type=LiveProcess -cmd=twolf smred -executable=/dist/m5/cpu2000/binaries/sparc/linux/twolf -input=cin -output=cout -env= -cwd=build/SPARC_SE/tests/fast/long/70.twolf/sparc/linux/simple-timing -system=system -uid=100 -euid=100 -gid=100 -egid=100 -pid=100 -ppid=99 - -[system.cpu] -type=TimingSimpleCPU -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -system=system -cpu_id=0 -workload=system.cpu.workload -clock=500 -phase=0 -defer_registration=false -// width not specified -function_trace=false -function_trace_start=0 -// simulate_stalls not specified - -[system.cpu.toL2Bus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.icache] -type=BaseCache -size=131072 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.dcache] -type=BaseCache -size=262144 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.l2cache] -type=BaseCache -size=2097152 -assoc=2 -block_size=64 -latency=10000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - diff --git a/tests/long/80.solaris-boot/ref/sparc/solaris/t1000-simple-atomic/config.out b/tests/long/80.solaris-boot/ref/sparc/solaris/t1000-simple-atomic/config.out deleted file mode 100644 index bc35fc4e7..000000000 --- a/tests/long/80.solaris-boot/ref/sparc/solaris/t1000-simple-atomic/config.out +++ /dev/null @@ -1,417 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[1048576,68157439] -latency=1 -zero=true - -[system.rom] -type=PhysicalMemory -file= -range=[1099243192320,1099251580927] -latency=1 -zero=false - -[system.nvram] -type=PhysicalMemory -file= -range=[133429198848,133429207039] -latency=1 -zero=false - -[system.hypervisor_desc] -type=PhysicalMemory -file= -range=[133446500352,133446508543] -latency=1 -zero=false - -[system.partition_desc] -type=PhysicalMemory -file= -range=[133445976064,133445984255] -latency=1 -zero=false - -[system] -type=SparcSystem -physmem=system.physmem -rom=system.rom -nvram=system.nvram -hypervisor_desc=system.hypervisor_desc -partition_desc=system.partition_desc -mem_mode=atomic -reset_addr=1099243192320 -hypervisor_addr=1099243257856 -openboot_addr=1099243716608 -nvram_addr=133429198848 -hypervisor_desc_addr=133446500352 -partition_desc_addr=133445976064 -kernel= -reset_bin=/dist/m5/system/binaries/reset_new.bin -hypervisor_bin=/dist/m5/system/binaries/q_new.bin -openboot_bin=/dist/m5/system/binaries/openboot_new.bin -nvram_bin=/dist/m5/system/binaries/nvram1 -hypervisor_desc_bin=/dist/m5/system/binaries/1up-hv.bin -partition_desc_bin=/dist/m5/system/binaries/1up-md.bin -boot_cpu_frequency=1 -boot_osflags=a -readfile=tests/halt.sh -init_param=0 - -[system.membus] -type=Bus -bus_id=1 -clock=2 -width=64 -responder_set=false -block_size=64 - -[system.intrctrl] -type=IntrControl -sys=system - -[system.t1000] -type=T1000 -system=system -intrctrl=system.intrctrl - -[system.membus.responder] -type=IsaFake -pio_addr=0 -pio_latency=0 -pio_size=8 -ret_bad_addr=true -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.t1000 -system=system - -[system.physmem2] -type=PhysicalMemory -file= -range=[2147483648,2415919103] -latency=1 -zero=true - -[system.bridge] -type=Bridge -req_size_a=16 -req_size_b=16 -resp_size_a=16 -resp_size_b=16 -delay=100 -nack_delay=8 -write_ack=false -fix_partial_write_a=false -fix_partial_write_b=true - -[system.disk0.image.child] -type=RawDiskImage -image_file=/dist/m5/system/disks/disk.s10hw2 -read_only=true - -[system.disk0.image] -type=CowDiskImage -child=system.disk0.image.child -image_file= -table_size=65536 -read_only=false - -[system.disk0] -type=MmDisk -pio_addr=134217728000 -pio_latency=2 -// pio_size not specified -platform=system.t1000 -system=system -image=system.disk0.image - -[system.t1000.hconsole] -type=SimConsole -intr_control=system.intrctrl -output=console -port=3456 -append_name=true -number=0 - -[system.t1000.hvuart] -type=Uart8250 -pio_addr=1099255955456 -pio_latency=2 -platform=system.t1000 -sim_console=system.t1000.hconsole -system=system - -[system.t1000.htod] -type=DumbTOD -pio_addr=1099255906296 -pio_latency=2 -platform=system.t1000 -system=system -time=2009 1 1 0 0 0 3 1 - -[system.t1000.pconsole] -type=SimConsole -intr_control=system.intrctrl -output=console -port=3456 -append_name=true -number=0 - -[system.t1000.puart0] -type=Uart8250 -pio_addr=133412421632 -pio_latency=2 -platform=system.t1000 -sim_console=system.t1000.pconsole -system=system - -[system.t1000.fake_membnks] -type=IsaFake -pio_addr=648540061696 -pio_latency=2 -pio_size=16384 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=0 -platform=system.t1000 -system=system - -[system.t1000.fake_ssi] -type=IsaFake -pio_addr=1095216660480 -pio_latency=2 -pio_size=268435456 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.t1000 -system=system - -[system.t1000.fake_l2_4] -type=IsaFake -pio_addr=725849473216 -pio_latency=2 -pio_size=8 -ret_bad_addr=false -update_data=true -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=1 -platform=system.t1000 -system=system - -[system.t1000.fake_l2_1] -type=IsaFake -pio_addr=725849473024 -pio_latency=2 -pio_size=8 -ret_bad_addr=false -update_data=true -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=1 -platform=system.t1000 -system=system - -[system.t1000.fake_l2_2] -type=IsaFake -pio_addr=725849473088 -pio_latency=2 -pio_size=8 -ret_bad_addr=false -update_data=true -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=1 -platform=system.t1000 -system=system - -[system.t1000.fake_l2_3] -type=IsaFake -pio_addr=725849473152 -pio_latency=2 -pio_size=8 -ret_bad_addr=false -update_data=true -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=1 -platform=system.t1000 -system=system - -[system.t1000.fake_l2esr_3] -type=IsaFake -pio_addr=734439407744 -pio_latency=2 -pio_size=8 -ret_bad_addr=false -update_data=true -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=0 -platform=system.t1000 -system=system - -[system.t1000.fake_l2esr_2] -type=IsaFake -pio_addr=734439407680 -pio_latency=2 -pio_size=8 -ret_bad_addr=false -update_data=true -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=0 -platform=system.t1000 -system=system - -[system.t1000.fake_l2esr_1] -type=IsaFake -pio_addr=734439407616 -pio_latency=2 -pio_size=8 -ret_bad_addr=false -update_data=true -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=0 -platform=system.t1000 -system=system - -[system.t1000.fake_l2esr_4] -type=IsaFake -pio_addr=734439407808 -pio_latency=2 -pio_size=8 -ret_bad_addr=false -update_data=true -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=0 -platform=system.t1000 -system=system - -[system.t1000.iob] -type=Iob -pio_latency=2 -platform=system.t1000 -system=system - -[system.t1000.fake_clk] -type=IsaFake -pio_addr=644245094400 -pio_latency=2 -pio_size=4294967296 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.t1000 -system=system - -[system.t1000.fake_jbi] -type=IsaFake -pio_addr=549755813888 -pio_latency=2 -pio_size=4294967296 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.t1000 -system=system - -[system.iobus] -type=Bus -bus_id=0 -clock=2 -width=64 -responder_set=false -block_size=64 - -[system.iobus.responder] -type=IsaFake -pio_addr=0 -pio_latency=0 -pio_size=8 -ret_bad_addr=true -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.t1000 -system=system - -[system.cpu.itb] -type=SparcITB -size=64 - -[system.cpu.dtb] -type=SparcDTB -size=64 - -[system.cpu] -type=AtomicSimpleCPU -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -system=system -cpu_id=0 -itb=system.cpu.itb -dtb=system.cpu.dtb -profile=0 -do_quiesce=true -do_checkpoint_insts=true -do_statistics_insts=true -clock=1 -phase=0 -defer_registration=false -width=1 -function_trace=false -function_trace_start=0 -simulate_stalls=false - diff --git a/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.out b/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.out deleted file mode 100644 index 7cb2e7d7d..000000000 --- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.out +++ /dev/null @@ -1,366 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system] -type=System -physmem=system.physmem -mem_mode=atomic - -[system.membus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.workload] -type=LiveProcess -cmd=hello -executable=tests/test-progs/hello/bin/alpha/linux/hello -input=cin -output=cout -env= -cwd= -system=system -uid=100 -euid=100 -gid=100 -egid=100 -pid=100 -ppid=99 - -[system.cpu.fuPool.FUList0.opList0] -type=OpDesc -opClass=IntAlu -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList0] -type=FUDesc -opList=system.cpu.fuPool.FUList0.opList0 -count=6 - -[system.cpu.fuPool.FUList1.opList0] -type=OpDesc -opClass=IntMult -opLat=3 -issueLat=1 - -[system.cpu.fuPool.FUList1.opList1] -type=OpDesc -opClass=IntDiv -opLat=20 -issueLat=19 - -[system.cpu.fuPool.FUList1] -type=FUDesc -opList=system.cpu.fuPool.FUList1.opList0 system.cpu.fuPool.FUList1.opList1 -count=2 - -[system.cpu.fuPool.FUList2.opList0] -type=OpDesc -opClass=FloatAdd -opLat=2 -issueLat=1 - -[system.cpu.fuPool.FUList2.opList1] -type=OpDesc -opClass=FloatCmp -opLat=2 -issueLat=1 - -[system.cpu.fuPool.FUList2.opList2] -type=OpDesc -opClass=FloatCvt -opLat=2 -issueLat=1 - -[system.cpu.fuPool.FUList2] -type=FUDesc -opList=system.cpu.fuPool.FUList2.opList0 system.cpu.fuPool.FUList2.opList1 system.cpu.fuPool.FUList2.opList2 -count=4 - -[system.cpu.fuPool.FUList3.opList0] -type=OpDesc -opClass=FloatMult -opLat=4 -issueLat=1 - -[system.cpu.fuPool.FUList3.opList1] -type=OpDesc -opClass=FloatDiv -opLat=12 -issueLat=12 - -[system.cpu.fuPool.FUList3.opList2] -type=OpDesc -opClass=FloatSqrt -opLat=24 -issueLat=24 - -[system.cpu.fuPool.FUList3] -type=FUDesc -opList=system.cpu.fuPool.FUList3.opList0 system.cpu.fuPool.FUList3.opList1 system.cpu.fuPool.FUList3.opList2 -count=2 - -[system.cpu.fuPool.FUList4.opList0] -type=OpDesc -opClass=MemRead -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList4] -type=FUDesc -opList=system.cpu.fuPool.FUList4.opList0 -count=0 - -[system.cpu.fuPool.FUList5.opList0] -type=OpDesc -opClass=MemWrite -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList5] -type=FUDesc -opList=system.cpu.fuPool.FUList5.opList0 -count=0 - -[system.cpu.fuPool.FUList6.opList0] -type=OpDesc -opClass=MemRead -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList6.opList1] -type=OpDesc -opClass=MemWrite -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList6] -type=FUDesc -opList=system.cpu.fuPool.FUList6.opList0 system.cpu.fuPool.FUList6.opList1 -count=4 - -[system.cpu.fuPool.FUList7.opList0] -type=OpDesc -opClass=IprAccess -opLat=3 -issueLat=3 - -[system.cpu.fuPool.FUList7] -type=FUDesc -opList=system.cpu.fuPool.FUList7.opList0 -count=1 - -[system.cpu.fuPool] -type=FUPool -FUList=system.cpu.fuPool.FUList0 system.cpu.fuPool.FUList1 system.cpu.fuPool.FUList2 system.cpu.fuPool.FUList3 system.cpu.fuPool.FUList4 system.cpu.fuPool.FUList5 system.cpu.fuPool.FUList6 system.cpu.fuPool.FUList7 - -[system.cpu] -type=DerivO3CPU -clock=500 -phase=0 -numThreads=1 -cpu_id=0 -activity=0 -workload=system.cpu.workload -checker=null -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -cachePorts=200 -decodeToFetchDelay=1 -renameToFetchDelay=1 -iewToFetchDelay=1 -commitToFetchDelay=1 -fetchWidth=8 -renameToDecodeDelay=1 -iewToDecodeDelay=1 -commitToDecodeDelay=1 -fetchToDecodeDelay=1 -decodeWidth=8 -iewToRenameDelay=1 -commitToRenameDelay=1 -decodeToRenameDelay=1 -renameWidth=8 -commitToIEWDelay=1 -renameToIEWDelay=2 -issueToExecuteDelay=1 -dispatchWidth=8 -issueWidth=8 -wbWidth=8 -wbDepth=1 -fuPool=system.cpu.fuPool -iewToCommitDelay=1 -renameToROBDelay=1 -commitWidth=8 -squashWidth=8 -trapLatency=13 -backComSize=5 -forwardComSize=5 -predType=tournament -localPredictorSize=2048 -localCtrBits=2 -localHistoryTableSize=2048 -localHistoryBits=11 -globalPredictorSize=8192 -globalCtrBits=2 -globalHistoryBits=13 -choicePredictorSize=8192 -choiceCtrBits=2 -BTBEntries=4096 -BTBTagSize=16 -RASSize=16 -LQEntries=32 -SQEntries=32 -LFSTSize=1024 -SSITSize=1024 -numPhysIntRegs=256 -numPhysFloatRegs=256 -numIQEntries=64 -numROBEntries=192 -smtNumFetchingThreads=1 -smtFetchPolicy=SingleThread -smtLSQPolicy=Partitioned -smtLSQThreshold=100 -smtIQPolicy=Partitioned -smtIQThreshold=100 -smtROBPolicy=Partitioned -smtROBThreshold=100 -smtCommitPolicy=RoundRobin -instShiftAmt=2 -defer_registration=false -function_trace=false -function_trace_start=0 - -[system.cpu.icache] -type=BaseCache -size=131072 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=20 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.dcache] -type=BaseCache -size=262144 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=20 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.l2cache] -type=BaseCache -size=2097152 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.toL2Bus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - diff --git a/tests/quick/00.hello/ref/alpha/linux/simple-atomic/config.out b/tests/quick/00.hello/ref/alpha/linux/simple-atomic/config.out deleted file mode 100644 index 117159126..000000000 --- a/tests/quick/00.hello/ref/alpha/linux/simple-atomic/config.out +++ /dev/null @@ -1,58 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system] -type=System -physmem=system.physmem -mem_mode=atomic - -[system.membus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.workload] -type=LiveProcess -cmd=hello -executable=tests/test-progs/hello/bin/alpha/linux/hello -input=cin -output=cout -env= -cwd= -system=system -uid=100 -euid=100 -gid=100 -egid=100 -pid=100 -ppid=99 - -[system.cpu] -type=AtomicSimpleCPU -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -system=system -cpu_id=0 -workload=system.cpu.workload -clock=500 -phase=0 -defer_registration=false -width=1 -function_trace=false -function_trace_start=0 -simulate_stalls=false - diff --git a/tests/quick/00.hello/ref/alpha/linux/simple-timing/config.out b/tests/quick/00.hello/ref/alpha/linux/simple-timing/config.out deleted file mode 100644 index f7852a616..000000000 --- a/tests/quick/00.hello/ref/alpha/linux/simple-timing/config.out +++ /dev/null @@ -1,177 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system] -type=System -physmem=system.physmem -mem_mode=atomic - -[system.membus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.workload] -type=LiveProcess -cmd=hello -executable=tests/test-progs/hello/bin/alpha/linux/hello -input=cin -output=cout -env= -cwd= -system=system -uid=100 -euid=100 -gid=100 -egid=100 -pid=100 -ppid=99 - -[system.cpu] -type=TimingSimpleCPU -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -system=system -cpu_id=0 -workload=system.cpu.workload -clock=500 -phase=0 -defer_registration=false -// width not specified -function_trace=false -function_trace_start=0 -// simulate_stalls not specified - -[system.cpu.toL2Bus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.icache] -type=BaseCache -size=131072 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.dcache] -type=BaseCache -size=262144 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.l2cache] -type=BaseCache -size=2097152 -assoc=2 -block_size=64 -latency=10000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=100000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - diff --git a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.out b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.out deleted file mode 100644 index 0cb6591c8..000000000 --- a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.out +++ /dev/null @@ -1,366 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system] -type=System -physmem=system.physmem -mem_mode=atomic - -[system.membus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.workload] -type=LiveProcess -cmd=hello -executable=tests/test-progs/hello/bin/alpha/tru64/hello -input=cin -output=cout -env= -cwd= -system=system -uid=100 -euid=100 -gid=100 -egid=100 -pid=100 -ppid=99 - -[system.cpu.fuPool.FUList0.opList0] -type=OpDesc -opClass=IntAlu -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList0] -type=FUDesc -opList=system.cpu.fuPool.FUList0.opList0 -count=6 - -[system.cpu.fuPool.FUList1.opList0] -type=OpDesc -opClass=IntMult -opLat=3 -issueLat=1 - -[system.cpu.fuPool.FUList1.opList1] -type=OpDesc -opClass=IntDiv -opLat=20 -issueLat=19 - -[system.cpu.fuPool.FUList1] -type=FUDesc -opList=system.cpu.fuPool.FUList1.opList0 system.cpu.fuPool.FUList1.opList1 -count=2 - -[system.cpu.fuPool.FUList2.opList0] -type=OpDesc -opClass=FloatAdd -opLat=2 -issueLat=1 - -[system.cpu.fuPool.FUList2.opList1] -type=OpDesc -opClass=FloatCmp -opLat=2 -issueLat=1 - -[system.cpu.fuPool.FUList2.opList2] -type=OpDesc -opClass=FloatCvt -opLat=2 -issueLat=1 - -[system.cpu.fuPool.FUList2] -type=FUDesc -opList=system.cpu.fuPool.FUList2.opList0 system.cpu.fuPool.FUList2.opList1 system.cpu.fuPool.FUList2.opList2 -count=4 - -[system.cpu.fuPool.FUList3.opList0] -type=OpDesc -opClass=FloatMult -opLat=4 -issueLat=1 - -[system.cpu.fuPool.FUList3.opList1] -type=OpDesc -opClass=FloatDiv -opLat=12 -issueLat=12 - -[system.cpu.fuPool.FUList3.opList2] -type=OpDesc -opClass=FloatSqrt -opLat=24 -issueLat=24 - -[system.cpu.fuPool.FUList3] -type=FUDesc -opList=system.cpu.fuPool.FUList3.opList0 system.cpu.fuPool.FUList3.opList1 system.cpu.fuPool.FUList3.opList2 -count=2 - -[system.cpu.fuPool.FUList4.opList0] -type=OpDesc -opClass=MemRead -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList4] -type=FUDesc -opList=system.cpu.fuPool.FUList4.opList0 -count=0 - -[system.cpu.fuPool.FUList5.opList0] -type=OpDesc -opClass=MemWrite -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList5] -type=FUDesc -opList=system.cpu.fuPool.FUList5.opList0 -count=0 - -[system.cpu.fuPool.FUList6.opList0] -type=OpDesc -opClass=MemRead -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList6.opList1] -type=OpDesc -opClass=MemWrite -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList6] -type=FUDesc -opList=system.cpu.fuPool.FUList6.opList0 system.cpu.fuPool.FUList6.opList1 -count=4 - -[system.cpu.fuPool.FUList7.opList0] -type=OpDesc -opClass=IprAccess -opLat=3 -issueLat=3 - -[system.cpu.fuPool.FUList7] -type=FUDesc -opList=system.cpu.fuPool.FUList7.opList0 -count=1 - -[system.cpu.fuPool] -type=FUPool -FUList=system.cpu.fuPool.FUList0 system.cpu.fuPool.FUList1 system.cpu.fuPool.FUList2 system.cpu.fuPool.FUList3 system.cpu.fuPool.FUList4 system.cpu.fuPool.FUList5 system.cpu.fuPool.FUList6 system.cpu.fuPool.FUList7 - -[system.cpu] -type=DerivO3CPU -clock=500 -phase=0 -numThreads=1 -cpu_id=0 -activity=0 -workload=system.cpu.workload -checker=null -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -cachePorts=200 -decodeToFetchDelay=1 -renameToFetchDelay=1 -iewToFetchDelay=1 -commitToFetchDelay=1 -fetchWidth=8 -renameToDecodeDelay=1 -iewToDecodeDelay=1 -commitToDecodeDelay=1 -fetchToDecodeDelay=1 -decodeWidth=8 -iewToRenameDelay=1 -commitToRenameDelay=1 -decodeToRenameDelay=1 -renameWidth=8 -commitToIEWDelay=1 -renameToIEWDelay=2 -issueToExecuteDelay=1 -dispatchWidth=8 -issueWidth=8 -wbWidth=8 -wbDepth=1 -fuPool=system.cpu.fuPool -iewToCommitDelay=1 -renameToROBDelay=1 -commitWidth=8 -squashWidth=8 -trapLatency=13 -backComSize=5 -forwardComSize=5 -predType=tournament -localPredictorSize=2048 -localCtrBits=2 -localHistoryTableSize=2048 -localHistoryBits=11 -globalPredictorSize=8192 -globalCtrBits=2 -globalHistoryBits=13 -choicePredictorSize=8192 -choiceCtrBits=2 -BTBEntries=4096 -BTBTagSize=16 -RASSize=16 -LQEntries=32 -SQEntries=32 -LFSTSize=1024 -SSITSize=1024 -numPhysIntRegs=256 -numPhysFloatRegs=256 -numIQEntries=64 -numROBEntries=192 -smtNumFetchingThreads=1 -smtFetchPolicy=SingleThread -smtLSQPolicy=Partitioned -smtLSQThreshold=100 -smtIQPolicy=Partitioned -smtIQThreshold=100 -smtROBPolicy=Partitioned -smtROBThreshold=100 -smtCommitPolicy=RoundRobin -instShiftAmt=2 -defer_registration=false -function_trace=false -function_trace_start=0 - -[system.cpu.icache] -type=BaseCache -size=131072 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=20 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.dcache] -type=BaseCache -size=262144 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=20 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.l2cache] -type=BaseCache -size=2097152 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.toL2Bus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - diff --git a/tests/quick/00.hello/ref/alpha/tru64/simple-atomic/config.out b/tests/quick/00.hello/ref/alpha/tru64/simple-atomic/config.out deleted file mode 100644 index acc734991..000000000 --- a/tests/quick/00.hello/ref/alpha/tru64/simple-atomic/config.out +++ /dev/null @@ -1,58 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system] -type=System -physmem=system.physmem -mem_mode=atomic - -[system.membus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.workload] -type=LiveProcess -cmd=hello -executable=tests/test-progs/hello/bin/alpha/tru64/hello -input=cin -output=cout -env= -cwd= -system=system -uid=100 -euid=100 -gid=100 -egid=100 -pid=100 -ppid=99 - -[system.cpu] -type=AtomicSimpleCPU -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -system=system -cpu_id=0 -workload=system.cpu.workload -clock=500 -phase=0 -defer_registration=false -width=1 -function_trace=false -function_trace_start=0 -simulate_stalls=false - diff --git a/tests/quick/00.hello/ref/alpha/tru64/simple-timing/config.out b/tests/quick/00.hello/ref/alpha/tru64/simple-timing/config.out deleted file mode 100644 index 241630ead..000000000 --- a/tests/quick/00.hello/ref/alpha/tru64/simple-timing/config.out +++ /dev/null @@ -1,177 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system] -type=System -physmem=system.physmem -mem_mode=atomic - -[system.membus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.workload] -type=LiveProcess -cmd=hello -executable=tests/test-progs/hello/bin/alpha/tru64/hello -input=cin -output=cout -env= -cwd= -system=system -uid=100 -euid=100 -gid=100 -egid=100 -pid=100 -ppid=99 - -[system.cpu] -type=TimingSimpleCPU -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -system=system -cpu_id=0 -workload=system.cpu.workload -clock=500 -phase=0 -defer_registration=false -// width not specified -function_trace=false -function_trace_start=0 -// simulate_stalls not specified - -[system.cpu.toL2Bus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.icache] -type=BaseCache -size=131072 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.dcache] -type=BaseCache -size=262144 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.l2cache] -type=BaseCache -size=2097152 -assoc=2 -block_size=64 -latency=10000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=100000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - diff --git a/tests/quick/00.hello/ref/mips/linux/simple-atomic/config.out b/tests/quick/00.hello/ref/mips/linux/simple-atomic/config.out deleted file mode 100644 index 06a3d271d..000000000 --- a/tests/quick/00.hello/ref/mips/linux/simple-atomic/config.out +++ /dev/null @@ -1,58 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system] -type=System -physmem=system.physmem -mem_mode=atomic - -[system.membus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.workload] -type=LiveProcess -cmd=hello -executable=tests/test-progs/hello/bin/mips/linux/hello -input=cin -output=cout -env= -cwd= -system=system -uid=100 -euid=100 -gid=100 -egid=100 -pid=100 -ppid=99 - -[system.cpu] -type=AtomicSimpleCPU -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -system=system -cpu_id=0 -workload=system.cpu.workload -clock=500 -phase=0 -defer_registration=false -width=1 -function_trace=false -function_trace_start=0 -simulate_stalls=false - diff --git a/tests/quick/00.hello/ref/mips/linux/simple-timing/config.out b/tests/quick/00.hello/ref/mips/linux/simple-timing/config.out deleted file mode 100644 index 3f8a51cf4..000000000 --- a/tests/quick/00.hello/ref/mips/linux/simple-timing/config.out +++ /dev/null @@ -1,177 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system] -type=System -physmem=system.physmem -mem_mode=atomic - -[system.membus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.workload] -type=LiveProcess -cmd=hello -executable=tests/test-progs/hello/bin/mips/linux/hello -input=cin -output=cout -env= -cwd= -system=system -uid=100 -euid=100 -gid=100 -egid=100 -pid=100 -ppid=99 - -[system.cpu] -type=TimingSimpleCPU -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -system=system -cpu_id=0 -workload=system.cpu.workload -clock=500 -phase=0 -defer_registration=false -// width not specified -function_trace=false -function_trace_start=0 -// simulate_stalls not specified - -[system.cpu.toL2Bus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.icache] -type=BaseCache -size=131072 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.dcache] -type=BaseCache -size=262144 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.l2cache] -type=BaseCache -size=2097152 -assoc=2 -block_size=64 -latency=10000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - diff --git a/tests/quick/00.hello/ref/sparc/linux/simple-atomic/config.out b/tests/quick/00.hello/ref/sparc/linux/simple-atomic/config.out deleted file mode 100644 index 1666790d0..000000000 --- a/tests/quick/00.hello/ref/sparc/linux/simple-atomic/config.out +++ /dev/null @@ -1,58 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system] -type=System -physmem=system.physmem -mem_mode=atomic - -[system.membus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.workload] -type=LiveProcess -cmd=hello -executable=tests/test-progs/hello/bin/sparc/linux/hello -input=cin -output=cout -env= -cwd= -system=system -uid=100 -euid=100 -gid=100 -egid=100 -pid=100 -ppid=99 - -[system.cpu] -type=AtomicSimpleCPU -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -system=system -cpu_id=0 -workload=system.cpu.workload -clock=500 -phase=0 -defer_registration=false -width=1 -function_trace=false -function_trace_start=0 -simulate_stalls=false - diff --git a/tests/quick/00.hello/ref/sparc/linux/simple-timing/config.out b/tests/quick/00.hello/ref/sparc/linux/simple-timing/config.out deleted file mode 100644 index 89910d3c9..000000000 --- a/tests/quick/00.hello/ref/sparc/linux/simple-timing/config.out +++ /dev/null @@ -1,177 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system] -type=System -physmem=system.physmem -mem_mode=atomic - -[system.membus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.workload] -type=LiveProcess -cmd=hello -executable=tests/test-progs/hello/bin/sparc/linux/hello -input=cin -output=cout -env= -cwd= -system=system -uid=100 -euid=100 -gid=100 -egid=100 -pid=100 -ppid=99 - -[system.cpu] -type=TimingSimpleCPU -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -system=system -cpu_id=0 -workload=system.cpu.workload -clock=500 -phase=0 -defer_registration=false -// width not specified -function_trace=false -function_trace_start=0 -// simulate_stalls not specified - -[system.cpu.toL2Bus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.icache] -type=BaseCache -size=131072 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.dcache] -type=BaseCache -size=262144 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.l2cache] -type=BaseCache -size=2097152 -assoc=2 -block_size=64 -latency=10000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - diff --git a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.out b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.out deleted file mode 100644 index 45b063eb3..000000000 --- a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.out +++ /dev/null @@ -1,382 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system] -type=System -physmem=system.physmem -mem_mode=atomic - -[system.membus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.workload0] -type=LiveProcess -cmd=hello -executable=tests/test-progs/hello/bin/alpha/linux/hello -input=cin -output=cout -env= -cwd= -system=system -uid=100 -euid=100 -gid=100 -egid=100 -pid=100 -ppid=99 - -[system.cpu.workload1] -type=LiveProcess -cmd=hello -executable=tests/test-progs/hello/bin/alpha/linux/hello -input=cin -output=cout -env= -cwd= -system=system -uid=100 -euid=100 -gid=100 -egid=100 -pid=100 -ppid=99 - -[system.cpu.fuPool.FUList0.opList0] -type=OpDesc -opClass=IntAlu -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList0] -type=FUDesc -opList=system.cpu.fuPool.FUList0.opList0 -count=6 - -[system.cpu.fuPool.FUList1.opList0] -type=OpDesc -opClass=IntMult -opLat=3 -issueLat=1 - -[system.cpu.fuPool.FUList1.opList1] -type=OpDesc -opClass=IntDiv -opLat=20 -issueLat=19 - -[system.cpu.fuPool.FUList1] -type=FUDesc -opList=system.cpu.fuPool.FUList1.opList0 system.cpu.fuPool.FUList1.opList1 -count=2 - -[system.cpu.fuPool.FUList2.opList0] -type=OpDesc -opClass=FloatAdd -opLat=2 -issueLat=1 - -[system.cpu.fuPool.FUList2.opList1] -type=OpDesc -opClass=FloatCmp -opLat=2 -issueLat=1 - -[system.cpu.fuPool.FUList2.opList2] -type=OpDesc -opClass=FloatCvt -opLat=2 -issueLat=1 - -[system.cpu.fuPool.FUList2] -type=FUDesc -opList=system.cpu.fuPool.FUList2.opList0 system.cpu.fuPool.FUList2.opList1 system.cpu.fuPool.FUList2.opList2 -count=4 - -[system.cpu.fuPool.FUList3.opList0] -type=OpDesc -opClass=FloatMult -opLat=4 -issueLat=1 - -[system.cpu.fuPool.FUList3.opList1] -type=OpDesc -opClass=FloatDiv -opLat=12 -issueLat=12 - -[system.cpu.fuPool.FUList3.opList2] -type=OpDesc -opClass=FloatSqrt -opLat=24 -issueLat=24 - -[system.cpu.fuPool.FUList3] -type=FUDesc -opList=system.cpu.fuPool.FUList3.opList0 system.cpu.fuPool.FUList3.opList1 system.cpu.fuPool.FUList3.opList2 -count=2 - -[system.cpu.fuPool.FUList4.opList0] -type=OpDesc -opClass=MemRead -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList4] -type=FUDesc -opList=system.cpu.fuPool.FUList4.opList0 -count=0 - -[system.cpu.fuPool.FUList5.opList0] -type=OpDesc -opClass=MemWrite -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList5] -type=FUDesc -opList=system.cpu.fuPool.FUList5.opList0 -count=0 - -[system.cpu.fuPool.FUList6.opList0] -type=OpDesc -opClass=MemRead -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList6.opList1] -type=OpDesc -opClass=MemWrite -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList6] -type=FUDesc -opList=system.cpu.fuPool.FUList6.opList0 system.cpu.fuPool.FUList6.opList1 -count=4 - -[system.cpu.fuPool.FUList7.opList0] -type=OpDesc -opClass=IprAccess -opLat=3 -issueLat=3 - -[system.cpu.fuPool.FUList7] -type=FUDesc -opList=system.cpu.fuPool.FUList7.opList0 -count=1 - -[system.cpu.fuPool] -type=FUPool -FUList=system.cpu.fuPool.FUList0 system.cpu.fuPool.FUList1 system.cpu.fuPool.FUList2 system.cpu.fuPool.FUList3 system.cpu.fuPool.FUList4 system.cpu.fuPool.FUList5 system.cpu.fuPool.FUList6 system.cpu.fuPool.FUList7 - -[system.cpu] -type=DerivO3CPU -clock=500 -phase=0 -numThreads=1 -cpu_id=0 -activity=0 -workload=system.cpu.workload0 system.cpu.workload1 -checker=null -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -cachePorts=200 -decodeToFetchDelay=1 -renameToFetchDelay=1 -iewToFetchDelay=1 -commitToFetchDelay=1 -fetchWidth=8 -renameToDecodeDelay=1 -iewToDecodeDelay=1 -commitToDecodeDelay=1 -fetchToDecodeDelay=1 -decodeWidth=8 -iewToRenameDelay=1 -commitToRenameDelay=1 -decodeToRenameDelay=1 -renameWidth=8 -commitToIEWDelay=1 -renameToIEWDelay=2 -issueToExecuteDelay=1 -dispatchWidth=8 -issueWidth=8 -wbWidth=8 -wbDepth=1 -fuPool=system.cpu.fuPool -iewToCommitDelay=1 -renameToROBDelay=1 -commitWidth=8 -squashWidth=8 -trapLatency=13 -backComSize=5 -forwardComSize=5 -predType=tournament -localPredictorSize=2048 -localCtrBits=2 -localHistoryTableSize=2048 -localHistoryBits=11 -globalPredictorSize=8192 -globalCtrBits=2 -globalHistoryBits=13 -choicePredictorSize=8192 -choiceCtrBits=2 -BTBEntries=4096 -BTBTagSize=16 -RASSize=16 -LQEntries=32 -SQEntries=32 -LFSTSize=1024 -SSITSize=1024 -numPhysIntRegs=256 -numPhysFloatRegs=256 -numIQEntries=64 -numROBEntries=192 -smtNumFetchingThreads=1 -smtFetchPolicy=SingleThread -smtLSQPolicy=Partitioned -smtLSQThreshold=100 -smtIQPolicy=Partitioned -smtIQThreshold=100 -smtROBPolicy=Partitioned -smtROBThreshold=100 -smtCommitPolicy=RoundRobin -instShiftAmt=2 -defer_registration=false -function_trace=false -function_trace_start=0 - -[system.cpu.icache] -type=BaseCache -size=131072 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=20 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.dcache] -type=BaseCache -size=262144 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=20 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.l2cache] -type=BaseCache -size=2097152 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.toL2Bus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - diff --git a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.out b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.out deleted file mode 100644 index bdf29a72a..000000000 --- a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.out +++ /dev/null @@ -1,366 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system] -type=System -physmem=system.physmem -mem_mode=atomic - -[system.membus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.workload] -type=LiveProcess -cmd=insttest -executable=tests/test-progs/insttest/bin/sparc/linux/insttest -input=cin -output=cout -env= -cwd= -system=system -uid=100 -euid=100 -gid=100 -egid=100 -pid=100 -ppid=99 - -[system.cpu.fuPool.FUList0.opList0] -type=OpDesc -opClass=IntAlu -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList0] -type=FUDesc -opList=system.cpu.fuPool.FUList0.opList0 -count=6 - -[system.cpu.fuPool.FUList1.opList0] -type=OpDesc -opClass=IntMult -opLat=3 -issueLat=1 - -[system.cpu.fuPool.FUList1.opList1] -type=OpDesc -opClass=IntDiv -opLat=20 -issueLat=19 - -[system.cpu.fuPool.FUList1] -type=FUDesc -opList=system.cpu.fuPool.FUList1.opList0 system.cpu.fuPool.FUList1.opList1 -count=2 - -[system.cpu.fuPool.FUList2.opList0] -type=OpDesc -opClass=FloatAdd -opLat=2 -issueLat=1 - -[system.cpu.fuPool.FUList2.opList1] -type=OpDesc -opClass=FloatCmp -opLat=2 -issueLat=1 - -[system.cpu.fuPool.FUList2.opList2] -type=OpDesc -opClass=FloatCvt -opLat=2 -issueLat=1 - -[system.cpu.fuPool.FUList2] -type=FUDesc -opList=system.cpu.fuPool.FUList2.opList0 system.cpu.fuPool.FUList2.opList1 system.cpu.fuPool.FUList2.opList2 -count=4 - -[system.cpu.fuPool.FUList3.opList0] -type=OpDesc -opClass=FloatMult -opLat=4 -issueLat=1 - -[system.cpu.fuPool.FUList3.opList1] -type=OpDesc -opClass=FloatDiv -opLat=12 -issueLat=12 - -[system.cpu.fuPool.FUList3.opList2] -type=OpDesc -opClass=FloatSqrt -opLat=24 -issueLat=24 - -[system.cpu.fuPool.FUList3] -type=FUDesc -opList=system.cpu.fuPool.FUList3.opList0 system.cpu.fuPool.FUList3.opList1 system.cpu.fuPool.FUList3.opList2 -count=2 - -[system.cpu.fuPool.FUList4.opList0] -type=OpDesc -opClass=MemRead -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList4] -type=FUDesc -opList=system.cpu.fuPool.FUList4.opList0 -count=0 - -[system.cpu.fuPool.FUList5.opList0] -type=OpDesc -opClass=MemWrite -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList5] -type=FUDesc -opList=system.cpu.fuPool.FUList5.opList0 -count=0 - -[system.cpu.fuPool.FUList6.opList0] -type=OpDesc -opClass=MemRead -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList6.opList1] -type=OpDesc -opClass=MemWrite -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList6] -type=FUDesc -opList=system.cpu.fuPool.FUList6.opList0 system.cpu.fuPool.FUList6.opList1 -count=4 - -[system.cpu.fuPool.FUList7.opList0] -type=OpDesc -opClass=IprAccess -opLat=3 -issueLat=3 - -[system.cpu.fuPool.FUList7] -type=FUDesc -opList=system.cpu.fuPool.FUList7.opList0 -count=1 - -[system.cpu.fuPool] -type=FUPool -FUList=system.cpu.fuPool.FUList0 system.cpu.fuPool.FUList1 system.cpu.fuPool.FUList2 system.cpu.fuPool.FUList3 system.cpu.fuPool.FUList4 system.cpu.fuPool.FUList5 system.cpu.fuPool.FUList6 system.cpu.fuPool.FUList7 - -[system.cpu] -type=DerivO3CPU -clock=500 -phase=0 -numThreads=1 -cpu_id=0 -activity=0 -workload=system.cpu.workload -checker=null -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -cachePorts=200 -decodeToFetchDelay=1 -renameToFetchDelay=1 -iewToFetchDelay=1 -commitToFetchDelay=1 -fetchWidth=8 -renameToDecodeDelay=1 -iewToDecodeDelay=1 -commitToDecodeDelay=1 -fetchToDecodeDelay=1 -decodeWidth=8 -iewToRenameDelay=1 -commitToRenameDelay=1 -decodeToRenameDelay=1 -renameWidth=8 -commitToIEWDelay=1 -renameToIEWDelay=2 -issueToExecuteDelay=1 -dispatchWidth=8 -issueWidth=8 -wbWidth=8 -wbDepth=1 -fuPool=system.cpu.fuPool -iewToCommitDelay=1 -renameToROBDelay=1 -commitWidth=8 -squashWidth=8 -trapLatency=13 -backComSize=5 -forwardComSize=5 -predType=tournament -localPredictorSize=2048 -localCtrBits=2 -localHistoryTableSize=2048 -localHistoryBits=11 -globalPredictorSize=8192 -globalCtrBits=2 -globalHistoryBits=13 -choicePredictorSize=8192 -choiceCtrBits=2 -BTBEntries=4096 -BTBTagSize=16 -RASSize=16 -LQEntries=32 -SQEntries=32 -LFSTSize=1024 -SSITSize=1024 -numPhysIntRegs=256 -numPhysFloatRegs=256 -numIQEntries=64 -numROBEntries=192 -smtNumFetchingThreads=1 -smtFetchPolicy=SingleThread -smtLSQPolicy=Partitioned -smtLSQThreshold=100 -smtIQPolicy=Partitioned -smtIQThreshold=100 -smtROBPolicy=Partitioned -smtROBThreshold=100 -smtCommitPolicy=RoundRobin -instShiftAmt=2 -defer_registration=false -function_trace=false -function_trace_start=0 - -[system.cpu.icache] -type=BaseCache -size=131072 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=20 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.dcache] -type=BaseCache -size=262144 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=20 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.l2cache] -type=BaseCache -size=2097152 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.toL2Bus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - diff --git a/tests/quick/02.insttest/ref/sparc/linux/simple-atomic/config.out b/tests/quick/02.insttest/ref/sparc/linux/simple-atomic/config.out deleted file mode 100644 index c1a77ba0d..000000000 --- a/tests/quick/02.insttest/ref/sparc/linux/simple-atomic/config.out +++ /dev/null @@ -1,58 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system] -type=System -physmem=system.physmem -mem_mode=atomic - -[system.membus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.workload] -type=LiveProcess -cmd=insttest -executable=tests/test-progs/insttest/bin/sparc/linux/insttest -input=cin -output=cout -env= -cwd= -system=system -uid=100 -euid=100 -gid=100 -egid=100 -pid=100 -ppid=99 - -[system.cpu] -type=AtomicSimpleCPU -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -system=system -cpu_id=0 -workload=system.cpu.workload -clock=500 -phase=0 -defer_registration=false -width=1 -function_trace=false -function_trace_start=0 -simulate_stalls=false - diff --git a/tests/quick/02.insttest/ref/sparc/linux/simple-timing/config.out b/tests/quick/02.insttest/ref/sparc/linux/simple-timing/config.out deleted file mode 100644 index df1a9c852..000000000 --- a/tests/quick/02.insttest/ref/sparc/linux/simple-timing/config.out +++ /dev/null @@ -1,177 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system] -type=System -physmem=system.physmem -mem_mode=atomic - -[system.membus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.workload] -type=LiveProcess -cmd=insttest -executable=tests/test-progs/insttest/bin/sparc/linux/insttest -input=cin -output=cout -env= -cwd= -system=system -uid=100 -euid=100 -gid=100 -egid=100 -pid=100 -ppid=99 - -[system.cpu] -type=TimingSimpleCPU -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -system=system -cpu_id=0 -workload=system.cpu.workload -clock=500 -phase=0 -defer_registration=false -// width not specified -function_trace=false -function_trace_start=0 -// simulate_stalls not specified - -[system.cpu.toL2Bus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.icache] -type=BaseCache -size=131072 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.dcache] -type=BaseCache -size=262144 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.l2cache] -type=BaseCache -size=2097152 -assoc=2 -block_size=64 -latency=10000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/config.out b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/config.out deleted file mode 100644 index 1461f2550..000000000 --- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/config.out +++ /dev/null @@ -1,888 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system] -type=LinuxAlphaSystem -boot_cpu_frequency=500 -physmem=system.physmem -mem_mode=atomic -kernel=/dist/m5/system/binaries/vmlinux -console=/dist/m5/system/binaries/console -pal=/dist/m5/system/binaries/ts_osfpal -boot_osflags=root=/dev/hda1 console=ttyS0 -readfile=tests/halt.sh -symbolfile= -init_param=0 -system_type=34 -system_rev=1024 - -[system.membus] -type=Bus -bus_id=1 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.intrctrl] -type=IntrControl -sys=system - -[system.tsunami] -type=Tsunami -system=system -intrctrl=system.intrctrl - -[system.membus.responder] -type=IsaFake -pio_addr=0 -pio_latency=1 -pio_size=8 -ret_bad_addr=true -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.l2c] -type=BaseCache -size=4194304 -assoc=8 -block_size=64 -latency=10000 -mshrs=92 -tgts_per_mshr=16 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=100000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.bridge] -type=Bridge -req_size_a=16 -req_size_b=16 -resp_size_a=16 -resp_size_b=16 -delay=50000 -nack_delay=4000 -write_ack=false -fix_partial_write_a=false -fix_partial_write_b=true - -[system.disk0.image.child] -type=RawDiskImage -image_file=/dist/m5/system/disks/linux-latest.img -read_only=true - -[system.disk0.image] -type=CowDiskImage -child=system.disk0.image.child -image_file= -table_size=65536 -read_only=false - -[system.disk0] -type=IdeDisk -image=system.disk0.image -driveID=master -delay=1000000 - -[system.disk2.image.child] -type=RawDiskImage -image_file=/dist/m5/system/disks/linux-bigswap2.img -read_only=true - -[system.disk2.image] -type=CowDiskImage -child=system.disk2.image.child -image_file= -table_size=65536 -read_only=false - -[system.disk2] -type=IdeDisk -image=system.disk2.image -driveID=master -delay=1000000 - -[system.cpu0.itb] -type=AlphaITB -size=48 - -[system.cpu0.dtb] -type=AlphaDTB -size=64 - -[system.cpu0] -type=AtomicSimpleCPU -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -system=system -cpu_id=0 -itb=system.cpu0.itb -dtb=system.cpu0.dtb -profile=0 -do_quiesce=true -do_checkpoint_insts=true -do_statistics_insts=true -clock=500 -phase=0 -defer_registration=false -width=1 -function_trace=false -function_trace_start=0 -simulate_stalls=false - -[system.cpu0.icache.protocol] -type=CoherenceProtocol -protocol=moesi -do_upgrades=true - -[system.cpu0.icache] -type=BaseCache -size=32768 -assoc=1 -block_size=64 -latency=1000 -mshrs=4 -tgts_per_mshr=8 -write_buffers=8 -prioritizeRequests=false -protocol=system.cpu0.icache.protocol -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu0.dcache.protocol] -type=CoherenceProtocol -protocol=moesi -do_upgrades=true - -[system.cpu0.dcache] -type=BaseCache -size=32768 -assoc=4 -block_size=64 -latency=1000 -mshrs=4 -tgts_per_mshr=8 -write_buffers=8 -prioritizeRequests=false -protocol=system.cpu0.dcache.protocol -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu1.itb] -type=AlphaITB -size=48 - -[system.cpu1.dtb] -type=AlphaDTB -size=64 - -[system.cpu1] -type=AtomicSimpleCPU -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -system=system -cpu_id=1 -itb=system.cpu1.itb -dtb=system.cpu1.dtb -profile=0 -do_quiesce=true -do_checkpoint_insts=true -do_statistics_insts=true -clock=500 -phase=0 -defer_registration=false -width=1 -function_trace=false -function_trace_start=0 -simulate_stalls=false - -[system.cpu1.icache.protocol] -type=CoherenceProtocol -protocol=moesi -do_upgrades=true - -[system.cpu1.icache] -type=BaseCache -size=32768 -assoc=1 -block_size=64 -latency=1000 -mshrs=4 -tgts_per_mshr=8 -write_buffers=8 -prioritizeRequests=false -protocol=system.cpu1.icache.protocol -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu1.dcache.protocol] -type=CoherenceProtocol -protocol=moesi -do_upgrades=true - -[system.cpu1.dcache] -type=BaseCache -size=32768 -assoc=4 -block_size=64 -latency=1000 -mshrs=4 -tgts_per_mshr=8 -write_buffers=8 -prioritizeRequests=false -protocol=system.cpu1.dcache.protocol -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.simple_disk.disk] -type=RawDiskImage -image_file=/dist/m5/system/disks/linux-latest.img -read_only=true - -[system.simple_disk] -type=SimpleDisk -system=system -disk=system.simple_disk.disk - -[system.tsunami.fake_uart1] -type=IsaFake -pio_addr=8804615848696 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_uart2] -type=IsaFake -pio_addr=8804615848936 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_uart3] -type=IsaFake -pio_addr=8804615848680 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_uart4] -type=IsaFake -pio_addr=8804615848944 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_ppc] -type=IsaFake -pio_addr=8804615848891 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.cchip] -type=TsunamiCChip -pio_addr=8803072344064 -pio_latency=1000 -platform=system.tsunami -system=system -tsunami=system.tsunami - -[system.tsunami.io] -type=TsunamiIO -pio_addr=8804615847936 -pio_latency=1000 -frequency=976562500 -platform=system.tsunami -system=system -time=2009 1 1 0 0 0 3 1 -year_is_bcd=false -tsunami=system.tsunami - -[] -type=PciConfigAll -pio_latency=1 -bus=0 -size=16777216 -platform=system.tsunami -system=system - -[system.sim_console] -type=SimConsole -intr_control=system.intrctrl -output=console -port=3456 -append_name=true -number=0 - -[system.tsunami.console] -type=AlphaConsole -sim_console=system.sim_console -disk=system.simple_disk -pio_addr=8804682956800 -system=system -cpu=system.cpu0 -platform=system.tsunami -pio_latency=1000 - -[system.tsunami.fake_ata1] -type=IsaFake -pio_addr=8804615848304 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_ata0] -type=IsaFake -pio_addr=8804615848432 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.pchip] -type=TsunamiPChip -pio_addr=8802535473152 -pio_latency=1000 -platform=system.tsunami -system=system -tsunami=system.tsunami - -[system.tsunami.fake_pnp_read3] -type=IsaFake -pio_addr=8804615848643 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_pnp_read2] -type=IsaFake -pio_addr=8804615848579 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_pnp_read1] -type=IsaFake -pio_addr=8804615848515 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_pnp_read0] -type=IsaFake -pio_addr=8804615848451 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_pnp_read7] -type=IsaFake -pio_addr=8804615848899 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_pnp_read6] -type=IsaFake -pio_addr=8804615848835 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_pnp_read5] -type=IsaFake -pio_addr=8804615848771 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_pnp_read4] -type=IsaFake -pio_addr=8804615848707 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_pnp_write] -type=IsaFake -pio_addr=8804615850617 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fb] -type=BadDevice -devicename=FrameBuffer -pio_addr=8804615848912 -system=system -platform=system.tsunami -pio_latency=1000 - -[system.tsunami.ethernet.configdata] -type=PciConfigData -VendorID=4107 -DeviceID=34 -Command=0 -Status=656 -Revision=0 -ProgIF=0 -SubClassCode=0 -ClassCode=2 -CacheLineSize=0 -LatencyTimer=0 -HeaderType=0 -BIST=0 -BAR0=1 -BAR1=0 -BAR2=0 -BAR3=0 -BAR4=0 -BAR5=0 -CardbusCIS=0 -SubsystemVendorID=0 -SubsystemID=0 -ExpansionROM=0 -InterruptLine=30 -InterruptPin=1 -MinimumGrant=176 -MaximumLatency=52 -BAR0Size=256 -BAR1Size=4096 -BAR2Size=0 -BAR3Size=0 -BAR4Size=0 -BAR5Size=0 - -[system.tsunami.ethernet] -type=NSGigE -system=system -platform=system.tsunami -min_backoff_delay=4000 -max_backoff_delay=10000000 -configdata=system.tsunami.ethernet.configdata -pci_bus=0 -pci_dev=1 -pci_func=0 -pio_latency=1000 -config_latency=20000 -clock=0 -dma_desc_free=false -dma_data_free=false -dma_read_delay=0 -dma_write_delay=0 -dma_read_factor=0 -dma_write_factor=0 -dma_no_allocate=true -intr_delay=10000000 -rx_delay=1000000 -tx_delay=1000000 -rx_fifo_size=524288 -tx_fifo_size=524288 -rx_filter=true -hardware_address=00:90:00:00:00:01 -rx_thread=false -tx_thread=false -rss=false - -[system.tsunami.etherint] -type=NSGigEInt -peer=null -device=system.tsunami.ethernet - -[system.tsunami.fake_OROM] -type=IsaFake -pio_addr=8796093677568 -pio_latency=1000 -pio_size=393216 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.uart] -type=Uart8250 -pio_addr=8804615848952 -pio_latency=1000 -platform=system.tsunami -sim_console=system.sim_console -system=system - -[system.tsunami.fake_sm_chip] -type=IsaFake -pio_addr=8804615848816 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_pnp_addr] -type=IsaFake -pio_addr=8804615848569 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.ide.configdata] -type=PciConfigData -VendorID=32902 -DeviceID=28945 -Command=0 -Status=640 -Revision=0 -ProgIF=133 -SubClassCode=1 -ClassCode=1 -CacheLineSize=0 -LatencyTimer=0 -HeaderType=0 -BIST=0 -BAR0=1 -BAR1=1 -BAR2=1 -BAR3=1 -BAR4=1 -BAR5=1 -CardbusCIS=0 -SubsystemVendorID=0 -SubsystemID=0 -ExpansionROM=0 -InterruptLine=31 -InterruptPin=1 -MinimumGrant=0 -MaximumLatency=0 -BAR0Size=8 -BAR1Size=4 -BAR2Size=8 -BAR3Size=4 -BAR4Size=16 -BAR5Size=0 - -[system.tsunami.ide] -type=IdeController -system=system -platform=system.tsunami -min_backoff_delay=4000 -max_backoff_delay=10000000 -configdata=system.tsunami.ide.configdata -pci_bus=0 -pci_dev=0 -pci_func=0 -pio_latency=1000 -config_latency=20000 -disks=system.disk0 system.disk2 - -[system.iobus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=true -block_size=64 - -[system.toL2Bus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.toL2Bus.responder] -type=IsaFake -pio_addr=0 -pio_latency=1 -pio_size=8 -ret_bad_addr=true -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/config.out b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/config.out deleted file mode 100644 index a196b7dc6..000000000 --- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/config.out +++ /dev/null @@ -1,773 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system] -type=LinuxAlphaSystem -boot_cpu_frequency=500 -physmem=system.physmem -mem_mode=atomic -kernel=/dist/m5/system/binaries/vmlinux -console=/dist/m5/system/binaries/console -pal=/dist/m5/system/binaries/ts_osfpal -boot_osflags=root=/dev/hda1 console=ttyS0 -readfile=tests/halt.sh -symbolfile= -init_param=0 -system_type=34 -system_rev=1024 - -[system.membus] -type=Bus -bus_id=1 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.intrctrl] -type=IntrControl -sys=system - -[system.tsunami] -type=Tsunami -system=system -intrctrl=system.intrctrl - -[system.membus.responder] -type=IsaFake -pio_addr=0 -pio_latency=1 -pio_size=8 -ret_bad_addr=true -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.l2c] -type=BaseCache -size=4194304 -assoc=8 -block_size=64 -latency=10000 -mshrs=92 -tgts_per_mshr=16 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=100000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.bridge] -type=Bridge -req_size_a=16 -req_size_b=16 -resp_size_a=16 -resp_size_b=16 -delay=50000 -nack_delay=4000 -write_ack=false -fix_partial_write_a=false -fix_partial_write_b=true - -[system.disk0.image.child] -type=RawDiskImage -image_file=/dist/m5/system/disks/linux-latest.img -read_only=true - -[system.disk0.image] -type=CowDiskImage -child=system.disk0.image.child -image_file= -table_size=65536 -read_only=false - -[system.disk0] -type=IdeDisk -image=system.disk0.image -driveID=master -delay=1000000 - -[system.disk2.image.child] -type=RawDiskImage -image_file=/dist/m5/system/disks/linux-bigswap2.img -read_only=true - -[system.disk2.image] -type=CowDiskImage -child=system.disk2.image.child -image_file= -table_size=65536 -read_only=false - -[system.disk2] -type=IdeDisk -image=system.disk2.image -driveID=master -delay=1000000 - -[system.simple_disk.disk] -type=RawDiskImage -image_file=/dist/m5/system/disks/linux-latest.img -read_only=true - -[system.simple_disk] -type=SimpleDisk -system=system -disk=system.simple_disk.disk - -[system.tsunami.fake_uart1] -type=IsaFake -pio_addr=8804615848696 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_uart2] -type=IsaFake -pio_addr=8804615848936 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_uart3] -type=IsaFake -pio_addr=8804615848680 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_uart4] -type=IsaFake -pio_addr=8804615848944 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_ppc] -type=IsaFake -pio_addr=8804615848891 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.cchip] -type=TsunamiCChip -pio_addr=8803072344064 -pio_latency=1000 -platform=system.tsunami -system=system -tsunami=system.tsunami - -[system.tsunami.io] -type=TsunamiIO -pio_addr=8804615847936 -pio_latency=1000 -frequency=976562500 -platform=system.tsunami -system=system -time=2009 1 1 0 0 0 3 1 -year_is_bcd=false -tsunami=system.tsunami - -[] -type=PciConfigAll -pio_latency=1 -bus=0 -size=16777216 -platform=system.tsunami -system=system - -[system.sim_console] -type=SimConsole -intr_control=system.intrctrl -output=console -port=3456 -append_name=true -number=0 - -[system.cpu.itb] -type=AlphaITB -size=48 - -[system.cpu.dtb] -type=AlphaDTB -size=64 - -[system.cpu] -type=AtomicSimpleCPU -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -system=system -cpu_id=0 -itb=system.cpu.itb -dtb=system.cpu.dtb -profile=0 -do_quiesce=true -do_checkpoint_insts=true -do_statistics_insts=true -clock=500 -phase=0 -defer_registration=false -width=1 -function_trace=false -function_trace_start=0 -simulate_stalls=false - -[system.tsunami.console] -type=AlphaConsole -sim_console=system.sim_console -disk=system.simple_disk -pio_addr=8804682956800 -system=system -cpu=system.cpu -platform=system.tsunami -pio_latency=1000 - -[system.tsunami.fake_ata1] -type=IsaFake -pio_addr=8804615848304 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_ata0] -type=IsaFake -pio_addr=8804615848432 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.pchip] -type=TsunamiPChip -pio_addr=8802535473152 -pio_latency=1000 -platform=system.tsunami -system=system -tsunami=system.tsunami - -[system.tsunami.fake_pnp_read3] -type=IsaFake -pio_addr=8804615848643 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_pnp_read2] -type=IsaFake -pio_addr=8804615848579 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_pnp_read1] -type=IsaFake -pio_addr=8804615848515 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_pnp_read0] -type=IsaFake -pio_addr=8804615848451 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_pnp_read7] -type=IsaFake -pio_addr=8804615848899 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_pnp_read6] -type=IsaFake -pio_addr=8804615848835 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_pnp_read5] -type=IsaFake -pio_addr=8804615848771 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_pnp_read4] -type=IsaFake -pio_addr=8804615848707 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_pnp_write] -type=IsaFake -pio_addr=8804615850617 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fb] -type=BadDevice -devicename=FrameBuffer -pio_addr=8804615848912 -system=system -platform=system.tsunami -pio_latency=1000 - -[system.tsunami.ethernet.configdata] -type=PciConfigData -VendorID=4107 -DeviceID=34 -Command=0 -Status=656 -Revision=0 -ProgIF=0 -SubClassCode=0 -ClassCode=2 -CacheLineSize=0 -LatencyTimer=0 -HeaderType=0 -BIST=0 -BAR0=1 -BAR1=0 -BAR2=0 -BAR3=0 -BAR4=0 -BAR5=0 -CardbusCIS=0 -SubsystemVendorID=0 -SubsystemID=0 -ExpansionROM=0 -InterruptLine=30 -InterruptPin=1 -MinimumGrant=176 -MaximumLatency=52 -BAR0Size=256 -BAR1Size=4096 -BAR2Size=0 -BAR3Size=0 -BAR4Size=0 -BAR5Size=0 - -[system.tsunami.ethernet] -type=NSGigE -system=system -platform=system.tsunami -min_backoff_delay=4000 -max_backoff_delay=10000000 -configdata=system.tsunami.ethernet.configdata -pci_bus=0 -pci_dev=1 -pci_func=0 -pio_latency=1000 -config_latency=20000 -clock=0 -dma_desc_free=false -dma_data_free=false -dma_read_delay=0 -dma_write_delay=0 -dma_read_factor=0 -dma_write_factor=0 -dma_no_allocate=true -intr_delay=10000000 -rx_delay=1000000 -tx_delay=1000000 -rx_fifo_size=524288 -tx_fifo_size=524288 -rx_filter=true -hardware_address=00:90:00:00:00:01 -rx_thread=false -tx_thread=false -rss=false - -[system.tsunami.etherint] -type=NSGigEInt -peer=null -device=system.tsunami.ethernet - -[system.tsunami.fake_OROM] -type=IsaFake -pio_addr=8796093677568 -pio_latency=1000 -pio_size=393216 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.uart] -type=Uart8250 -pio_addr=8804615848952 -pio_latency=1000 -platform=system.tsunami -sim_console=system.sim_console -system=system - -[system.tsunami.fake_sm_chip] -type=IsaFake -pio_addr=8804615848816 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_pnp_addr] -type=IsaFake -pio_addr=8804615848569 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.ide.configdata] -type=PciConfigData -VendorID=32902 -DeviceID=28945 -Command=0 -Status=640 -Revision=0 -ProgIF=133 -SubClassCode=1 -ClassCode=1 -CacheLineSize=0 -LatencyTimer=0 -HeaderType=0 -BIST=0 -BAR0=1 -BAR1=1 -BAR2=1 -BAR3=1 -BAR4=1 -BAR5=1 -CardbusCIS=0 -SubsystemVendorID=0 -SubsystemID=0 -ExpansionROM=0 -InterruptLine=31 -InterruptPin=1 -MinimumGrant=0 -MaximumLatency=0 -BAR0Size=8 -BAR1Size=4 -BAR2Size=8 -BAR3Size=4 -BAR4Size=16 -BAR5Size=0 - -[system.tsunami.ide] -type=IdeController -system=system -platform=system.tsunami -min_backoff_delay=4000 -max_backoff_delay=10000000 -configdata=system.tsunami.ide.configdata -pci_bus=0 -pci_dev=0 -pci_func=0 -pio_latency=1000 -config_latency=20000 -disks=system.disk0 system.disk2 - -[system.toL2Bus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.toL2Bus.responder] -type=IsaFake -pio_addr=0 -pio_latency=1 -pio_size=8 -ret_bad_addr=true -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.iobus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=true -block_size=64 - -[system.cpu.icache.protocol] -type=CoherenceProtocol -protocol=moesi -do_upgrades=true - -[system.cpu.icache] -type=BaseCache -size=32768 -assoc=1 -block_size=64 -latency=1000 -mshrs=4 -tgts_per_mshr=8 -write_buffers=8 -prioritizeRequests=false -protocol=system.cpu.icache.protocol -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.dcache.protocol] -type=CoherenceProtocol -protocol=moesi -do_upgrades=true - -[system.cpu.dcache] -type=BaseCache -size=32768 -assoc=4 -block_size=64 -latency=1000 -mshrs=4 -tgts_per_mshr=8 -write_buffers=8 -prioritizeRequests=false -protocol=system.cpu.dcache.protocol -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/config.out b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/config.out deleted file mode 100644 index bb98fee3e..000000000 --- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/config.out +++ /dev/null @@ -1,888 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system] -type=LinuxAlphaSystem -boot_cpu_frequency=500 -physmem=system.physmem -mem_mode=timing -kernel=/dist/m5/system/binaries/vmlinux -console=/dist/m5/system/binaries/console -pal=/dist/m5/system/binaries/ts_osfpal -boot_osflags=root=/dev/hda1 console=ttyS0 -readfile=tests/halt.sh -symbolfile= -init_param=0 -system_type=34 -system_rev=1024 - -[system.membus] -type=Bus -bus_id=1 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.intrctrl] -type=IntrControl -sys=system - -[system.tsunami] -type=Tsunami -system=system -intrctrl=system.intrctrl - -[system.membus.responder] -type=IsaFake -pio_addr=0 -pio_latency=1 -pio_size=8 -ret_bad_addr=true -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.l2c] -type=BaseCache -size=4194304 -assoc=8 -block_size=64 -latency=10000 -mshrs=92 -tgts_per_mshr=16 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=100000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.bridge] -type=Bridge -req_size_a=16 -req_size_b=16 -resp_size_a=16 -resp_size_b=16 -delay=50000 -nack_delay=4000 -write_ack=false -fix_partial_write_a=false -fix_partial_write_b=true - -[system.disk0.image.child] -type=RawDiskImage -image_file=/dist/m5/system/disks/linux-latest.img -read_only=true - -[system.disk0.image] -type=CowDiskImage -child=system.disk0.image.child -image_file= -table_size=65536 -read_only=false - -[system.disk0] -type=IdeDisk -image=system.disk0.image -driveID=master -delay=1000000 - -[system.disk2.image.child] -type=RawDiskImage -image_file=/dist/m5/system/disks/linux-bigswap2.img -read_only=true - -[system.disk2.image] -type=CowDiskImage -child=system.disk2.image.child -image_file= -table_size=65536 -read_only=false - -[system.disk2] -type=IdeDisk -image=system.disk2.image -driveID=master -delay=1000000 - -[system.cpu0.itb] -type=AlphaITB -size=48 - -[system.cpu0.dtb] -type=AlphaDTB -size=64 - -[system.cpu0] -type=TimingSimpleCPU -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -system=system -cpu_id=0 -itb=system.cpu0.itb -dtb=system.cpu0.dtb -profile=0 -do_quiesce=true -do_checkpoint_insts=true -do_statistics_insts=true -clock=500 -phase=0 -defer_registration=false -// width not specified -function_trace=false -function_trace_start=0 -// simulate_stalls not specified - -[system.cpu0.icache.protocol] -type=CoherenceProtocol -protocol=moesi -do_upgrades=true - -[system.cpu0.icache] -type=BaseCache -size=32768 -assoc=1 -block_size=64 -latency=1000 -mshrs=4 -tgts_per_mshr=8 -write_buffers=8 -prioritizeRequests=false -protocol=system.cpu0.icache.protocol -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu0.dcache.protocol] -type=CoherenceProtocol -protocol=moesi -do_upgrades=true - -[system.cpu0.dcache] -type=BaseCache -size=32768 -assoc=4 -block_size=64 -latency=1000 -mshrs=4 -tgts_per_mshr=8 -write_buffers=8 -prioritizeRequests=false -protocol=system.cpu0.dcache.protocol -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu1.itb] -type=AlphaITB -size=48 - -[system.cpu1.dtb] -type=AlphaDTB -size=64 - -[system.cpu1] -type=TimingSimpleCPU -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -system=system -cpu_id=1 -itb=system.cpu1.itb -dtb=system.cpu1.dtb -profile=0 -do_quiesce=true -do_checkpoint_insts=true -do_statistics_insts=true -clock=500 -phase=0 -defer_registration=false -// width not specified -function_trace=false -function_trace_start=0 -// simulate_stalls not specified - -[system.cpu1.icache.protocol] -type=CoherenceProtocol -protocol=moesi -do_upgrades=true - -[system.cpu1.icache] -type=BaseCache -size=32768 -assoc=1 -block_size=64 -latency=1000 -mshrs=4 -tgts_per_mshr=8 -write_buffers=8 -prioritizeRequests=false -protocol=system.cpu1.icache.protocol -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu1.dcache.protocol] -type=CoherenceProtocol -protocol=moesi -do_upgrades=true - -[system.cpu1.dcache] -type=BaseCache -size=32768 -assoc=4 -block_size=64 -latency=1000 -mshrs=4 -tgts_per_mshr=8 -write_buffers=8 -prioritizeRequests=false -protocol=system.cpu1.dcache.protocol -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.simple_disk.disk] -type=RawDiskImage -image_file=/dist/m5/system/disks/linux-latest.img -read_only=true - -[system.simple_disk] -type=SimpleDisk -system=system -disk=system.simple_disk.disk - -[system.tsunami.fake_uart1] -type=IsaFake -pio_addr=8804615848696 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_uart2] -type=IsaFake -pio_addr=8804615848936 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_uart3] -type=IsaFake -pio_addr=8804615848680 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_uart4] -type=IsaFake -pio_addr=8804615848944 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_ppc] -type=IsaFake -pio_addr=8804615848891 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.cchip] -type=TsunamiCChip -pio_addr=8803072344064 -pio_latency=1000 -platform=system.tsunami -system=system -tsunami=system.tsunami - -[system.tsunami.io] -type=TsunamiIO -pio_addr=8804615847936 -pio_latency=1000 -frequency=976562500 -platform=system.tsunami -system=system -time=2009 1 1 0 0 0 3 1 -year_is_bcd=false -tsunami=system.tsunami - -[] -type=PciConfigAll -pio_latency=1 -bus=0 -size=16777216 -platform=system.tsunami -system=system - -[system.sim_console] -type=SimConsole -intr_control=system.intrctrl -output=console -port=3456 -append_name=true -number=0 - -[system.tsunami.console] -type=AlphaConsole -sim_console=system.sim_console -disk=system.simple_disk -pio_addr=8804682956800 -system=system -cpu=system.cpu0 -platform=system.tsunami -pio_latency=1000 - -[system.tsunami.fake_ata1] -type=IsaFake -pio_addr=8804615848304 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_ata0] -type=IsaFake -pio_addr=8804615848432 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.pchip] -type=TsunamiPChip -pio_addr=8802535473152 -pio_latency=1000 -platform=system.tsunami -system=system -tsunami=system.tsunami - -[system.tsunami.fake_pnp_read3] -type=IsaFake -pio_addr=8804615848643 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_pnp_read2] -type=IsaFake -pio_addr=8804615848579 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_pnp_read1] -type=IsaFake -pio_addr=8804615848515 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_pnp_read0] -type=IsaFake -pio_addr=8804615848451 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_pnp_read7] -type=IsaFake -pio_addr=8804615848899 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_pnp_read6] -type=IsaFake -pio_addr=8804615848835 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_pnp_read5] -type=IsaFake -pio_addr=8804615848771 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_pnp_read4] -type=IsaFake -pio_addr=8804615848707 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_pnp_write] -type=IsaFake -pio_addr=8804615850617 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fb] -type=BadDevice -devicename=FrameBuffer -pio_addr=8804615848912 -system=system -platform=system.tsunami -pio_latency=1000 - -[system.tsunami.ethernet.configdata] -type=PciConfigData -VendorID=4107 -DeviceID=34 -Command=0 -Status=656 -Revision=0 -ProgIF=0 -SubClassCode=0 -ClassCode=2 -CacheLineSize=0 -LatencyTimer=0 -HeaderType=0 -BIST=0 -BAR0=1 -BAR1=0 -BAR2=0 -BAR3=0 -BAR4=0 -BAR5=0 -CardbusCIS=0 -SubsystemVendorID=0 -SubsystemID=0 -ExpansionROM=0 -InterruptLine=30 -InterruptPin=1 -MinimumGrant=176 -MaximumLatency=52 -BAR0Size=256 -BAR1Size=4096 -BAR2Size=0 -BAR3Size=0 -BAR4Size=0 -BAR5Size=0 - -[system.tsunami.ethernet] -type=NSGigE -system=system -platform=system.tsunami -min_backoff_delay=4000 -max_backoff_delay=10000000 -configdata=system.tsunami.ethernet.configdata -pci_bus=0 -pci_dev=1 -pci_func=0 -pio_latency=1000 -config_latency=20000 -clock=0 -dma_desc_free=false -dma_data_free=false -dma_read_delay=0 -dma_write_delay=0 -dma_read_factor=0 -dma_write_factor=0 -dma_no_allocate=true -intr_delay=10000000 -rx_delay=1000000 -tx_delay=1000000 -rx_fifo_size=524288 -tx_fifo_size=524288 -rx_filter=true -hardware_address=00:90:00:00:00:01 -rx_thread=false -tx_thread=false -rss=false - -[system.tsunami.etherint] -type=NSGigEInt -peer=null -device=system.tsunami.ethernet - -[system.tsunami.fake_OROM] -type=IsaFake -pio_addr=8796093677568 -pio_latency=1000 -pio_size=393216 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.uart] -type=Uart8250 -pio_addr=8804615848952 -pio_latency=1000 -platform=system.tsunami -sim_console=system.sim_console -system=system - -[system.tsunami.fake_sm_chip] -type=IsaFake -pio_addr=8804615848816 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_pnp_addr] -type=IsaFake -pio_addr=8804615848569 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.ide.configdata] -type=PciConfigData -VendorID=32902 -DeviceID=28945 -Command=0 -Status=640 -Revision=0 -ProgIF=133 -SubClassCode=1 -ClassCode=1 -CacheLineSize=0 -LatencyTimer=0 -HeaderType=0 -BIST=0 -BAR0=1 -BAR1=1 -BAR2=1 -BAR3=1 -BAR4=1 -BAR5=1 -CardbusCIS=0 -SubsystemVendorID=0 -SubsystemID=0 -ExpansionROM=0 -InterruptLine=31 -InterruptPin=1 -MinimumGrant=0 -MaximumLatency=0 -BAR0Size=8 -BAR1Size=4 -BAR2Size=8 -BAR3Size=4 -BAR4Size=16 -BAR5Size=0 - -[system.tsunami.ide] -type=IdeController -system=system -platform=system.tsunami -min_backoff_delay=4000 -max_backoff_delay=10000000 -configdata=system.tsunami.ide.configdata -pci_bus=0 -pci_dev=0 -pci_func=0 -pio_latency=1000 -config_latency=20000 -disks=system.disk0 system.disk2 - -[system.iobus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=true -block_size=64 - -[system.toL2Bus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.toL2Bus.responder] -type=IsaFake -pio_addr=0 -pio_latency=1 -pio_size=8 -ret_bad_addr=true -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/config.out b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/config.out deleted file mode 100644 index e0e32bce4..000000000 --- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/config.out +++ /dev/null @@ -1,773 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system] -type=LinuxAlphaSystem -boot_cpu_frequency=500 -physmem=system.physmem -mem_mode=timing -kernel=/dist/m5/system/binaries/vmlinux -console=/dist/m5/system/binaries/console -pal=/dist/m5/system/binaries/ts_osfpal -boot_osflags=root=/dev/hda1 console=ttyS0 -readfile=tests/halt.sh -symbolfile= -init_param=0 -system_type=34 -system_rev=1024 - -[system.membus] -type=Bus -bus_id=1 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.intrctrl] -type=IntrControl -sys=system - -[system.tsunami] -type=Tsunami -system=system -intrctrl=system.intrctrl - -[system.membus.responder] -type=IsaFake -pio_addr=0 -pio_latency=1 -pio_size=8 -ret_bad_addr=true -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.l2c] -type=BaseCache -size=4194304 -assoc=8 -block_size=64 -latency=10000 -mshrs=92 -tgts_per_mshr=16 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=100000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.bridge] -type=Bridge -req_size_a=16 -req_size_b=16 -resp_size_a=16 -resp_size_b=16 -delay=50000 -nack_delay=4000 -write_ack=false -fix_partial_write_a=false -fix_partial_write_b=true - -[system.disk0.image.child] -type=RawDiskImage -image_file=/dist/m5/system/disks/linux-latest.img -read_only=true - -[system.disk0.image] -type=CowDiskImage -child=system.disk0.image.child -image_file= -table_size=65536 -read_only=false - -[system.disk0] -type=IdeDisk -image=system.disk0.image -driveID=master -delay=1000000 - -[system.disk2.image.child] -type=RawDiskImage -image_file=/dist/m5/system/disks/linux-bigswap2.img -read_only=true - -[system.disk2.image] -type=CowDiskImage -child=system.disk2.image.child -image_file= -table_size=65536 -read_only=false - -[system.disk2] -type=IdeDisk -image=system.disk2.image -driveID=master -delay=1000000 - -[system.simple_disk.disk] -type=RawDiskImage -image_file=/dist/m5/system/disks/linux-latest.img -read_only=true - -[system.simple_disk] -type=SimpleDisk -system=system -disk=system.simple_disk.disk - -[system.tsunami.fake_uart1] -type=IsaFake -pio_addr=8804615848696 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_uart2] -type=IsaFake -pio_addr=8804615848936 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_uart3] -type=IsaFake -pio_addr=8804615848680 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_uart4] -type=IsaFake -pio_addr=8804615848944 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_ppc] -type=IsaFake -pio_addr=8804615848891 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.cchip] -type=TsunamiCChip -pio_addr=8803072344064 -pio_latency=1000 -platform=system.tsunami -system=system -tsunami=system.tsunami - -[system.tsunami.io] -type=TsunamiIO -pio_addr=8804615847936 -pio_latency=1000 -frequency=976562500 -platform=system.tsunami -system=system -time=2009 1 1 0 0 0 3 1 -year_is_bcd=false -tsunami=system.tsunami - -[] -type=PciConfigAll -pio_latency=1 -bus=0 -size=16777216 -platform=system.tsunami -system=system - -[system.sim_console] -type=SimConsole -intr_control=system.intrctrl -output=console -port=3456 -append_name=true -number=0 - -[system.cpu.itb] -type=AlphaITB -size=48 - -[system.cpu.dtb] -type=AlphaDTB -size=64 - -[system.cpu] -type=TimingSimpleCPU -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -system=system -cpu_id=0 -itb=system.cpu.itb -dtb=system.cpu.dtb -profile=0 -do_quiesce=true -do_checkpoint_insts=true -do_statistics_insts=true -clock=500 -phase=0 -defer_registration=false -// width not specified -function_trace=false -function_trace_start=0 -// simulate_stalls not specified - -[system.tsunami.console] -type=AlphaConsole -sim_console=system.sim_console -disk=system.simple_disk -pio_addr=8804682956800 -system=system -cpu=system.cpu -platform=system.tsunami -pio_latency=1000 - -[system.tsunami.fake_ata1] -type=IsaFake -pio_addr=8804615848304 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_ata0] -type=IsaFake -pio_addr=8804615848432 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.pchip] -type=TsunamiPChip -pio_addr=8802535473152 -pio_latency=1000 -platform=system.tsunami -system=system -tsunami=system.tsunami - -[system.tsunami.fake_pnp_read3] -type=IsaFake -pio_addr=8804615848643 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_pnp_read2] -type=IsaFake -pio_addr=8804615848579 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_pnp_read1] -type=IsaFake -pio_addr=8804615848515 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_pnp_read0] -type=IsaFake -pio_addr=8804615848451 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_pnp_read7] -type=IsaFake -pio_addr=8804615848899 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_pnp_read6] -type=IsaFake -pio_addr=8804615848835 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_pnp_read5] -type=IsaFake -pio_addr=8804615848771 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_pnp_read4] -type=IsaFake -pio_addr=8804615848707 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_pnp_write] -type=IsaFake -pio_addr=8804615850617 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fb] -type=BadDevice -devicename=FrameBuffer -pio_addr=8804615848912 -system=system -platform=system.tsunami -pio_latency=1000 - -[system.tsunami.ethernet.configdata] -type=PciConfigData -VendorID=4107 -DeviceID=34 -Command=0 -Status=656 -Revision=0 -ProgIF=0 -SubClassCode=0 -ClassCode=2 -CacheLineSize=0 -LatencyTimer=0 -HeaderType=0 -BIST=0 -BAR0=1 -BAR1=0 -BAR2=0 -BAR3=0 -BAR4=0 -BAR5=0 -CardbusCIS=0 -SubsystemVendorID=0 -SubsystemID=0 -ExpansionROM=0 -InterruptLine=30 -InterruptPin=1 -MinimumGrant=176 -MaximumLatency=52 -BAR0Size=256 -BAR1Size=4096 -BAR2Size=0 -BAR3Size=0 -BAR4Size=0 -BAR5Size=0 - -[system.tsunami.ethernet] -type=NSGigE -system=system -platform=system.tsunami -min_backoff_delay=4000 -max_backoff_delay=10000000 -configdata=system.tsunami.ethernet.configdata -pci_bus=0 -pci_dev=1 -pci_func=0 -pio_latency=1000 -config_latency=20000 -clock=0 -dma_desc_free=false -dma_data_free=false -dma_read_delay=0 -dma_write_delay=0 -dma_read_factor=0 -dma_write_factor=0 -dma_no_allocate=true -intr_delay=10000000 -rx_delay=1000000 -tx_delay=1000000 -rx_fifo_size=524288 -tx_fifo_size=524288 -rx_filter=true -hardware_address=00:90:00:00:00:01 -rx_thread=false -tx_thread=false -rss=false - -[system.tsunami.etherint] -type=NSGigEInt -peer=null -device=system.tsunami.ethernet - -[system.tsunami.fake_OROM] -type=IsaFake -pio_addr=8796093677568 -pio_latency=1000 -pio_size=393216 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.uart] -type=Uart8250 -pio_addr=8804615848952 -pio_latency=1000 -platform=system.tsunami -sim_console=system.sim_console -system=system - -[system.tsunami.fake_sm_chip] -type=IsaFake -pio_addr=8804615848816 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.fake_pnp_addr] -type=IsaFake -pio_addr=8804615848569 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.tsunami.ide.configdata] -type=PciConfigData -VendorID=32902 -DeviceID=28945 -Command=0 -Status=640 -Revision=0 -ProgIF=133 -SubClassCode=1 -ClassCode=1 -CacheLineSize=0 -LatencyTimer=0 -HeaderType=0 -BIST=0 -BAR0=1 -BAR1=1 -BAR2=1 -BAR3=1 -BAR4=1 -BAR5=1 -CardbusCIS=0 -SubsystemVendorID=0 -SubsystemID=0 -ExpansionROM=0 -InterruptLine=31 -InterruptPin=1 -MinimumGrant=0 -MaximumLatency=0 -BAR0Size=8 -BAR1Size=4 -BAR2Size=8 -BAR3Size=4 -BAR4Size=16 -BAR5Size=0 - -[system.tsunami.ide] -type=IdeController -system=system -platform=system.tsunami -min_backoff_delay=4000 -max_backoff_delay=10000000 -configdata=system.tsunami.ide.configdata -pci_bus=0 -pci_dev=0 -pci_func=0 -pio_latency=1000 -config_latency=20000 -disks=system.disk0 system.disk2 - -[system.toL2Bus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.toL2Bus.responder] -type=IsaFake -pio_addr=0 -pio_latency=1 -pio_size=8 -ret_bad_addr=true -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=system.tsunami -system=system - -[system.iobus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=true -block_size=64 - -[system.cpu.icache.protocol] -type=CoherenceProtocol -protocol=moesi -do_upgrades=true - -[system.cpu.icache] -type=BaseCache -size=32768 -assoc=1 -block_size=64 -latency=1000 -mshrs=4 -tgts_per_mshr=8 -write_buffers=8 -prioritizeRequests=false -protocol=system.cpu.icache.protocol -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.dcache.protocol] -type=CoherenceProtocol -protocol=moesi -do_upgrades=true - -[system.cpu.dcache] -type=BaseCache -size=32768 -assoc=4 -block_size=64 -latency=1000 -mshrs=4 -tgts_per_mshr=8 -write_buffers=8 -prioritizeRequests=false -protocol=system.cpu.dcache.protocol -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - diff --git a/tests/quick/20.eio-short/ref/alpha/eio/detailed/config.out b/tests/quick/20.eio-short/ref/alpha/eio/detailed/config.out deleted file mode 100644 index c92557696..000000000 --- a/tests/quick/20.eio-short/ref/alpha/eio/detailed/config.out +++ /dev/null @@ -1,279 +0,0 @@ -[root] -type=Root -clock=1000000000000 -max_tick=0 -progress_interval=0 -output_file=cout - -[system.physmem] -type=PhysicalMemory -file= -// range not specified -latency=1 - -[system] -type=System -physmem=system.physmem -mem_mode=atomic - -[system.workload] -type=EioProcess -file=/z/ktlim2/clean/newmem-merge/tests/test-progs/anagram/bin/anagram-vshort.eio.gz -chkpt= -output=cout -system=system - -[system.cpu.mem] -type=Bus -bus_id=0 - -[system.cpu.fuPool.FUList0.opList0] -type=OpDesc -opClass=IntAlu -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList0] -type=FUDesc -opList=system.cpu.fuPool.FUList0.opList0 -count=6 - -[system.cpu.fuPool.FUList1.opList0] -type=OpDesc -opClass=IntMult -opLat=3 -issueLat=1 - -[system.cpu.fuPool.FUList1.opList1] -type=OpDesc -opClass=IntDiv -opLat=20 -issueLat=19 - -[system.cpu.fuPool.FUList1] -type=FUDesc -opList=system.cpu.fuPool.FUList1.opList0 system.cpu.fuPool.FUList1.opList1 -count=2 - -[system.cpu.fuPool.FUList2.opList0] -type=OpDesc -opClass=FloatAdd -opLat=2 -issueLat=1 - -[system.cpu.fuPool.FUList2.opList1] -type=OpDesc -opClass=FloatCmp -opLat=2 -issueLat=1 - -[system.cpu.fuPool.FUList2.opList2] -type=OpDesc -opClass=FloatCvt -opLat=2 -issueLat=1 - -[system.cpu.fuPool.FUList2] -type=FUDesc -opList=system.cpu.fuPool.FUList2.opList0 system.cpu.fuPool.FUList2.opList1 system.cpu.fuPool.FUList2.opList2 -count=4 - -[system.cpu.fuPool.FUList3.opList0] -type=OpDesc -opClass=FloatMult -opLat=4 -issueLat=1 - -[system.cpu.fuPool.FUList3.opList1] -type=OpDesc -opClass=FloatDiv -opLat=12 -issueLat=12 - -[system.cpu.fuPool.FUList3.opList2] -type=OpDesc -opClass=FloatSqrt -opLat=24 -issueLat=24 - -[system.cpu.fuPool.FUList3] -type=FUDesc -opList=system.cpu.fuPool.FUList3.opList0 system.cpu.fuPool.FUList3.opList1 system.cpu.fuPool.FUList3.opList2 -count=2 - -[system.cpu.fuPool.FUList4.opList0] -type=OpDesc -opClass=MemRead -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList4] -type=FUDesc -opList=system.cpu.fuPool.FUList4.opList0 -count=0 - -[system.cpu.fuPool.FUList5.opList0] -type=OpDesc -opClass=MemWrite -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList5] -type=FUDesc -opList=system.cpu.fuPool.FUList5.opList0 -count=0 - -[system.cpu.fuPool.FUList6.opList0] -type=OpDesc -opClass=MemRead -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList6.opList1] -type=OpDesc -opClass=MemWrite -opLat=1 -issueLat=1 - -[system.cpu.fuPool.FUList6] -type=FUDesc -opList=system.cpu.fuPool.FUList6.opList0 system.cpu.fuPool.FUList6.opList1 -count=4 - -[system.cpu.fuPool.FUList7.opList0] -type=OpDesc -opClass=IprAccess -opLat=3 -issueLat=3 - -[system.cpu.fuPool.FUList7] -type=FUDesc -opList=system.cpu.fuPool.FUList7.opList0 -count=1 - -[system.cpu.fuPool] -type=FUPool -FUList=system.cpu.fuPool.FUList0 system.cpu.fuPool.FUList1 system.cpu.fuPool.FUList2 system.cpu.fuPool.FUList3 system.cpu.fuPool.FUList4 system.cpu.fuPool.FUList5 system.cpu.fuPool.FUList6 system.cpu.fuPool.FUList7 - -[system.cpu] -type=DerivO3CPU -clock=1 -numThreads=1 -activity=0 -workload=system.workload -mem=system.cpu.mem -checker=null -max_insts_any_thread=500000 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -cachePorts=200 -decodeToFetchDelay=1 -renameToFetchDelay=1 -iewToFetchDelay=1 -commitToFetchDelay=1 -fetchWidth=8 -renameToDecodeDelay=1 -iewToDecodeDelay=1 -commitToDecodeDelay=1 -fetchToDecodeDelay=1 -decodeWidth=8 -iewToRenameDelay=1 -commitToRenameDelay=1 -decodeToRenameDelay=1 -renameWidth=8 -commitToIEWDelay=1 -renameToIEWDelay=2 -issueToExecuteDelay=1 -dispatchWidth=8 -issueWidth=8 -wbWidth=8 -wbDepth=1 -fuPool=system.cpu.fuPool -iewToCommitDelay=1 -renameToROBDelay=1 -commitWidth=8 -squashWidth=8 -trapLatency=13 -backComSize=5 -forwardComSize=5 -predType=tournament -localPredictorSize=2048 -localCtrBits=2 -localHistoryTableSize=2048 -localHistoryBits=11 -globalPredictorSize=8192 -globalCtrBits=2 -globalHistoryBits=13 -choicePredictorSize=8192 -choiceCtrBits=2 -BTBEntries=4096 -BTBTagSize=16 -RASSize=16 -LQEntries=32 -SQEntries=32 -LFSTSize=1024 -SSITSize=1024 -numPhysIntRegs=256 -numPhysFloatRegs=256 -numIQEntries=64 -numROBEntries=192 -smtNumFetchingThreads=1 -smtFetchPolicy=SingleThread -smtLSQPolicy=Partitioned -smtLSQThreshold=100 -smtIQPolicy=Partitioned -smtIQThreshold=100 -smtROBPolicy=Partitioned -smtROBThreshold=100 -smtCommitPolicy=RoundRobin -instShiftAmt=2 -defer_registration=false -function_trace=false -function_trace_start=0 - -[trace] -flags= -start=0 -bufsize=0 -file=cout -dump_on_exit=false -ignore= - -[stats] -descriptions=true -project_name=test -simulation_name=test -simulation_sample=0 -text_file=m5stats.txt -text_compat=true -mysql_db= -mysql_user= -mysql_password= -mysql_host= -events_start=-1 -dump_reset=false -dump_cycle=0 -dump_period=0 -ignore_events= - -[random] -seed=1 - -[exetrace] -speculative=true -print_cycle=true -print_opclass=true -print_thread=true -print_effaddr=true -print_data=true -print_iregs=false -print_fetchseq=false -print_cpseq=false -pc_symbol=true -intel_format=false -trace_system=client - -[debug] -break_cycles= - diff --git a/tests/quick/20.eio-short/ref/alpha/eio/simple-atomic/config.out b/tests/quick/20.eio-short/ref/alpha/eio/simple-atomic/config.out deleted file mode 100644 index 73c363bc4..000000000 --- a/tests/quick/20.eio-short/ref/alpha/eio/simple-atomic/config.out +++ /dev/null @@ -1,49 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system] -type=System -physmem=system.physmem -mem_mode=atomic - -[system.membus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.workload] -type=EioProcess -file=tests/test-progs/anagram/bin/alpha/eio/anagram-vshort.eio.gz -chkpt= -output=cout -system=system - -[system.cpu] -type=AtomicSimpleCPU -max_insts_any_thread=500000 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -system=system -cpu_id=0 -workload=system.cpu.workload -clock=500 -phase=0 -defer_registration=false -width=1 -function_trace=false -function_trace_start=0 -simulate_stalls=false - diff --git a/tests/quick/20.eio-short/ref/alpha/eio/simple-timing/config.out b/tests/quick/20.eio-short/ref/alpha/eio/simple-timing/config.out deleted file mode 100644 index e85a0bee1..000000000 --- a/tests/quick/20.eio-short/ref/alpha/eio/simple-timing/config.out +++ /dev/null @@ -1,168 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system] -type=System -physmem=system.physmem -mem_mode=atomic - -[system.membus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.workload] -type=EioProcess -file=tests/test-progs/anagram/bin/alpha/eio/anagram-vshort.eio.gz -chkpt= -output=cout -system=system - -[system.cpu] -type=TimingSimpleCPU -max_insts_any_thread=500000 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -system=system -cpu_id=0 -workload=system.cpu.workload -clock=500 -phase=0 -defer_registration=false -// width not specified -function_trace=false -function_trace_start=0 -// simulate_stalls not specified - -[system.cpu.toL2Bus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[system.cpu.icache] -type=BaseCache -size=131072 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.dcache] -type=BaseCache -size=262144 -assoc=2 -block_size=64 -latency=1000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu.l2cache] -type=BaseCache -size=2097152 -assoc=2 -block_size=64 -latency=10000 -mshrs=10 -tgts_per_mshr=5 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=100000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - diff --git a/tests/quick/50.memtest/ref/alpha/linux/memtest/config.out b/tests/quick/50.memtest/ref/alpha/linux/memtest/config.out deleted file mode 100644 index 6bf1f2712..000000000 --- a/tests/quick/50.memtest/ref/alpha/linux/memtest/config.out +++ /dev/null @@ -1,516 +0,0 @@ -[root] -type=Root -dummy=0 - -[system.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system] -type=System -physmem=system.physmem -mem_mode=timing - -[system.membus] -type=Bus -bus_id=0 -clock=2 -width=16 -responder_set=false -block_size=64 - -[system.l2c] -type=BaseCache -size=65536 -assoc=8 -block_size=64 -latency=10000 -mshrs=92 -tgts_per_mshr=16 -write_buffers=8 -prioritizeRequests=false -protocol=null -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=100000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu6] -type=MemTest -memory_size=65536 -percent_reads=65 -percent_functional=50 -percent_uncacheable=10 -progress_interval=10000 -percent_source_unaligned=50 -percent_dest_unaligned=50 -trace_addr=0 -max_loads=100000 -atomic=false - -[system.cpu6.l1c.protocol] -type=CoherenceProtocol -protocol=moesi -do_upgrades=true - -[system.cpu6.l1c] -type=BaseCache -size=32768 -assoc=4 -block_size=64 -latency=1000 -mshrs=12 -tgts_per_mshr=8 -write_buffers=8 -prioritizeRequests=false -protocol=system.cpu6.l1c.protocol -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu4] -type=MemTest -memory_size=65536 -percent_reads=65 -percent_functional=50 -percent_uncacheable=10 -progress_interval=10000 -percent_source_unaligned=50 -percent_dest_unaligned=50 -trace_addr=0 -max_loads=100000 -atomic=false - -[system.cpu4.l1c.protocol] -type=CoherenceProtocol -protocol=moesi -do_upgrades=true - -[system.cpu4.l1c] -type=BaseCache -size=32768 -assoc=4 -block_size=64 -latency=1000 -mshrs=12 -tgts_per_mshr=8 -write_buffers=8 -prioritizeRequests=false -protocol=system.cpu4.l1c.protocol -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu5] -type=MemTest -memory_size=65536 -percent_reads=65 -percent_functional=50 -percent_uncacheable=10 -progress_interval=10000 -percent_source_unaligned=50 -percent_dest_unaligned=50 -trace_addr=0 -max_loads=100000 -atomic=false - -[system.cpu5.l1c.protocol] -type=CoherenceProtocol -protocol=moesi -do_upgrades=true - -[system.cpu5.l1c] -type=BaseCache -size=32768 -assoc=4 -block_size=64 -latency=1000 -mshrs=12 -tgts_per_mshr=8 -write_buffers=8 -prioritizeRequests=false -protocol=system.cpu5.l1c.protocol -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu2] -type=MemTest -memory_size=65536 -percent_reads=65 -percent_functional=50 -percent_uncacheable=10 -progress_interval=10000 -percent_source_unaligned=50 -percent_dest_unaligned=50 -trace_addr=0 -max_loads=100000 -atomic=false - -[system.cpu2.l1c.protocol] -type=CoherenceProtocol -protocol=moesi -do_upgrades=true - -[system.cpu2.l1c] -type=BaseCache -size=32768 -assoc=4 -block_size=64 -latency=1000 -mshrs=12 -tgts_per_mshr=8 -write_buffers=8 -prioritizeRequests=false -protocol=system.cpu2.l1c.protocol -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu3] -type=MemTest -memory_size=65536 -percent_reads=65 -percent_functional=50 -percent_uncacheable=10 -progress_interval=10000 -percent_source_unaligned=50 -percent_dest_unaligned=50 -trace_addr=0 -max_loads=100000 -atomic=false - -[system.cpu3.l1c.protocol] -type=CoherenceProtocol -protocol=moesi -do_upgrades=true - -[system.cpu3.l1c] -type=BaseCache -size=32768 -assoc=4 -block_size=64 -latency=1000 -mshrs=12 -tgts_per_mshr=8 -write_buffers=8 -prioritizeRequests=false -protocol=system.cpu3.l1c.protocol -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu0] -type=MemTest -memory_size=65536 -percent_reads=65 -percent_functional=50 -percent_uncacheable=10 -progress_interval=10000 -percent_source_unaligned=50 -percent_dest_unaligned=50 -trace_addr=0 -max_loads=100000 -atomic=false - -[system.cpu0.l1c.protocol] -type=CoherenceProtocol -protocol=moesi -do_upgrades=true - -[system.cpu0.l1c] -type=BaseCache -size=32768 -assoc=4 -block_size=64 -latency=1000 -mshrs=12 -tgts_per_mshr=8 -write_buffers=8 -prioritizeRequests=false -protocol=system.cpu0.l1c.protocol -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.cpu1] -type=MemTest -memory_size=65536 -percent_reads=65 -percent_functional=50 -percent_uncacheable=10 -progress_interval=10000 -percent_source_unaligned=50 -percent_dest_unaligned=50 -trace_addr=0 -max_loads=100000 -atomic=false - -[system.cpu1.l1c.protocol] -type=CoherenceProtocol -protocol=moesi -do_upgrades=true - -[system.cpu1.l1c] -type=BaseCache -size=32768 -assoc=4 -block_size=64 -latency=1000 -mshrs=12 -tgts_per_mshr=8 -write_buffers=8 -prioritizeRequests=false -protocol=system.cpu1.l1c.protocol -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.funcmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[system.cpu7] -type=MemTest -memory_size=65536 -percent_reads=65 -percent_functional=50 -percent_uncacheable=10 -progress_interval=10000 -percent_source_unaligned=50 -percent_dest_unaligned=50 -trace_addr=0 -max_loads=100000 -atomic=false - -[system.cpu7.l1c.protocol] -type=CoherenceProtocol -protocol=moesi -do_upgrades=true - -[system.cpu7.l1c] -type=BaseCache -size=32768 -assoc=4 -block_size=64 -latency=1000 -mshrs=12 -tgts_per_mshr=8 -write_buffers=8 -prioritizeRequests=false -protocol=system.cpu7.l1c.protocol -trace_addr=0 -hash_delay=1 -repl=null -compressed_bus=false -store_compressed=false -adaptive_compression=false -compression_latency=0 -block_size=64 -max_miss_count=0 -addr_range=[0,18446744073709551615] -split=false -split_size=0 -lifo=false -two_queue=false -prefetch_miss=false -prefetch_access=false -prefetcher_size=100 -prefetch_past_page=false -prefetch_serial_squash=false -prefetch_latency=10000 -prefetch_degree=1 -prefetch_policy=none -prefetch_cache_check_push=true -prefetch_use_cpu_id=true -prefetch_data_accesses_only=false - -[system.toL2Bus] -type=Bus -bus_id=0 -clock=2 -width=16 -responder_set=false -block_size=64 - diff --git a/tests/quick/80.netperf-stream/ref/alpha/linux/twosys-tsunami-simple-atomic/config.out b/tests/quick/80.netperf-stream/ref/alpha/linux/twosys-tsunami-simple-atomic/config.out deleted file mode 100644 index 1ed581be9..000000000 --- a/tests/quick/80.netperf-stream/ref/alpha/linux/twosys-tsunami-simple-atomic/config.out +++ /dev/null @@ -1,1268 +0,0 @@ -[root] -type=Root -dummy=0 - -[testsys.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[testsys] -type=LinuxAlphaSystem -boot_cpu_frequency=1 -physmem=testsys.physmem -mem_mode=atomic -kernel=/dist/m5/system/binaries/vmlinux -console=/dist/m5/system/binaries/console -pal=/dist/m5/system/binaries/ts_osfpal -boot_osflags=root=/dev/hda1 console=ttyS0 -readfile=/Users/nate/work/m5/outgoing/configs/boot/netperf-stream-client.rcS -symbolfile= -init_param=0 -system_type=34 -system_rev=1024 - -[testsys.membus] -type=Bus -bus_id=1 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[testsys.intrctrl] -type=IntrControl -sys=testsys - -[testsys.tsunami] -type=Tsunami -system=testsys -intrctrl=testsys.intrctrl - -[testsys.membus.responder] -type=IsaFake -pio_addr=0 -pio_latency=1 -pio_size=8 -ret_bad_addr=true -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=testsys.tsunami -system=testsys - -[testsys.bridge] -type=Bridge -req_size_a=16 -req_size_b=16 -resp_size_a=16 -resp_size_b=16 -delay=50000 -nack_delay=4000 -write_ack=false -fix_partial_write_a=false -fix_partial_write_b=true - -[testsys.disk0.image.child] -type=RawDiskImage -image_file=/dist/m5/system/disks/linux-latest.img -read_only=true - -[testsys.disk0.image] -type=CowDiskImage -child=testsys.disk0.image.child -image_file= -table_size=65536 -read_only=false - -[testsys.disk0] -type=IdeDisk -image=testsys.disk0.image -driveID=master -delay=1000000 - -[testsys.disk2.image.child] -type=RawDiskImage -image_file=/dist/m5/system/disks/linux-bigswap2.img -read_only=true - -[testsys.disk2.image] -type=CowDiskImage -child=testsys.disk2.image.child -image_file= -table_size=65536 -read_only=false - -[testsys.disk2] -type=IdeDisk -image=testsys.disk2.image -driveID=master -delay=1000000 - -[testsys.simple_disk.disk] -type=RawDiskImage -image_file=/dist/m5/system/disks/linux-latest.img -read_only=true - -[testsys.simple_disk] -type=SimpleDisk -system=testsys -disk=testsys.simple_disk.disk - -[testsys.tsunami.fake_uart1] -type=IsaFake -pio_addr=8804615848696 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=testsys.tsunami -system=testsys - -[testsys.tsunami.fake_uart2] -type=IsaFake -pio_addr=8804615848936 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=testsys.tsunami -system=testsys - -[testsys.tsunami.fake_uart3] -type=IsaFake -pio_addr=8804615848680 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=testsys.tsunami -system=testsys - -[testsys.tsunami.fake_uart4] -type=IsaFake -pio_addr=8804615848944 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=testsys.tsunami -system=testsys - -[testsys.tsunami.fake_ppc] -type=IsaFake -pio_addr=8804615848891 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=testsys.tsunami -system=testsys - -[testsys.tsunami.cchip] -type=TsunamiCChip -pio_addr=8803072344064 -pio_latency=1000 -platform=testsys.tsunami -system=testsys -tsunami=testsys.tsunami - -[testsys.tsunami.io] -type=TsunamiIO -pio_addr=8804615847936 -pio_latency=1000 -frequency=976562500 -platform=testsys.tsunami -system=testsys -time=2009 1 1 0 0 0 3 1 -year_is_bcd=false -tsunami=testsys.tsunami - -[] -type=PciConfigAll -pio_latency=1 -bus=0 -size=16777216 -platform=testsys.tsunami -system=testsys - -[testsys.sim_console] -type=SimConsole -intr_control=testsys.intrctrl -output=console -port=3456 -append_name=true -number=0 - -[testsys.cpu.itb] -type=AlphaITB -size=48 - -[testsys.cpu.dtb] -type=AlphaDTB -size=64 - -[testsys.cpu] -type=AtomicSimpleCPU -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -system=testsys -cpu_id=0 -itb=testsys.cpu.itb -dtb=testsys.cpu.dtb -profile=0 -do_quiesce=true -do_checkpoint_insts=true -do_statistics_insts=true -clock=1 -phase=0 -defer_registration=false -width=1 -function_trace=false -function_trace_start=0 -simulate_stalls=false - -[testsys.tsunami.console] -type=AlphaConsole -sim_console=testsys.sim_console -disk=testsys.simple_disk -pio_addr=8804682956800 -system=testsys -cpu=testsys.cpu -platform=testsys.tsunami -pio_latency=1000 - -[testsys.tsunami.fake_ata1] -type=IsaFake -pio_addr=8804615848304 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=testsys.tsunami -system=testsys - -[testsys.tsunami.fake_ata0] -type=IsaFake -pio_addr=8804615848432 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=testsys.tsunami -system=testsys - -[testsys.tsunami.pchip] -type=TsunamiPChip -pio_addr=8802535473152 -pio_latency=1000 -platform=testsys.tsunami -system=testsys -tsunami=testsys.tsunami - -[testsys.tsunami.fake_pnp_read3] -type=IsaFake -pio_addr=8804615848643 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=testsys.tsunami -system=testsys - -[testsys.tsunami.fake_pnp_read2] -type=IsaFake -pio_addr=8804615848579 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=testsys.tsunami -system=testsys - -[testsys.tsunami.fake_pnp_read1] -type=IsaFake -pio_addr=8804615848515 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=testsys.tsunami -system=testsys - -[testsys.tsunami.fake_pnp_read0] -type=IsaFake -pio_addr=8804615848451 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=testsys.tsunami -system=testsys - -[testsys.tsunami.fake_pnp_read7] -type=IsaFake -pio_addr=8804615848899 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=testsys.tsunami -system=testsys - -[testsys.tsunami.fake_pnp_read6] -type=IsaFake -pio_addr=8804615848835 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=testsys.tsunami -system=testsys - -[testsys.tsunami.fake_pnp_read5] -type=IsaFake -pio_addr=8804615848771 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=testsys.tsunami -system=testsys - -[testsys.tsunami.fake_pnp_read4] -type=IsaFake -pio_addr=8804615848707 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=testsys.tsunami -system=testsys - -[testsys.tsunami.fake_pnp_write] -type=IsaFake -pio_addr=8804615850617 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=testsys.tsunami -system=testsys - -[testsys.tsunami.fb] -type=BadDevice -devicename=FrameBuffer -pio_addr=8804615848912 -system=testsys -platform=testsys.tsunami -pio_latency=1000 - -[testsys.tsunami.ethernet.configdata] -type=PciConfigData -VendorID=4107 -DeviceID=34 -Command=0 -Status=656 -Revision=0 -ProgIF=0 -SubClassCode=0 -ClassCode=2 -CacheLineSize=0 -LatencyTimer=0 -HeaderType=0 -BIST=0 -BAR0=1 -BAR1=0 -BAR2=0 -BAR3=0 -BAR4=0 -BAR5=0 -CardbusCIS=0 -SubsystemVendorID=0 -SubsystemID=0 -ExpansionROM=0 -InterruptLine=30 -InterruptPin=1 -MinimumGrant=176 -MaximumLatency=52 -BAR0Size=256 -BAR1Size=4096 -BAR2Size=0 -BAR3Size=0 -BAR4Size=0 -BAR5Size=0 - -[testsys.tsunami.ethernet] -type=NSGigE -system=testsys -platform=testsys.tsunami -min_backoff_delay=4000 -max_backoff_delay=10000000 -configdata=testsys.tsunami.ethernet.configdata -pci_bus=0 -pci_dev=1 -pci_func=0 -pio_latency=1000 -config_latency=20000 -clock=0 -dma_desc_free=false -dma_data_free=false -dma_read_delay=0 -dma_write_delay=0 -dma_read_factor=0 -dma_write_factor=0 -dma_no_allocate=true -intr_delay=10000000 -rx_delay=1000000 -tx_delay=1000000 -rx_fifo_size=524288 -tx_fifo_size=524288 -rx_filter=true -hardware_address=00:90:00:00:00:02 -rx_thread=false -tx_thread=false -rss=false - -[testsys.tsunami.etherint] -type=NSGigEInt -peer=null -device=testsys.tsunami.ethernet - -[testsys.tsunami.fake_OROM] -type=IsaFake -pio_addr=8796093677568 -pio_latency=1000 -pio_size=393216 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=testsys.tsunami -system=testsys - -[testsys.tsunami.uart] -type=Uart8250 -pio_addr=8804615848952 -pio_latency=1000 -platform=testsys.tsunami -sim_console=testsys.sim_console -system=testsys - -[testsys.tsunami.fake_sm_chip] -type=IsaFake -pio_addr=8804615848816 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=testsys.tsunami -system=testsys - -[testsys.tsunami.fake_pnp_addr] -type=IsaFake -pio_addr=8804615848569 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=testsys.tsunami -system=testsys - -[testsys.tsunami.ide.configdata] -type=PciConfigData -VendorID=32902 -DeviceID=28945 -Command=0 -Status=640 -Revision=0 -ProgIF=133 -SubClassCode=1 -ClassCode=1 -CacheLineSize=0 -LatencyTimer=0 -HeaderType=0 -BIST=0 -BAR0=1 -BAR1=1 -BAR2=1 -BAR3=1 -BAR4=1 -BAR5=1 -CardbusCIS=0 -SubsystemVendorID=0 -SubsystemID=0 -ExpansionROM=0 -InterruptLine=31 -InterruptPin=1 -MinimumGrant=0 -MaximumLatency=0 -BAR0Size=8 -BAR1Size=4 -BAR2Size=8 -BAR3Size=4 -BAR4Size=16 -BAR5Size=0 - -[testsys.tsunami.ide] -type=IdeController -system=testsys -platform=testsys.tsunami -min_backoff_delay=4000 -max_backoff_delay=10000000 -configdata=testsys.tsunami.ide.configdata -pci_bus=0 -pci_dev=0 -pci_func=0 -pio_latency=1000 -config_latency=20000 -disks=testsys.disk0 testsys.disk2 - -[testsys.iobus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=true -block_size=64 - -[drivesys.physmem] -type=PhysicalMemory -file= -range=[0,134217727] -latency=1 -zero=false - -[drivesys] -type=LinuxAlphaSystem -boot_cpu_frequency=1 -physmem=drivesys.physmem -mem_mode=atomic -kernel=/dist/m5/system/binaries/vmlinux -console=/dist/m5/system/binaries/console -pal=/dist/m5/system/binaries/ts_osfpal -boot_osflags=root=/dev/hda1 console=ttyS0 -readfile=/Users/nate/work/m5/outgoing/configs/boot/netperf-server.rcS -symbolfile= -init_param=0 -system_type=34 -system_rev=1024 - -[drivesys.intrctrl] -type=IntrControl -sys=drivesys - -[drivesys.tsunami] -type=Tsunami -system=drivesys -intrctrl=drivesys.intrctrl - -[drivesys.tsunami.ethernet.configdata] -type=PciConfigData -VendorID=4107 -DeviceID=34 -Command=0 -Status=656 -Revision=0 -ProgIF=0 -SubClassCode=0 -ClassCode=2 -CacheLineSize=0 -LatencyTimer=0 -HeaderType=0 -BIST=0 -BAR0=1 -BAR1=0 -BAR2=0 -BAR3=0 -BAR4=0 -BAR5=0 -CardbusCIS=0 -SubsystemVendorID=0 -SubsystemID=0 -ExpansionROM=0 -InterruptLine=30 -InterruptPin=1 -MinimumGrant=176 -MaximumLatency=52 -BAR0Size=256 -BAR1Size=4096 -BAR2Size=0 -BAR3Size=0 -BAR4Size=0 -BAR5Size=0 - -[drivesys.tsunami.ethernet] -type=NSGigE -system=drivesys -platform=drivesys.tsunami -min_backoff_delay=4000 -max_backoff_delay=10000000 -configdata=drivesys.tsunami.ethernet.configdata -pci_bus=0 -pci_dev=1 -pci_func=0 -pio_latency=1000 -config_latency=20000 -clock=0 -dma_desc_free=false -dma_data_free=false -dma_read_delay=0 -dma_write_delay=0 -dma_read_factor=0 -dma_write_factor=0 -dma_no_allocate=true -intr_delay=10000000 -rx_delay=1000000 -tx_delay=1000000 -rx_fifo_size=524288 -tx_fifo_size=524288 -rx_filter=true -hardware_address=00:90:00:00:00:01 -rx_thread=false -tx_thread=false -rss=false - -[drivesys.tsunami.etherint] -type=NSGigEInt -peer=null -device=drivesys.tsunami.ethernet - -[etherdump] -type=EtherDump -file=ethertrace -maxlen=96 - -[etherlink] -type=EtherLink -int1=testsys.tsunami.etherint -int2=drivesys.tsunami.etherint -speed=8000 -delay=0 -delay_var=0 -dump=etherdump - -[drivesys.membus] -type=Bus -bus_id=1 -clock=1000 -width=64 -responder_set=false -block_size=64 - -[drivesys.membus.responder] -type=IsaFake -pio_addr=0 -pio_latency=1 -pio_size=8 -ret_bad_addr=true -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=drivesys.tsunami -system=drivesys - -[drivesys.bridge] -type=Bridge -req_size_a=16 -req_size_b=16 -resp_size_a=16 -resp_size_b=16 -delay=50000 -nack_delay=4000 -write_ack=false -fix_partial_write_a=false -fix_partial_write_b=true - -[drivesys.disk0.image.child] -type=RawDiskImage -image_file=/dist/m5/system/disks/linux-latest.img -read_only=true - -[drivesys.disk0.image] -type=CowDiskImage -child=drivesys.disk0.image.child -image_file= -table_size=65536 -read_only=false - -[drivesys.disk0] -type=IdeDisk -image=drivesys.disk0.image -driveID=master -delay=1000000 - -[drivesys.disk2.image.child] -type=RawDiskImage -image_file=/dist/m5/system/disks/linux-bigswap2.img -read_only=true - -[drivesys.disk2.image] -type=CowDiskImage -child=drivesys.disk2.image.child -image_file= -table_size=65536 -read_only=false - -[drivesys.disk2] -type=IdeDisk -image=drivesys.disk2.image -driveID=master -delay=1000000 - -[drivesys.simple_disk.disk] -type=RawDiskImage -image_file=/dist/m5/system/disks/linux-latest.img -read_only=true - -[drivesys.simple_disk] -type=SimpleDisk -system=drivesys -disk=drivesys.simple_disk.disk - -[drivesys.tsunami.fake_uart1] -type=IsaFake -pio_addr=8804615848696 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=drivesys.tsunami -system=drivesys - -[drivesys.tsunami.fake_uart2] -type=IsaFake -pio_addr=8804615848936 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=drivesys.tsunami -system=drivesys - -[drivesys.tsunami.fake_uart3] -type=IsaFake -pio_addr=8804615848680 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=drivesys.tsunami -system=drivesys - -[drivesys.tsunami.fake_uart4] -type=IsaFake -pio_addr=8804615848944 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=drivesys.tsunami -system=drivesys - -[drivesys.tsunami.fake_ppc] -type=IsaFake -pio_addr=8804615848891 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=drivesys.tsunami -system=drivesys - -[drivesys.tsunami.cchip] -type=TsunamiCChip -pio_addr=8803072344064 -pio_latency=1000 -platform=drivesys.tsunami -system=drivesys -tsunami=drivesys.tsunami - -[drivesys.tsunami.io] -type=TsunamiIO -pio_addr=8804615847936 -pio_latency=1000 -frequency=976562500 -platform=drivesys.tsunami -system=drivesys -time=2009 1 1 0 0 0 3 1 -year_is_bcd=false -tsunami=drivesys.tsunami - -[] -type=PciConfigAll -pio_latency=1 -bus=0 -size=16777216 -platform=drivesys.tsunami -system=drivesys - -[drivesys.sim_console] -type=SimConsole -intr_control=drivesys.intrctrl -output=console -port=3456 -append_name=true -number=0 - -[drivesys.cpu.itb] -type=AlphaITB -size=48 - -[drivesys.cpu.dtb] -type=AlphaDTB -size=64 - -[drivesys.cpu] -type=AtomicSimpleCPU -max_insts_any_thread=0 -max_insts_all_threads=0 -max_loads_any_thread=0 -max_loads_all_threads=0 -progress_interval=0 -system=drivesys -cpu_id=0 -itb=drivesys.cpu.itb -dtb=drivesys.cpu.dtb -profile=0 -do_quiesce=true -do_checkpoint_insts=true -do_statistics_insts=true -clock=1 -phase=0 -defer_registration=false -width=1 -function_trace=false -function_trace_start=0 -simulate_stalls=false - -[drivesys.tsunami.console] -type=AlphaConsole -sim_console=drivesys.sim_console -disk=drivesys.simple_disk -pio_addr=8804682956800 -system=drivesys -cpu=drivesys.cpu -platform=drivesys.tsunami -pio_latency=1000 - -[drivesys.tsunami.fake_ata1] -type=IsaFake -pio_addr=8804615848304 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=drivesys.tsunami -system=drivesys - -[drivesys.tsunami.fake_ata0] -type=IsaFake -pio_addr=8804615848432 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=drivesys.tsunami -system=drivesys - -[drivesys.tsunami.pchip] -type=TsunamiPChip -pio_addr=8802535473152 -pio_latency=1000 -platform=drivesys.tsunami -system=drivesys -tsunami=drivesys.tsunami - -[drivesys.tsunami.fake_pnp_read3] -type=IsaFake -pio_addr=8804615848643 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=drivesys.tsunami -system=drivesys - -[drivesys.tsunami.fake_pnp_read2] -type=IsaFake -pio_addr=8804615848579 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=drivesys.tsunami -system=drivesys - -[drivesys.tsunami.fake_pnp_read1] -type=IsaFake -pio_addr=8804615848515 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=drivesys.tsunami -system=drivesys - -[drivesys.tsunami.fake_pnp_read0] -type=IsaFake -pio_addr=8804615848451 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=drivesys.tsunami -system=drivesys - -[drivesys.tsunami.fake_pnp_read7] -type=IsaFake -pio_addr=8804615848899 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=drivesys.tsunami -system=drivesys - -[drivesys.tsunami.fake_pnp_read6] -type=IsaFake -pio_addr=8804615848835 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=drivesys.tsunami -system=drivesys - -[drivesys.tsunami.fake_pnp_read5] -type=IsaFake -pio_addr=8804615848771 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=drivesys.tsunami -system=drivesys - -[drivesys.tsunami.fake_pnp_read4] -type=IsaFake -pio_addr=8804615848707 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=drivesys.tsunami -system=drivesys - -[drivesys.tsunami.fake_pnp_write] -type=IsaFake -pio_addr=8804615850617 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=drivesys.tsunami -system=drivesys - -[drivesys.tsunami.fb] -type=BadDevice -devicename=FrameBuffer -pio_addr=8804615848912 -system=drivesys -platform=drivesys.tsunami -pio_latency=1000 - -[drivesys.tsunami.fake_OROM] -type=IsaFake -pio_addr=8796093677568 -pio_latency=1000 -pio_size=393216 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=drivesys.tsunami -system=drivesys - -[drivesys.tsunami.uart] -type=Uart8250 -pio_addr=8804615848952 -pio_latency=1000 -platform=drivesys.tsunami -sim_console=drivesys.sim_console -system=drivesys - -[drivesys.tsunami.fake_sm_chip] -type=IsaFake -pio_addr=8804615848816 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=drivesys.tsunami -system=drivesys - -[drivesys.tsunami.fake_pnp_addr] -type=IsaFake -pio_addr=8804615848569 -pio_latency=1000 -pio_size=8 -ret_bad_addr=false -update_data=false -warn_access= -ret_data8=255 -ret_data16=65535 -ret_data32=4294967295 -ret_data64=18446744073709551615 -platform=drivesys.tsunami -system=drivesys - -[drivesys.tsunami.ide.configdata] -type=PciConfigData -VendorID=32902 -DeviceID=28945 -Command=0 -Status=640 -Revision=0 -ProgIF=133 -SubClassCode=1 -ClassCode=1 -CacheLineSize=0 -LatencyTimer=0 -HeaderType=0 -BIST=0 -BAR0=1 -BAR1=1 -BAR2=1 -BAR3=1 -BAR4=1 -BAR5=1 -CardbusCIS=0 -SubsystemVendorID=0 -SubsystemID=0 -ExpansionROM=0 -InterruptLine=31 -InterruptPin=1 -MinimumGrant=0 -MaximumLatency=0 -BAR0Size=8 -BAR1Size=4 -BAR2Size=8 -BAR3Size=4 -BAR4Size=16 -BAR5Size=0 - -[drivesys.tsunami.ide] -type=IdeController -system=drivesys -platform=drivesys.tsunami -min_backoff_delay=4000 -max_backoff_delay=10000000 -configdata=drivesys.tsunami.ide.configdata -pci_bus=0 -pci_dev=0 -pci_func=0 -pio_latency=1000 -config_latency=20000 -disks=drivesys.disk0 drivesys.disk2 - -[drivesys.iobus] -type=Bus -bus_id=0 -clock=1000 -width=64 -responder_set=true -block_size=64 - -- cgit v1.2.3 From 851e3c852be4eb031293ed271502a0e14ca9273f Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Fri, 3 Aug 2007 18:04:26 -0400 Subject: tests: replace all dest ref files on upgrade (if possible). Originally we were copying all source files in, but this caused problems when (large) inputs were copied along with outputs. Then we switched to just copying the standard files (m5stats.txt, etc.) but that was missing things like the *.console files. This fix should catch all the non-standard files too as long as they are copied in manually once when the test is set up. Also get a lot nicer about warning when files are ignored, and warn when expected files are missing. Those new Python sets sure are handy. --HG-- extra : convert_revision : 55c046de124522499af74a471968677c020bbf38 --- tests/SConscript | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/tests/SConscript b/tests/SConscript index 812ce8c11..62c4d0508 100644 --- a/tests/SConscript +++ b/tests/SConscript @@ -102,6 +102,19 @@ def print_test(target, source, env): printAction = env.Action(print_test, strfunction = None) +# Static vars for update_test: +# - long-winded message about ignored sources +ignore_msg = ''' +Note: The following file(s) will not be copied. New non-standard + output files must be copied manually once before update_ref will + recognize them as outputs. Otherwise they are assumed to be + inputs and are ignored. +''' +# - reference files always needed +needed_files = set(['stdout', 'stderr', 'm5stats.txt', 'config.ini']) +# - source files we always want to ignore +known_ignores = set(['status', 'outdiff', 'statsdiff']) + def update_test(target, source, env): """Update reference test outputs. @@ -112,9 +125,21 @@ def update_test(target, source, env): """ dest_dir = str(source[0].get_dir()) src_dir = str(source[1].get_dir()) - dest_files = os.listdir(dest_dir) - src_files = os.listdir(src_dir) - for f in ('stdout', 'stderr', 'm5stats.txt', 'config.ini'): + dest_files = set(os.listdir(dest_dir)) + src_files = set(os.listdir(src_dir)) + # Copy all of the required files plus any existing dest files. + wanted_files = needed_files | dest_files + missing_files = wanted_files - src_files + if len(missing_files) > 0: + print " WARNING: the following file(s) are missing " \ + "and will not be updated:" + print " ", " ,".join(missing_files) + copy_files = wanted_files - missing_files + warn_ignored_files = (src_files - copy_files) - known_ignores + if len(warn_ignored_files) > 0: + print ignore_msg, + print " ", ", ".join(warn_ignored_files) + for f in copy_files: if f in dest_files: print " Replacing file", f dest_files.remove(f) @@ -123,12 +148,6 @@ def update_test(target, source, env): copyAction = Copy(os.path.join(dest_dir, f), os.path.join(src_dir, f)) copyAction.strfunction = None Execute(copyAction) - # warn about any files in dest not overwritten (other than SCCS dir) - if 'SCCS' in dest_files: - dest_files.remove('SCCS') - if dest_files: - print "Warning: file(s) in", dest_dir, "not updated:", - print ', '.join(dest_files) return 0 def update_test_string(target, source, env): -- cgit v1.2.3 From bb3f7dc83b9a4c7b20aeb893fea447854c855225 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Fri, 3 Aug 2007 18:04:30 -0400 Subject: tests: new ref outputs for new cache model --HG-- extra : convert_revision : 244749072f97e425c2ca1cf296f2b95f37e99eb6 --- .../00.hello/ref/alpha/linux/o3-timing/config.ini | 45 +- .../00.hello/ref/alpha/linux/o3-timing/m5stats.txt | 517 ++++---- .../00.hello/ref/alpha/linux/o3-timing/stdout | 6 +- .../ref/alpha/linux/simple-atomic/config.ini | 6 +- .../ref/alpha/linux/simple-atomic/m5stats.txt | 7 +- .../00.hello/ref/alpha/linux/simple-atomic/stdout | 8 +- .../ref/alpha/linux/simple-timing/config.ini | 24 +- .../ref/alpha/linux/simple-timing/m5stats.txt | 148 ++- .../00.hello/ref/alpha/linux/simple-timing/stdout | 10 +- .../00.hello/ref/alpha/tru64/o3-timing/config.ini | 45 +- .../00.hello/ref/alpha/tru64/o3-timing/m5stats.txt | 478 +++---- .../00.hello/ref/alpha/tru64/o3-timing/stdout | 6 +- .../ref/alpha/tru64/simple-atomic/config.ini | 6 +- .../ref/alpha/tru64/simple-atomic/m5stats.txt | 7 +- .../00.hello/ref/alpha/tru64/simple-atomic/stdout | 8 +- .../ref/alpha/tru64/simple-timing/config.ini | 24 +- .../ref/alpha/tru64/simple-timing/m5stats.txt | 101 +- .../00.hello/ref/alpha/tru64/simple-timing/stdout | 10 +- .../ref/mips/linux/simple-atomic/config.ini | 8 +- .../ref/mips/linux/simple-atomic/m5stats.txt | 8 +- .../00.hello/ref/mips/linux/simple-atomic/stdout | 4 +- .../ref/mips/linux/simple-timing/config.ini | 32 +- .../ref/mips/linux/simple-timing/m5stats.txt | 108 +- .../00.hello/ref/mips/linux/simple-timing/stdout | 6 +- .../ref/sparc/linux/simple-atomic/config.ini | 8 +- .../ref/sparc/linux/simple-atomic/m5stats.txt | 8 +- .../00.hello/ref/sparc/linux/simple-atomic/stdout | 4 +- .../ref/sparc/linux/simple-timing/config.ini | 32 +- .../ref/sparc/linux/simple-timing/m5stats.txt | 165 +-- .../00.hello/ref/sparc/linux/simple-timing/stdout | 6 +- .../ref/alpha/linux/o3-timing/config.ini | 45 +- .../ref/alpha/linux/o3-timing/m5stats.txt | 732 +++++------ .../ref/alpha/linux/o3-timing/stdout | 6 +- .../linux/tsunami-simple-atomic-dual/config.ini | 68 +- .../linux/tsunami-simple-atomic-dual/m5stats.txt | 242 ++-- .../alpha/linux/tsunami-simple-atomic-dual/stderr | 6 +- .../alpha/linux/tsunami-simple-atomic-dual/stdout | 11 +- .../alpha/linux/tsunami-simple-atomic/config.ini | 38 +- .../alpha/linux/tsunami-simple-atomic/m5stats.txt | 184 ++- .../ref/alpha/linux/tsunami-simple-atomic/stderr | 4 +- .../ref/alpha/linux/tsunami-simple-atomic/stdout | 11 +- .../linux/tsunami-simple-timing-dual/config.ini | 68 +- .../linux/tsunami-simple-timing-dual/m5stats.txt | 1036 ++++++++-------- .../alpha/linux/tsunami-simple-timing-dual/stderr | 8 +- .../alpha/linux/tsunami-simple-timing-dual/stdout | 11 +- .../alpha/linux/tsunami-simple-timing/config.ini | 38 +- .../alpha/linux/tsunami-simple-timing/m5stats.txt | 474 ++++--- .../ref/alpha/linux/tsunami-simple-timing/stderr | 4 +- .../ref/alpha/linux/tsunami-simple-timing/stdout | 11 +- .../50.memtest/ref/alpha/linux/memtest/config.ini | 102 +- .../50.memtest/ref/alpha/linux/memtest/m5stats.txt | 1299 ++++++++------------ .../50.memtest/ref/alpha/linux/memtest/stderr | 146 +-- .../50.memtest/ref/alpha/linux/memtest/stdout | 10 +- .../linux/twosys-tsunami-simple-atomic/config.ini | 20 +- .../linux/twosys-tsunami-simple-atomic/m5stats.txt | 12 +- .../linux/twosys-tsunami-simple-atomic/stderr | 8 +- .../linux/twosys-tsunami-simple-atomic/stdout | 10 +- 57 files changed, 2963 insertions(+), 3486 deletions(-) diff --git a/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.ini b/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.ini index f112ef506..f145eee43 100644 --- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.ini +++ b/tests/quick/00.hello/ref/alpha/linux/o3-timing/config.ini @@ -11,7 +11,7 @@ physmem=system.physmem [system.cpu] type=DerivO3CPU -children=dcache fuPool icache l2cache toL2Bus workload +children=dcache fuPool icache l2cache toL2Bus tracer workload BTBEntries=4096 BTBTagSize=16 LFSTSize=1024 @@ -86,6 +86,7 @@ smtROBPolicy=Partitioned smtROBThreshold=100 squashWidth=8 system=system +tracer=system.cpu.tracer trapLatency=13 wbDepth=1 wbWidth=8 @@ -95,12 +96,9 @@ icache_port=system.cpu.icache.cpu_side [system.cpu.dcache] type=BaseCache -adaptive_compression=false addr_range=0:18446744073709551615 assoc=2 block_size=64 -compressed_bus=false -compression_latency=0 hash_delay=1 latency=1000 lifo=false @@ -118,12 +116,10 @@ prefetch_serial_squash=false prefetch_use_cpu_id=true prefetcher_size=100 prioritizeRequests=false -protocol=Null repl=Null size=262144 split=false split_size=0 -store_compressed=false subblock_size=0 tgts_per_mshr=20 trace_addr=0 @@ -139,11 +135,11 @@ FUList=system.cpu.fuPool.FUList0 system.cpu.fuPool.FUList1 system.cpu.fuPool.FUL [system.cpu.fuPool.FUList0] type=FUDesc -children=opList0 +children=opList count=6 -opList=system.cpu.fuPool.FUList0.opList0 +opList=system.cpu.fuPool.FUList0.opList -[system.cpu.fuPool.FUList0.opList0] +[system.cpu.fuPool.FUList0.opList] type=OpDesc issueLat=1 opClass=IntAlu @@ -217,11 +213,11 @@ opLat=24 [system.cpu.fuPool.FUList4] type=FUDesc -children=opList0 +children=opList count=0 -opList=system.cpu.fuPool.FUList4.opList0 +opList=system.cpu.fuPool.FUList4.opList -[system.cpu.fuPool.FUList4.opList0] +[system.cpu.fuPool.FUList4.opList] type=OpDesc issueLat=1 opClass=MemRead @@ -229,11 +225,11 @@ opLat=1 [system.cpu.fuPool.FUList5] type=FUDesc -children=opList0 +children=opList count=0 -opList=system.cpu.fuPool.FUList5.opList0 +opList=system.cpu.fuPool.FUList5.opList -[system.cpu.fuPool.FUList5.opList0] +[system.cpu.fuPool.FUList5.opList] type=OpDesc issueLat=1 opClass=MemWrite @@ -259,11 +255,11 @@ opLat=1 [system.cpu.fuPool.FUList7] type=FUDesc -children=opList0 +children=opList count=1 -opList=system.cpu.fuPool.FUList7.opList0 +opList=system.cpu.fuPool.FUList7.opList -[system.cpu.fuPool.FUList7.opList0] +[system.cpu.fuPool.FUList7.opList] type=OpDesc issueLat=3 opClass=IprAccess @@ -271,12 +267,9 @@ opLat=3 [system.cpu.icache] type=BaseCache -adaptive_compression=false addr_range=0:18446744073709551615 assoc=2 block_size=64 -compressed_bus=false -compression_latency=0 hash_delay=1 latency=1000 lifo=false @@ -294,12 +287,10 @@ prefetch_serial_squash=false prefetch_use_cpu_id=true prefetcher_size=100 prioritizeRequests=false -protocol=Null repl=Null size=131072 split=false split_size=0 -store_compressed=false subblock_size=0 tgts_per_mshr=20 trace_addr=0 @@ -310,12 +301,9 @@ mem_side=system.cpu.toL2Bus.port[0] [system.cpu.l2cache] type=BaseCache -adaptive_compression=false addr_range=0:18446744073709551615 assoc=2 block_size=64 -compressed_bus=false -compression_latency=0 hash_delay=1 latency=1000 lifo=false @@ -333,12 +321,10 @@ prefetch_serial_squash=false prefetch_use_cpu_id=true prefetcher_size=100 prioritizeRequests=false -protocol=Null repl=Null size=2097152 split=false split_size=0 -store_compressed=false subblock_size=0 tgts_per_mshr=5 trace_addr=0 @@ -356,6 +342,9 @@ responder_set=false width=64 port=system.cpu.icache.mem_side system.cpu.dcache.mem_side system.cpu.l2cache.cpu_side +[system.cpu.tracer] +type=ExeTracer + [system.cpu.workload] type=LiveProcess cmd=hello diff --git a/tests/quick/00.hello/ref/alpha/linux/o3-timing/m5stats.txt b/tests/quick/00.hello/ref/alpha/linux/o3-timing/m5stats.txt index 2ac86dd84..02095a557 100644 --- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/m5stats.txt +++ b/tests/quick/00.hello/ref/alpha/linux/o3-timing/m5stats.txt @@ -1,40 +1,40 @@ ---------- Begin Simulation Statistics ---------- global.BPredUnit.BTBCorrect 0 # Number of correct BTB predictions (this stat may not work properly. -global.BPredUnit.BTBHits 522 # Number of BTB hits -global.BPredUnit.BTBLookups 1584 # Number of BTB lookups -global.BPredUnit.RASInCorrect 57 # Number of incorrect RAS predictions. -global.BPredUnit.condIncorrect 422 # Number of conditional branches incorrect -global.BPredUnit.condPredicted 1088 # Number of conditional branches predicted -global.BPredUnit.lookups 1837 # Number of BP lookups -global.BPredUnit.usedRAS 241 # Number of times the RAS was used to get a target. -host_inst_rate 39303 # Simulator instruction rate (inst/s) -host_mem_usage 153768 # Number of bytes of host memory used -host_seconds 0.14 # Real time elapsed on the host -host_tick_rate 32016268 # Simulator tick rate (ticks/s) -memdepunit.memDep.conflictingLoads 17 # Number of conflicting loads. -memdepunit.memDep.conflictingStores 127 # Number of conflicting stores. -memdepunit.memDep.insertedLoads 1874 # Number of loads inserted to the mem dependence unit. -memdepunit.memDep.insertedStores 1142 # Number of stores inserted to the mem dependence unit. +global.BPredUnit.BTBHits 543 # Number of BTB hits +global.BPredUnit.BTBLookups 1720 # Number of BTB lookups +global.BPredUnit.RASInCorrect 59 # Number of incorrect RAS predictions. +global.BPredUnit.condIncorrect 423 # Number of conditional branches incorrect +global.BPredUnit.condPredicted 1175 # Number of conditional branches predicted +global.BPredUnit.lookups 2025 # Number of BP lookups +global.BPredUnit.usedRAS 277 # Number of times the RAS was used to get a target. +host_inst_rate 29843 # Simulator instruction rate (inst/s) +host_mem_usage 154572 # Number of bytes of host memory used +host_seconds 0.19 # Real time elapsed on the host +host_tick_rate 22095832 # Simulator tick rate (ticks/s) +memdepunit.memDep.conflictingLoads 31 # Number of conflicting loads. +memdepunit.memDep.conflictingStores 133 # Number of conflicting stores. +memdepunit.memDep.insertedLoads 1967 # Number of loads inserted to the mem dependence unit. +memdepunit.memDep.insertedStores 1200 # Number of stores inserted to the mem dependence unit. sim_freq 1000000000000 # Frequency of simulated ticks sim_insts 5623 # Number of instructions simulated -sim_seconds 0.000005 # Number of seconds simulated -sim_ticks 4589500 # Number of ticks simulated +sim_seconds 0.000004 # Number of seconds simulated +sim_ticks 4170500 # Number of ticks simulated system.cpu.commit.COM:branches 862 # Number of branches committed -system.cpu.commit.COM:bw_lim_events 104 # number cycles where commit BW limit reached +system.cpu.commit.COM:bw_lim_events 105 # number cycles where commit BW limit reached system.cpu.commit.COM:bw_limited 0 # number of insts not committed due to BW limits system.cpu.commit.COM:committed_per_cycle.start_dist # Number of insts commited each cycle -system.cpu.commit.COM:committed_per_cycle.samples 8521 +system.cpu.commit.COM:committed_per_cycle.samples 7614 system.cpu.commit.COM:committed_per_cycle.min_value 0 - 0 6200 7276.14% - 1 1160 1361.34% - 2 469 550.40% - 3 177 207.72% - 4 131 153.74% - 5 98 115.01% - 6 109 127.92% - 7 73 85.67% - 8 104 122.05% + 0 5315 6980.56% + 1 1182 1552.40% + 2 399 524.03% + 3 192 252.17% + 4 125 164.17% + 5 99 130.02% + 6 130 170.74% + 7 67 88.00% + 8 105 137.90% system.cpu.commit.COM:committed_per_cycle.max_value 8 system.cpu.commit.COM:committed_per_cycle.end_dist @@ -43,70 +43,70 @@ system.cpu.commit.COM:loads 979 # Nu system.cpu.commit.COM:membars 0 # Number of memory barriers committed system.cpu.commit.COM:refs 1791 # Number of memory references committed system.cpu.commit.COM:swp_count 0 # Number of s/w prefetches committed -system.cpu.commit.branchMispredicts 350 # The number of times a branch was mispredicted +system.cpu.commit.branchMispredicts 349 # The number of times a branch was mispredicted system.cpu.commit.commitCommittedInsts 5640 # The number of committed instructions system.cpu.commit.commitNonSpecStalls 17 # The number of times commit has been forced to stall to communicate backwards -system.cpu.commit.commitSquashedInsts 3571 # The number of squashed insts skipped by commit +system.cpu.commit.commitSquashedInsts 3957 # The number of squashed insts skipped by commit system.cpu.committedInsts 5623 # Number of Instructions Simulated system.cpu.committedInsts_total 5623 # Number of Instructions Simulated -system.cpu.cpi 1.636315 # CPI: Cycles Per Instruction -system.cpu.cpi_total 1.636315 # CPI: Total CPI of All Threads -system.cpu.dcache.ReadReq_accesses 1470 # number of ReadReq accesses(hits+misses) -system.cpu.dcache.ReadReq_avg_miss_latency 5932.330827 # average ReadReq miss latency -system.cpu.dcache.ReadReq_avg_mshr_miss_latency 5380 # average ReadReq mshr miss latency -system.cpu.dcache.ReadReq_hits 1337 # number of ReadReq hits -system.cpu.dcache.ReadReq_miss_latency 789000 # number of ReadReq miss cycles -system.cpu.dcache.ReadReq_miss_rate 0.090476 # miss rate for ReadReq accesses -system.cpu.dcache.ReadReq_misses 133 # number of ReadReq misses -system.cpu.dcache.ReadReq_mshr_hits 33 # number of ReadReq MSHR hits -system.cpu.dcache.ReadReq_mshr_miss_latency 538000 # number of ReadReq MSHR miss cycles -system.cpu.dcache.ReadReq_mshr_miss_rate 0.068027 # mshr miss rate for ReadReq accesses -system.cpu.dcache.ReadReq_mshr_misses 100 # number of ReadReq MSHR misses -system.cpu.dcache.WriteReq_accesses 812 # number of WriteReq accesses(hits+misses) -system.cpu.dcache.WriteReq_avg_miss_latency 4504.373178 # average WriteReq miss latency -system.cpu.dcache.WriteReq_avg_mshr_miss_latency 5116.438356 # average WriteReq mshr miss latency -system.cpu.dcache.WriteReq_hits 469 # number of WriteReq hits -system.cpu.dcache.WriteReq_miss_latency 1545000 # number of WriteReq miss cycles -system.cpu.dcache.WriteReq_miss_rate 0.422414 # miss rate for WriteReq accesses -system.cpu.dcache.WriteReq_misses 343 # number of WriteReq misses -system.cpu.dcache.WriteReq_mshr_hits 270 # number of WriteReq MSHR hits -system.cpu.dcache.WriteReq_mshr_miss_latency 373500 # number of WriteReq MSHR miss cycles -system.cpu.dcache.WriteReq_mshr_miss_rate 0.089901 # mshr miss rate for WriteReq accesses -system.cpu.dcache.WriteReq_mshr_misses 73 # number of WriteReq MSHR misses +system.cpu.cpi 1.483372 # CPI: Cycles Per Instruction +system.cpu.cpi_total 1.483372 # CPI: Total CPI of All Threads +system.cpu.dcache.ReadReq_accesses 1487 # number of ReadReq accesses(hits+misses) +system.cpu.dcache.ReadReq_avg_miss_latency 8188.118812 # average ReadReq miss latency +system.cpu.dcache.ReadReq_avg_mshr_miss_latency 5495.049505 # average ReadReq mshr miss latency +system.cpu.dcache.ReadReq_hits 1386 # number of ReadReq hits +system.cpu.dcache.ReadReq_miss_latency 827000 # number of ReadReq miss cycles +system.cpu.dcache.ReadReq_miss_rate 0.067922 # miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_misses 101 # number of ReadReq misses +system.cpu.dcache.ReadReq_mshr_hits 34 # number of ReadReq MSHR hits +system.cpu.dcache.ReadReq_mshr_miss_latency 555000 # number of ReadReq MSHR miss cycles +system.cpu.dcache.ReadReq_mshr_miss_rate 0.067922 # mshr miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_mshr_misses 101 # number of ReadReq MSHR misses +system.cpu.dcache.WriteReq_accesses 561 # number of WriteReq accesses(hits+misses) +system.cpu.dcache.WriteReq_avg_miss_latency 18316.091954 # average WriteReq miss latency +system.cpu.dcache.WriteReq_avg_mshr_miss_latency 5068.965517 # average WriteReq mshr miss latency +system.cpu.dcache.WriteReq_hits 474 # number of WriteReq hits +system.cpu.dcache.WriteReq_miss_latency 1593500 # number of WriteReq miss cycles +system.cpu.dcache.WriteReq_miss_rate 0.155080 # miss rate for WriteReq accesses +system.cpu.dcache.WriteReq_misses 87 # number of WriteReq misses +system.cpu.dcache.WriteReq_mshr_hits 251 # number of WriteReq MSHR hits +system.cpu.dcache.WriteReq_mshr_miss_latency 441000 # number of WriteReq MSHR miss cycles +system.cpu.dcache.WriteReq_mshr_miss_rate 0.155080 # mshr miss rate for WriteReq accesses +system.cpu.dcache.WriteReq_mshr_misses 87 # number of WriteReq MSHR misses system.cpu.dcache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked system.cpu.dcache.avg_blocked_cycles_no_targets # average number of cycles each access was blocked -system.cpu.dcache.avg_refs 10.439306 # Average number of references to valid blocks. +system.cpu.dcache.avg_refs 10.770115 # Average number of references to valid blocks. system.cpu.dcache.blocked_no_mshrs 0 # number of cycles access was blocked system.cpu.dcache.blocked_no_targets 0 # number of cycles access was blocked system.cpu.dcache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked system.cpu.dcache.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu.dcache.cache_copies 0 # number of cache copies performed -system.cpu.dcache.demand_accesses 2282 # number of demand (read+write) accesses -system.cpu.dcache.demand_avg_miss_latency 4903.361345 # average overall miss latency -system.cpu.dcache.demand_avg_mshr_miss_latency 5268.786127 # average overall mshr miss latency -system.cpu.dcache.demand_hits 1806 # number of demand (read+write) hits -system.cpu.dcache.demand_miss_latency 2334000 # number of demand (read+write) miss cycles -system.cpu.dcache.demand_miss_rate 0.208589 # miss rate for demand accesses -system.cpu.dcache.demand_misses 476 # number of demand (read+write) misses -system.cpu.dcache.demand_mshr_hits 303 # number of demand (read+write) MSHR hits -system.cpu.dcache.demand_mshr_miss_latency 911500 # number of demand (read+write) MSHR miss cycles -system.cpu.dcache.demand_mshr_miss_rate 0.075811 # mshr miss rate for demand accesses -system.cpu.dcache.demand_mshr_misses 173 # number of demand (read+write) MSHR misses +system.cpu.dcache.demand_accesses 2048 # number of demand (read+write) accesses +system.cpu.dcache.demand_avg_miss_latency 12875 # average overall miss latency +system.cpu.dcache.demand_avg_mshr_miss_latency 5297.872340 # average overall mshr miss latency +system.cpu.dcache.demand_hits 1860 # number of demand (read+write) hits +system.cpu.dcache.demand_miss_latency 2420500 # number of demand (read+write) miss cycles +system.cpu.dcache.demand_miss_rate 0.091797 # miss rate for demand accesses +system.cpu.dcache.demand_misses 188 # number of demand (read+write) misses +system.cpu.dcache.demand_mshr_hits 285 # number of demand (read+write) MSHR hits +system.cpu.dcache.demand_mshr_miss_latency 996000 # number of demand (read+write) MSHR miss cycles +system.cpu.dcache.demand_mshr_miss_rate 0.091797 # mshr miss rate for demand accesses +system.cpu.dcache.demand_mshr_misses 188 # number of demand (read+write) MSHR misses system.cpu.dcache.fast_writes 0 # number of fast writes performed system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.dcache.overall_accesses 2282 # number of overall (read+write) accesses -system.cpu.dcache.overall_avg_miss_latency 4903.361345 # average overall miss latency -system.cpu.dcache.overall_avg_mshr_miss_latency 5268.786127 # average overall mshr miss latency +system.cpu.dcache.overall_accesses 2048 # number of overall (read+write) accesses +system.cpu.dcache.overall_avg_miss_latency 12875 # average overall miss latency +system.cpu.dcache.overall_avg_mshr_miss_latency 5297.872340 # average overall mshr miss latency system.cpu.dcache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency -system.cpu.dcache.overall_hits 1806 # number of overall hits -system.cpu.dcache.overall_miss_latency 2334000 # number of overall miss cycles -system.cpu.dcache.overall_miss_rate 0.208589 # miss rate for overall accesses -system.cpu.dcache.overall_misses 476 # number of overall misses -system.cpu.dcache.overall_mshr_hits 303 # number of overall MSHR hits -system.cpu.dcache.overall_mshr_miss_latency 911500 # number of overall MSHR miss cycles -system.cpu.dcache.overall_mshr_miss_rate 0.075811 # mshr miss rate for overall accesses -system.cpu.dcache.overall_mshr_misses 173 # number of overall MSHR misses +system.cpu.dcache.overall_hits 1860 # number of overall hits +system.cpu.dcache.overall_miss_latency 2420500 # number of overall miss cycles +system.cpu.dcache.overall_miss_rate 0.091797 # miss rate for overall accesses +system.cpu.dcache.overall_misses 188 # number of overall misses +system.cpu.dcache.overall_mshr_hits 285 # number of overall MSHR hits +system.cpu.dcache.overall_mshr_miss_latency 996000 # number of overall MSHR miss cycles +system.cpu.dcache.overall_mshr_miss_rate 0.091797 # mshr miss rate for overall accesses +system.cpu.dcache.overall_mshr_misses 188 # number of overall MSHR misses system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.dcache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.cpu.dcache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache @@ -119,91 +119,91 @@ system.cpu.dcache.prefetcher.num_hwpf_removed_MSHR_hit 0 system.cpu.dcache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time system.cpu.dcache.replacements 0 # number of replacements -system.cpu.dcache.sampled_refs 173 # Sample count of references to valid blocks. +system.cpu.dcache.sampled_refs 174 # Sample count of references to valid blocks. system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.dcache.tagsinuse 112.669258 # Cycle average of tags in use -system.cpu.dcache.total_refs 1806 # Total number of references to valid blocks. +system.cpu.dcache.tagsinuse 112.600183 # Cycle average of tags in use +system.cpu.dcache.total_refs 1874 # Total number of references to valid blocks. system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.dcache.writebacks 0 # number of writebacks -system.cpu.decode.DECODE:BlockedCycles 389 # Number of cycles decode is blocked -system.cpu.decode.DECODE:BranchMispred 75 # Number of times decode detected a branch misprediction -system.cpu.decode.DECODE:BranchResolved 143 # Number of times decode resolved a branch -system.cpu.decode.DECODE:DecodedInsts 10466 # Number of instructions handled by decode -system.cpu.decode.DECODE:IdleCycles 6230 # Number of cycles decode is idle -system.cpu.decode.DECODE:RunCycles 1855 # Number of cycles decode is running -system.cpu.decode.DECODE:SquashCycles 679 # Number of cycles decode is squashing -system.cpu.decode.DECODE:SquashedInsts 228 # Number of squashed instructions handled by decode +system.cpu.decode.DECODE:BlockedCycles 391 # Number of cycles decode is blocked +system.cpu.decode.DECODE:BranchMispred 82 # Number of times decode detected a branch misprediction +system.cpu.decode.DECODE:BranchResolved 164 # Number of times decode resolved a branch +system.cpu.decode.DECODE:DecodedInsts 11387 # Number of instructions handled by decode +system.cpu.decode.DECODE:IdleCycles 5174 # Number of cycles decode is idle +system.cpu.decode.DECODE:RunCycles 2002 # Number of cycles decode is running +system.cpu.decode.DECODE:SquashCycles 726 # Number of cycles decode is squashing +system.cpu.decode.DECODE:SquashedInsts 244 # Number of squashed instructions handled by decode system.cpu.decode.DECODE:UnblockCycles 48 # Number of cycles decode is unblocking -system.cpu.fetch.Branches 1837 # Number of branches that fetch encountered -system.cpu.fetch.CacheLines 1469 # Number of cache lines fetched -system.cpu.fetch.Cycles 3456 # Number of cycles fetch has run and was not squashing or blocked -system.cpu.fetch.IcacheSquashes 267 # Number of outstanding Icache misses that were squashed -system.cpu.fetch.Insts 11417 # Number of instructions fetch has processed -system.cpu.fetch.SquashCycles 455 # Number of cycles fetch has spent squashing -system.cpu.fetch.branchRate 0.199652 # Number of branch fetches per cycle -system.cpu.fetch.icacheStallCycles 1469 # Number of cycles fetch is stalled on an Icache miss -system.cpu.fetch.predictedBranches 763 # Number of branches that fetch has predicted taken -system.cpu.fetch.rate 1.240843 # Number of inst fetches per cycle +system.cpu.fetch.Branches 2025 # Number of branches that fetch encountered +system.cpu.fetch.CacheLines 1529 # Number of cache lines fetched +system.cpu.fetch.Cycles 3690 # Number of cycles fetch has run and was not squashing or blocked +system.cpu.fetch.IcacheSquashes 212 # Number of outstanding Icache misses that were squashed +system.cpu.fetch.Insts 12463 # Number of instructions fetch has processed +system.cpu.fetch.SquashCycles 457 # Number of cycles fetch has spent squashing +system.cpu.fetch.branchRate 0.242777 # Number of branch fetches per cycle +system.cpu.fetch.icacheStallCycles 1529 # Number of cycles fetch is stalled on an Icache miss +system.cpu.fetch.predictedBranches 820 # Number of branches that fetch has predicted taken +system.cpu.fetch.rate 1.494185 # Number of inst fetches per cycle system.cpu.fetch.rateDist.start_dist # Number of instructions fetched each cycle (Total) -system.cpu.fetch.rateDist.samples 9201 +system.cpu.fetch.rateDist.samples 8341 system.cpu.fetch.rateDist.min_value 0 - 0 7216 7842.63% - 1 168 182.59% - 2 148 160.85% - 3 136 147.81% - 4 214 232.58% - 5 138 149.98% - 6 177 192.37% - 7 95 103.25% - 8 909 987.94% + 0 6181 7410.38% + 1 173 207.41% + 2 174 208.61% + 3 151 181.03% + 4 219 262.56% + 5 157 188.23% + 6 179 214.60% + 7 102 122.29% + 8 1005 1204.89% system.cpu.fetch.rateDist.max_value 8 system.cpu.fetch.rateDist.end_dist -system.cpu.icache.ReadReq_accesses 1469 # number of ReadReq accesses(hits+misses) -system.cpu.icache.ReadReq_avg_miss_latency 5381.818182 # average ReadReq miss latency -system.cpu.icache.ReadReq_avg_mshr_miss_latency 4530.448718 # average ReadReq mshr miss latency -system.cpu.icache.ReadReq_hits 1139 # number of ReadReq hits -system.cpu.icache.ReadReq_miss_latency 1776000 # number of ReadReq miss cycles -system.cpu.icache.ReadReq_miss_rate 0.224643 # miss rate for ReadReq accesses -system.cpu.icache.ReadReq_misses 330 # number of ReadReq misses +system.cpu.icache.ReadReq_accesses 1511 # number of ReadReq accesses(hits+misses) +system.cpu.icache.ReadReq_avg_miss_latency 5621.019108 # average ReadReq miss latency +system.cpu.icache.ReadReq_avg_mshr_miss_latency 4464.968153 # average ReadReq mshr miss latency +system.cpu.icache.ReadReq_hits 1197 # number of ReadReq hits +system.cpu.icache.ReadReq_miss_latency 1765000 # number of ReadReq miss cycles +system.cpu.icache.ReadReq_miss_rate 0.207809 # miss rate for ReadReq accesses +system.cpu.icache.ReadReq_misses 314 # number of ReadReq misses system.cpu.icache.ReadReq_mshr_hits 18 # number of ReadReq MSHR hits -system.cpu.icache.ReadReq_mshr_miss_latency 1413500 # number of ReadReq MSHR miss cycles -system.cpu.icache.ReadReq_mshr_miss_rate 0.212389 # mshr miss rate for ReadReq accesses -system.cpu.icache.ReadReq_mshr_misses 312 # number of ReadReq MSHR misses +system.cpu.icache.ReadReq_mshr_miss_latency 1402000 # number of ReadReq MSHR miss cycles +system.cpu.icache.ReadReq_mshr_miss_rate 0.207809 # mshr miss rate for ReadReq accesses +system.cpu.icache.ReadReq_mshr_misses 314 # number of ReadReq MSHR misses system.cpu.icache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked system.cpu.icache.avg_blocked_cycles_no_targets # average number of cycles each access was blocked -system.cpu.icache.avg_refs 3.650641 # Average number of references to valid blocks. +system.cpu.icache.avg_refs 3.812102 # Average number of references to valid blocks. system.cpu.icache.blocked_no_mshrs 0 # number of cycles access was blocked system.cpu.icache.blocked_no_targets 0 # number of cycles access was blocked system.cpu.icache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked system.cpu.icache.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu.icache.cache_copies 0 # number of cache copies performed -system.cpu.icache.demand_accesses 1469 # number of demand (read+write) accesses -system.cpu.icache.demand_avg_miss_latency 5381.818182 # average overall miss latency -system.cpu.icache.demand_avg_mshr_miss_latency 4530.448718 # average overall mshr miss latency -system.cpu.icache.demand_hits 1139 # number of demand (read+write) hits -system.cpu.icache.demand_miss_latency 1776000 # number of demand (read+write) miss cycles -system.cpu.icache.demand_miss_rate 0.224643 # miss rate for demand accesses -system.cpu.icache.demand_misses 330 # number of demand (read+write) misses +system.cpu.icache.demand_accesses 1511 # number of demand (read+write) accesses +system.cpu.icache.demand_avg_miss_latency 5621.019108 # average overall miss latency +system.cpu.icache.demand_avg_mshr_miss_latency 4464.968153 # average overall mshr miss latency +system.cpu.icache.demand_hits 1197 # number of demand (read+write) hits +system.cpu.icache.demand_miss_latency 1765000 # number of demand (read+write) miss cycles +system.cpu.icache.demand_miss_rate 0.207809 # miss rate for demand accesses +system.cpu.icache.demand_misses 314 # number of demand (read+write) misses system.cpu.icache.demand_mshr_hits 18 # number of demand (read+write) MSHR hits -system.cpu.icache.demand_mshr_miss_latency 1413500 # number of demand (read+write) MSHR miss cycles -system.cpu.icache.demand_mshr_miss_rate 0.212389 # mshr miss rate for demand accesses -system.cpu.icache.demand_mshr_misses 312 # number of demand (read+write) MSHR misses +system.cpu.icache.demand_mshr_miss_latency 1402000 # number of demand (read+write) MSHR miss cycles +system.cpu.icache.demand_mshr_miss_rate 0.207809 # mshr miss rate for demand accesses +system.cpu.icache.demand_mshr_misses 314 # number of demand (read+write) MSHR misses system.cpu.icache.fast_writes 0 # number of fast writes performed system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.icache.overall_accesses 1469 # number of overall (read+write) accesses -system.cpu.icache.overall_avg_miss_latency 5381.818182 # average overall miss latency -system.cpu.icache.overall_avg_mshr_miss_latency 4530.448718 # average overall mshr miss latency +system.cpu.icache.overall_accesses 1511 # number of overall (read+write) accesses +system.cpu.icache.overall_avg_miss_latency 5621.019108 # average overall miss latency +system.cpu.icache.overall_avg_mshr_miss_latency 4464.968153 # average overall mshr miss latency system.cpu.icache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency -system.cpu.icache.overall_hits 1139 # number of overall hits -system.cpu.icache.overall_miss_latency 1776000 # number of overall miss cycles -system.cpu.icache.overall_miss_rate 0.224643 # miss rate for overall accesses -system.cpu.icache.overall_misses 330 # number of overall misses +system.cpu.icache.overall_hits 1197 # number of overall hits +system.cpu.icache.overall_miss_latency 1765000 # number of overall miss cycles +system.cpu.icache.overall_miss_rate 0.207809 # miss rate for overall accesses +system.cpu.icache.overall_misses 314 # number of overall misses system.cpu.icache.overall_mshr_hits 18 # number of overall MSHR hits -system.cpu.icache.overall_mshr_miss_latency 1413500 # number of overall MSHR miss cycles -system.cpu.icache.overall_mshr_miss_rate 0.212389 # mshr miss rate for overall accesses -system.cpu.icache.overall_mshr_misses 312 # number of overall MSHR misses +system.cpu.icache.overall_mshr_miss_latency 1402000 # number of overall MSHR miss cycles +system.cpu.icache.overall_mshr_miss_rate 0.207809 # mshr miss rate for overall accesses +system.cpu.icache.overall_mshr_misses 314 # number of overall MSHR misses system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.icache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.cpu.icache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache @@ -216,76 +216,76 @@ system.cpu.icache.prefetcher.num_hwpf_removed_MSHR_hit 0 system.cpu.icache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time system.cpu.icache.replacements 0 # number of replacements -system.cpu.icache.sampled_refs 312 # Sample count of references to valid blocks. +system.cpu.icache.sampled_refs 314 # Sample count of references to valid blocks. system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.icache.tagsinuse 165.921810 # Cycle average of tags in use -system.cpu.icache.total_refs 1139 # Total number of references to valid blocks. +system.cpu.icache.tagsinuse 167.838424 # Cycle average of tags in use +system.cpu.icache.total_refs 1197 # Total number of references to valid blocks. system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.icache.writebacks 0 # number of writebacks -system.cpu.idleCycles 2474 # Total number of cycles that the CPU has spent unscheduled due to idling -system.cpu.iew.EXEC:branches 1144 # Number of branches executed -system.cpu.iew.EXEC:nop 40 # number of nop insts executed -system.cpu.iew.EXEC:rate 0.835018 # Inst execution rate -system.cpu.iew.EXEC:refs 2519 # number of memory reference insts executed -system.cpu.iew.EXEC:stores 977 # Number of stores executed +system.cpu.idleCycles 1997 # Total number of cycles that the CPU has spent unscheduled due to idling +system.cpu.iew.EXEC:branches 1159 # Number of branches executed +system.cpu.iew.EXEC:nop 43 # number of nop insts executed +system.cpu.iew.EXEC:rate 0.933581 # Inst execution rate +system.cpu.iew.EXEC:refs 2561 # number of memory reference insts executed +system.cpu.iew.EXEC:stores 971 # Number of stores executed system.cpu.iew.EXEC:swp 0 # number of swp insts executed -system.cpu.iew.WB:consumers 5193 # num instructions consuming a value -system.cpu.iew.WB:count 7387 # cumulative count of insts written-back -system.cpu.iew.WB:fanout 0.742923 # average fanout of values written-back +system.cpu.iew.WB:consumers 5329 # num instructions consuming a value +system.cpu.iew.WB:count 7480 # cumulative count of insts written-back +system.cpu.iew.WB:fanout 0.739351 # average fanout of values written-back system.cpu.iew.WB:penalized 0 # number of instrctions required to write to 'other' IQ system.cpu.iew.WB:penalized_rate 0 # fraction of instructions written-back that wrote to 'other' IQ -system.cpu.iew.WB:producers 3858 # num instructions producing a value -system.cpu.iew.WB:rate 0.802848 # insts written-back per cycle -system.cpu.iew.WB:sent 7452 # cumulative count of insts sent to commit -system.cpu.iew.branchMispredicts 373 # Number of branch mispredicts detected at execute +system.cpu.iew.WB:producers 3940 # num instructions producing a value +system.cpu.iew.WB:rate 0.896775 # insts written-back per cycle +system.cpu.iew.WB:sent 7559 # cumulative count of insts sent to commit +system.cpu.iew.branchMispredicts 402 # Number of branch mispredicts detected at execute system.cpu.iew.iewBlockCycles 4 # Number of cycles IEW is blocking -system.cpu.iew.iewDispLoadInsts 1874 # Number of dispatched load instructions -system.cpu.iew.iewDispNonSpecInsts 22 # Number of dispatched non-speculative instructions -system.cpu.iew.iewDispSquashedInsts 302 # Number of squashed instructions skipped by dispatch -system.cpu.iew.iewDispStoreInsts 1142 # Number of dispatched store instructions -system.cpu.iew.iewDispatchedInsts 9228 # Number of instructions dispatched to IQ -system.cpu.iew.iewExecLoadInsts 1542 # Number of load instructions executed -system.cpu.iew.iewExecSquashedInsts 285 # Number of squashed instructions skipped in execute -system.cpu.iew.iewExecutedInsts 7683 # Number of executed instructions +system.cpu.iew.iewDispLoadInsts 1967 # Number of dispatched load instructions +system.cpu.iew.iewDispNonSpecInsts 23 # Number of dispatched non-speculative instructions +system.cpu.iew.iewDispSquashedInsts 240 # Number of squashed instructions skipped by dispatch +system.cpu.iew.iewDispStoreInsts 1200 # Number of dispatched store instructions +system.cpu.iew.iewDispatchedInsts 9614 # Number of instructions dispatched to IQ +system.cpu.iew.iewExecLoadInsts 1590 # Number of load instructions executed +system.cpu.iew.iewExecSquashedInsts 364 # Number of squashed instructions skipped in execute +system.cpu.iew.iewExecutedInsts 7787 # Number of executed instructions system.cpu.iew.iewIQFullEvents 0 # Number of times the IQ has become full, causing a stall system.cpu.iew.iewIdleCycles 0 # Number of cycles IEW is idle system.cpu.iew.iewLSQFullEvents 0 # Number of times the LSQ has become full, causing a stall -system.cpu.iew.iewSquashCycles 679 # Number of cycles IEW is squashing +system.cpu.iew.iewSquashCycles 726 # Number of cycles IEW is squashing system.cpu.iew.iewUnblockCycles 0 # Number of cycles IEW is unblocking system.cpu.iew.lsq.thread.0.blockedLoads 0 # Number of blocked loads due to partial load-store forwarding system.cpu.iew.lsq.thread.0.cacheBlocked 0 # Number of times an access to memory failed due to the cache being blocked -system.cpu.iew.lsq.thread.0.forwLoads 50 # Number of loads that had data forwarded from stores -system.cpu.iew.lsq.thread.0.ignoredResponses 4 # Number of memory responses ignored because the instruction is squashed +system.cpu.iew.lsq.thread.0.forwLoads 48 # Number of loads that had data forwarded from stores +system.cpu.iew.lsq.thread.0.ignoredResponses 3 # Number of memory responses ignored because the instruction is squashed system.cpu.iew.lsq.thread.0.invAddrLoads 0 # Number of loads ignored due to an invalid address system.cpu.iew.lsq.thread.0.invAddrSwpfs 0 # Number of software prefetches ignored due to an invalid address -system.cpu.iew.lsq.thread.0.memOrderViolation 63 # Number of memory ordering violations +system.cpu.iew.lsq.thread.0.memOrderViolation 70 # Number of memory ordering violations system.cpu.iew.lsq.thread.0.rescheduledLoads 1 # Number of loads that were rescheduled -system.cpu.iew.lsq.thread.0.squashedLoads 895 # Number of loads squashed -system.cpu.iew.lsq.thread.0.squashedStores 330 # Number of stores squashed -system.cpu.iew.memOrderViolationEvents 63 # Number of memory order violations -system.cpu.iew.predictedNotTakenIncorrect 262 # Number of branches that were predicted not taken incorrectly -system.cpu.iew.predictedTakenIncorrect 111 # Number of branches that were predicted taken incorrectly -system.cpu.ipc 0.611129 # IPC: Instructions Per Cycle -system.cpu.ipc_total 0.611129 # IPC: Total IPC of All Threads -system.cpu.iq.ISSUE:FU_type_0 7968 # Type of FU issued +system.cpu.iew.lsq.thread.0.squashedLoads 988 # Number of loads squashed +system.cpu.iew.lsq.thread.0.squashedStores 388 # Number of stores squashed +system.cpu.iew.memOrderViolationEvents 70 # Number of memory order violations +system.cpu.iew.predictedNotTakenIncorrect 287 # Number of branches that were predicted not taken incorrectly +system.cpu.iew.predictedTakenIncorrect 115 # Number of branches that were predicted taken incorrectly +system.cpu.ipc 0.674140 # IPC: Instructions Per Cycle +system.cpu.ipc_total 0.674140 # IPC: Total IPC of All Threads +system.cpu.iq.ISSUE:FU_type_0 8151 # Type of FU issued system.cpu.iq.ISSUE:FU_type_0.start_dist - No_OpClass 2 0.03% # Type of FU issued - IntAlu 5314 66.69% # Type of FU issued + No_OpClass 2 0.02% # Type of FU issued + IntAlu 5422 66.52% # Type of FU issued IntMult 1 0.01% # Type of FU issued IntDiv 0 0.00% # Type of FU issued - FloatAdd 2 0.03% # Type of FU issued + FloatAdd 2 0.02% # Type of FU issued FloatCmp 0 0.00% # Type of FU issued FloatCvt 0 0.00% # Type of FU issued FloatMult 0 0.00% # Type of FU issued FloatDiv 0 0.00% # Type of FU issued FloatSqrt 0 0.00% # Type of FU issued - MemRead 1659 20.82% # Type of FU issued - MemWrite 990 12.42% # Type of FU issued + MemRead 1720 21.10% # Type of FU issued + MemWrite 1004 12.32% # Type of FU issued IprAccess 0 0.00% # Type of FU issued InstPrefetch 0 0.00% # Type of FU issued system.cpu.iq.ISSUE:FU_type_0.end_dist -system.cpu.iq.ISSUE:fu_busy_cnt 105 # FU busy when requested -system.cpu.iq.ISSUE:fu_busy_rate 0.013178 # FU busy rate (busy events/executed inst) +system.cpu.iq.ISSUE:fu_busy_cnt 104 # FU busy when requested +system.cpu.iq.ISSUE:fu_busy_rate 0.012759 # FU busy rate (busy events/executed inst) system.cpu.iq.ISSUE:fu_full.start_dist No_OpClass 0 0.00% # attempts to use FU when none available IntAlu 0 0.00% # attempts to use FU when none available @@ -297,77 +297,96 @@ system.cpu.iq.ISSUE:fu_full.start_dist FloatMult 0 0.00% # attempts to use FU when none available FloatDiv 0 0.00% # attempts to use FU when none available FloatSqrt 0 0.00% # attempts to use FU when none available - MemRead 70 66.67% # attempts to use FU when none available - MemWrite 35 33.33% # attempts to use FU when none available + MemRead 70 67.31% # attempts to use FU when none available + MemWrite 34 32.69% # attempts to use FU when none available IprAccess 0 0.00% # attempts to use FU when none available InstPrefetch 0 0.00% # attempts to use FU when none available system.cpu.iq.ISSUE:fu_full.end_dist system.cpu.iq.ISSUE:issued_per_cycle.start_dist # Number of insts issued each cycle -system.cpu.iq.ISSUE:issued_per_cycle.samples 9201 +system.cpu.iq.ISSUE:issued_per_cycle.samples 8341 system.cpu.iq.ISSUE:issued_per_cycle.min_value 0 - 0 5952 6468.86% - 1 1111 1207.48% - 2 928 1008.59% - 3 433 470.60% - 4 378 410.82% - 5 251 272.80% - 6 111 120.64% - 7 27 29.34% - 8 10 10.87% + 0 5104 6119.17% + 1 1084 1299.60% + 2 829 993.89% + 3 533 639.01% + 4 366 438.80% + 5 258 309.32% + 6 126 151.06% + 7 28 33.57% + 8 13 15.59% system.cpu.iq.ISSUE:issued_per_cycle.max_value 8 system.cpu.iq.ISSUE:issued_per_cycle.end_dist -system.cpu.iq.ISSUE:rate 0.865993 # Inst issue rate -system.cpu.iq.iqInstsAdded 9166 # Number of instructions added to the IQ (excludes non-spec) -system.cpu.iq.iqInstsIssued 7968 # Number of instructions issued -system.cpu.iq.iqNonSpecInstsAdded 22 # Number of non-speculative instructions added to the IQ -system.cpu.iq.iqSquashedInstsExamined 3154 # Number of squashed instructions iterated over during squash; mainly for profiling +system.cpu.iq.ISSUE:rate 0.977221 # Inst issue rate +system.cpu.iq.iqInstsAdded 9548 # Number of instructions added to the IQ (excludes non-spec) +system.cpu.iq.iqInstsIssued 8151 # Number of instructions issued +system.cpu.iq.iqNonSpecInstsAdded 23 # Number of non-speculative instructions added to the IQ +system.cpu.iq.iqSquashedInstsExamined 3578 # Number of squashed instructions iterated over during squash; mainly for profiling system.cpu.iq.iqSquashedInstsIssued 22 # Number of squashed instructions issued -system.cpu.iq.iqSquashedNonSpecRemoved 5 # Number of squashed non-spec instructions that were removed -system.cpu.iq.iqSquashedOperandsExamined 2035 # Number of squashed operands that are examined and possibly removed from graph -system.cpu.l2cache.ReadReq_accesses 483 # number of ReadReq accesses(hits+misses) -system.cpu.l2cache.ReadReq_avg_miss_latency 4644.927536 # average ReadReq miss latency -system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 2467.908903 # average ReadReq mshr miss latency -system.cpu.l2cache.ReadReq_miss_latency 2243500 # number of ReadReq miss cycles -system.cpu.l2cache.ReadReq_miss_rate 1 # miss rate for ReadReq accesses -system.cpu.l2cache.ReadReq_misses 483 # number of ReadReq misses -system.cpu.l2cache.ReadReq_mshr_miss_latency 1192000 # number of ReadReq MSHR miss cycles -system.cpu.l2cache.ReadReq_mshr_miss_rate 1 # mshr miss rate for ReadReq accesses -system.cpu.l2cache.ReadReq_mshr_misses 483 # number of ReadReq MSHR misses +system.cpu.iq.iqSquashedNonSpecRemoved 6 # Number of squashed non-spec instructions that were removed +system.cpu.iq.iqSquashedOperandsExamined 2360 # Number of squashed operands that are examined and possibly removed from graph +system.cpu.l2cache.ReadExReq_accesses 73 # number of ReadExReq accesses(hits+misses) +system.cpu.l2cache.ReadExReq_avg_miss_latency 3643.835616 # average ReadExReq miss latency +system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency 2643.835616 # average ReadExReq mshr miss latency +system.cpu.l2cache.ReadExReq_miss_latency 266000 # number of ReadExReq miss cycles +system.cpu.l2cache.ReadExReq_miss_rate 1 # miss rate for ReadExReq accesses +system.cpu.l2cache.ReadExReq_misses 73 # number of ReadExReq misses +system.cpu.l2cache.ReadExReq_mshr_miss_latency 193000 # number of ReadExReq MSHR miss cycles +system.cpu.l2cache.ReadExReq_mshr_miss_rate 1 # mshr miss rate for ReadExReq accesses +system.cpu.l2cache.ReadExReq_mshr_misses 73 # number of ReadExReq MSHR misses +system.cpu.l2cache.ReadReq_accesses 415 # number of ReadReq accesses(hits+misses) +system.cpu.l2cache.ReadReq_avg_miss_latency 3406.779661 # average ReadReq miss latency +system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 2406.779661 # average ReadReq mshr miss latency +system.cpu.l2cache.ReadReq_hits 2 # number of ReadReq hits +system.cpu.l2cache.ReadReq_miss_latency 1407000 # number of ReadReq miss cycles +system.cpu.l2cache.ReadReq_miss_rate 0.995181 # miss rate for ReadReq accesses +system.cpu.l2cache.ReadReq_misses 413 # number of ReadReq misses +system.cpu.l2cache.ReadReq_mshr_miss_latency 994000 # number of ReadReq MSHR miss cycles +system.cpu.l2cache.ReadReq_mshr_miss_rate 0.995181 # mshr miss rate for ReadReq accesses +system.cpu.l2cache.ReadReq_mshr_misses 413 # number of ReadReq MSHR misses +system.cpu.l2cache.UpgradeReq_accesses 14 # number of UpgradeReq accesses(hits+misses) +system.cpu.l2cache.UpgradeReq_avg_miss_latency 3392.857143 # average UpgradeReq miss latency +system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency 2392.857143 # average UpgradeReq mshr miss latency +system.cpu.l2cache.UpgradeReq_miss_latency 47500 # number of UpgradeReq miss cycles +system.cpu.l2cache.UpgradeReq_miss_rate 1 # miss rate for UpgradeReq accesses +system.cpu.l2cache.UpgradeReq_misses 14 # number of UpgradeReq misses +system.cpu.l2cache.UpgradeReq_mshr_miss_latency 33500 # number of UpgradeReq MSHR miss cycles +system.cpu.l2cache.UpgradeReq_mshr_miss_rate 1 # mshr miss rate for UpgradeReq accesses +system.cpu.l2cache.UpgradeReq_mshr_misses 14 # number of UpgradeReq MSHR misses system.cpu.l2cache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked system.cpu.l2cache.avg_blocked_cycles_no_targets # average number of cycles each access was blocked -system.cpu.l2cache.avg_refs 0 # Average number of references to valid blocks. +system.cpu.l2cache.avg_refs 0.005013 # Average number of references to valid blocks. system.cpu.l2cache.blocked_no_mshrs 0 # number of cycles access was blocked system.cpu.l2cache.blocked_no_targets 0 # number of cycles access was blocked system.cpu.l2cache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked system.cpu.l2cache.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu.l2cache.cache_copies 0 # number of cache copies performed -system.cpu.l2cache.demand_accesses 483 # number of demand (read+write) accesses -system.cpu.l2cache.demand_avg_miss_latency 4644.927536 # average overall miss latency -system.cpu.l2cache.demand_avg_mshr_miss_latency 2467.908903 # average overall mshr miss latency -system.cpu.l2cache.demand_hits 0 # number of demand (read+write) hits -system.cpu.l2cache.demand_miss_latency 2243500 # number of demand (read+write) miss cycles -system.cpu.l2cache.demand_miss_rate 1 # miss rate for demand accesses -system.cpu.l2cache.demand_misses 483 # number of demand (read+write) misses +system.cpu.l2cache.demand_accesses 488 # number of demand (read+write) accesses +system.cpu.l2cache.demand_avg_miss_latency 3442.386831 # average overall miss latency +system.cpu.l2cache.demand_avg_mshr_miss_latency 2442.386831 # average overall mshr miss latency +system.cpu.l2cache.demand_hits 2 # number of demand (read+write) hits +system.cpu.l2cache.demand_miss_latency 1673000 # number of demand (read+write) miss cycles +system.cpu.l2cache.demand_miss_rate 0.995902 # miss rate for demand accesses +system.cpu.l2cache.demand_misses 486 # number of demand (read+write) misses system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu.l2cache.demand_mshr_miss_latency 1192000 # number of demand (read+write) MSHR miss cycles -system.cpu.l2cache.demand_mshr_miss_rate 1 # mshr miss rate for demand accesses -system.cpu.l2cache.demand_mshr_misses 483 # number of demand (read+write) MSHR misses +system.cpu.l2cache.demand_mshr_miss_latency 1187000 # number of demand (read+write) MSHR miss cycles +system.cpu.l2cache.demand_mshr_miss_rate 0.995902 # mshr miss rate for demand accesses +system.cpu.l2cache.demand_mshr_misses 486 # number of demand (read+write) MSHR misses system.cpu.l2cache.fast_writes 0 # number of fast writes performed system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.l2cache.overall_accesses 483 # number of overall (read+write) accesses -system.cpu.l2cache.overall_avg_miss_latency 4644.927536 # average overall miss latency -system.cpu.l2cache.overall_avg_mshr_miss_latency 2467.908903 # average overall mshr miss latency +system.cpu.l2cache.overall_accesses 488 # number of overall (read+write) accesses +system.cpu.l2cache.overall_avg_miss_latency 3442.386831 # average overall miss latency +system.cpu.l2cache.overall_avg_mshr_miss_latency 2442.386831 # average overall mshr miss latency system.cpu.l2cache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency -system.cpu.l2cache.overall_hits 0 # number of overall hits -system.cpu.l2cache.overall_miss_latency 2243500 # number of overall miss cycles -system.cpu.l2cache.overall_miss_rate 1 # miss rate for overall accesses -system.cpu.l2cache.overall_misses 483 # number of overall misses +system.cpu.l2cache.overall_hits 2 # number of overall hits +system.cpu.l2cache.overall_miss_latency 1673000 # number of overall miss cycles +system.cpu.l2cache.overall_miss_rate 0.995902 # miss rate for overall accesses +system.cpu.l2cache.overall_misses 486 # number of overall misses system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu.l2cache.overall_mshr_miss_latency 1192000 # number of overall MSHR miss cycles -system.cpu.l2cache.overall_mshr_miss_rate 1 # mshr miss rate for overall accesses -system.cpu.l2cache.overall_mshr_misses 483 # number of overall MSHR misses +system.cpu.l2cache.overall_mshr_miss_latency 1187000 # number of overall MSHR miss cycles +system.cpu.l2cache.overall_mshr_miss_rate 0.995902 # mshr miss rate for overall accesses +system.cpu.l2cache.overall_mshr_misses 486 # number of overall MSHR misses system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.l2cache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.cpu.l2cache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache @@ -380,29 +399,29 @@ system.cpu.l2cache.prefetcher.num_hwpf_removed_MSHR_hit 0 system.cpu.l2cache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time system.cpu.l2cache.replacements 0 # number of replacements -system.cpu.l2cache.sampled_refs 483 # Sample count of references to valid blocks. +system.cpu.l2cache.sampled_refs 399 # Sample count of references to valid blocks. system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.l2cache.tagsinuse 278.204751 # Cycle average of tags in use -system.cpu.l2cache.total_refs 0 # Total number of references to valid blocks. +system.cpu.l2cache.tagsinuse 223.758944 # Cycle average of tags in use +system.cpu.l2cache.total_refs 2 # Total number of references to valid blocks. system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.l2cache.writebacks 0 # number of writebacks -system.cpu.numCycles 9201 # number of cpu cycles simulated -system.cpu.rename.RENAME:BlockCycles 15 # Number of cycles rename is blocking +system.cpu.numCycles 8341 # number of cpu cycles simulated +system.cpu.rename.RENAME:BlockCycles 14 # Number of cycles rename is blocking system.cpu.rename.RENAME:CommittedMaps 4051 # Number of HB maps that are committed -system.cpu.rename.RENAME:IdleCycles 6382 # Number of cycles rename is idle -system.cpu.rename.RENAME:LSQFullEvents 70 # Number of times rename has blocked due to LSQ full -system.cpu.rename.RENAME:RenameLookups 12837 # Number of register rename lookups that rename has made -system.cpu.rename.RENAME:RenamedInsts 10018 # Number of instructions processed by rename -system.cpu.rename.RENAME:RenamedOperands 7477 # Number of destination operands rename has renamed -system.cpu.rename.RENAME:RunCycles 1754 # Number of cycles rename is running -system.cpu.rename.RENAME:SquashCycles 679 # Number of cycles rename is squashing -system.cpu.rename.RENAME:UnblockCycles 101 # Number of cycles rename is unblocking -system.cpu.rename.RENAME:UndoneMaps 3426 # Number of HB maps that are undone due to squashing -system.cpu.rename.RENAME:serializeStallCycles 270 # count of cycles rename stalled for serializing inst +system.cpu.rename.RENAME:IdleCycles 5339 # Number of cycles rename is idle +system.cpu.rename.RENAME:LSQFullEvents 71 # Number of times rename has blocked due to LSQ full +system.cpu.rename.RENAME:RenameLookups 13891 # Number of register rename lookups that rename has made +system.cpu.rename.RENAME:RenamedInsts 10852 # Number of instructions processed by rename +system.cpu.rename.RENAME:RenamedOperands 8114 # Number of destination operands rename has renamed +system.cpu.rename.RENAME:RunCycles 1888 # Number of cycles rename is running +system.cpu.rename.RENAME:SquashCycles 726 # Number of cycles rename is squashing +system.cpu.rename.RENAME:UnblockCycles 102 # Number of cycles rename is unblocking +system.cpu.rename.RENAME:UndoneMaps 4063 # Number of HB maps that are undone due to squashing +system.cpu.rename.RENAME:serializeStallCycles 272 # count of cycles rename stalled for serializing inst system.cpu.rename.RENAME:serializingInsts 26 # count of serializing insts renamed -system.cpu.rename.RENAME:skidInsts 380 # count of insts added to the skid buffer -system.cpu.rename.RENAME:tempSerializingInsts 20 # count of temporary serializing insts renamed -system.cpu.timesIdled 26 # Number of times that the entire CPU went into an idle state and unscheduled itself +system.cpu.rename.RENAME:skidInsts 382 # count of insts added to the skid buffer +system.cpu.rename.RENAME:tempSerializingInsts 21 # count of temporary serializing insts renamed +system.cpu.timesIdled 3 # Number of times that the entire CPU went into an idle state and unscheduled itself system.cpu.workload.PROG:num_syscalls 17 # Number of system calls ---------- End Simulation Statistics ---------- diff --git a/tests/quick/00.hello/ref/alpha/linux/o3-timing/stdout b/tests/quick/00.hello/ref/alpha/linux/o3-timing/stdout index 142cb9695..22c3e0435 100644 --- a/tests/quick/00.hello/ref/alpha/linux/o3-timing/stdout +++ b/tests/quick/00.hello/ref/alpha/linux/o3-timing/stdout @@ -6,9 +6,9 @@ The Regents of The University of Michigan All Rights Reserved -M5 compiled Jun 21 2007 21:25:27 -M5 started Fri Jun 22 00:04:38 2007 +M5 compiled Aug 3 2007 03:56:47 +M5 started Fri Aug 3 04:17:12 2007 M5 executing on zizzer.eecs.umich.edu command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/quick/00.hello/alpha/linux/o3-timing tests/run.py quick/00.hello/alpha/linux/o3-timing Global frequency set at 1000000000000 ticks per second -Exiting @ tick 4589500 because target called exit() +Exiting @ tick 4170500 because target called exit() diff --git a/tests/quick/00.hello/ref/alpha/linux/simple-atomic/config.ini b/tests/quick/00.hello/ref/alpha/linux/simple-atomic/config.ini index e4dfe86d3..d025afdec 100644 --- a/tests/quick/00.hello/ref/alpha/linux/simple-atomic/config.ini +++ b/tests/quick/00.hello/ref/alpha/linux/simple-atomic/config.ini @@ -11,7 +11,7 @@ physmem=system.physmem [system.cpu] type=AtomicSimpleCPU -children=workload +children=tracer workload clock=500 cpu_id=0 defer_registration=false @@ -25,11 +25,15 @@ phase=0 progress_interval=0 simulate_stalls=false system=system +tracer=system.cpu.tracer width=1 workload=system.cpu.workload dcache_port=system.membus.port[2] icache_port=system.membus.port[1] +[system.cpu.tracer] +type=ExeTracer + [system.cpu.workload] type=LiveProcess cmd=hello diff --git a/tests/quick/00.hello/ref/alpha/linux/simple-atomic/m5stats.txt b/tests/quick/00.hello/ref/alpha/linux/simple-atomic/m5stats.txt index f1c7bd968..f87ad2cd6 100644 --- a/tests/quick/00.hello/ref/alpha/linux/simple-atomic/m5stats.txt +++ b/tests/quick/00.hello/ref/alpha/linux/simple-atomic/m5stats.txt @@ -1,8 +1,9 @@ ---------- Begin Simulation Statistics ---------- -host_inst_rate 93019 # Simulator instruction rate (inst/s) -host_seconds 0.06 # Real time elapsed on the host -host_tick_rate 46199079 # Simulator tick rate (ticks/s) +host_inst_rate 109073 # Simulator instruction rate (inst/s) +host_mem_usage 148564 # Number of bytes of host memory used +host_seconds 0.05 # Real time elapsed on the host +host_tick_rate 54123810 # Simulator tick rate (ticks/s) sim_freq 1000000000000 # Frequency of simulated ticks sim_insts 5642 # Number of instructions simulated sim_seconds 0.000003 # Number of seconds simulated diff --git a/tests/quick/00.hello/ref/alpha/linux/simple-atomic/stdout b/tests/quick/00.hello/ref/alpha/linux/simple-atomic/stdout index 58fc0e374..0bec3d18f 100644 --- a/tests/quick/00.hello/ref/alpha/linux/simple-atomic/stdout +++ b/tests/quick/00.hello/ref/alpha/linux/simple-atomic/stdout @@ -6,9 +6,9 @@ The Regents of The University of Michigan All Rights Reserved -M5 compiled Jun 10 2007 14:06:20 -M5 started Sun Jun 10 14:22:34 2007 -M5 executing on iceaxe -command line: /Users/nate/build/outgoing/build/ALPHA_SE/m5.debug -d /Users/nate/build/outgoing/build/ALPHA_SE/tests/debug/quick/00.hello/alpha/linux/simple-atomic tests/run.py quick/00.hello/alpha/linux/simple-atomic +M5 compiled Aug 3 2007 03:56:47 +M5 started Fri Aug 3 04:17:12 2007 +M5 executing on zizzer.eecs.umich.edu +command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/quick/00.hello/alpha/linux/simple-atomic tests/run.py quick/00.hello/alpha/linux/simple-atomic Global frequency set at 1000000000000 ticks per second Exiting @ tick 2820500 because target called exit() diff --git a/tests/quick/00.hello/ref/alpha/linux/simple-timing/config.ini b/tests/quick/00.hello/ref/alpha/linux/simple-timing/config.ini index 47315cc1d..5ae318852 100644 --- a/tests/quick/00.hello/ref/alpha/linux/simple-timing/config.ini +++ b/tests/quick/00.hello/ref/alpha/linux/simple-timing/config.ini @@ -11,7 +11,7 @@ physmem=system.physmem [system.cpu] type=TimingSimpleCPU -children=dcache icache l2cache toL2Bus workload +children=dcache icache l2cache toL2Bus tracer workload clock=500 cpu_id=0 defer_registration=false @@ -24,17 +24,16 @@ max_loads_any_thread=0 phase=0 progress_interval=0 system=system +tracer=system.cpu.tracer workload=system.cpu.workload dcache_port=system.cpu.dcache.cpu_side icache_port=system.cpu.icache.cpu_side [system.cpu.dcache] type=BaseCache -adaptive_compression=false +addr_range=0:18446744073709551615 assoc=2 block_size=64 -compressed_bus=false -compression_latency=0 hash_delay=1 latency=1000 lifo=false @@ -52,12 +51,10 @@ prefetch_serial_squash=false prefetch_use_cpu_id=true prefetcher_size=100 prioritizeRequests=false -protocol=Null repl=Null size=262144 split=false split_size=0 -store_compressed=false subblock_size=0 tgts_per_mshr=5 trace_addr=0 @@ -68,11 +65,9 @@ mem_side=system.cpu.toL2Bus.port[1] [system.cpu.icache] type=BaseCache -adaptive_compression=false +addr_range=0:18446744073709551615 assoc=2 block_size=64 -compressed_bus=false -compression_latency=0 hash_delay=1 latency=1000 lifo=false @@ -90,12 +85,10 @@ prefetch_serial_squash=false prefetch_use_cpu_id=true prefetcher_size=100 prioritizeRequests=false -protocol=Null repl=Null size=131072 split=false split_size=0 -store_compressed=false subblock_size=0 tgts_per_mshr=5 trace_addr=0 @@ -106,11 +99,9 @@ mem_side=system.cpu.toL2Bus.port[0] [system.cpu.l2cache] type=BaseCache -adaptive_compression=false +addr_range=0:18446744073709551615 assoc=2 block_size=64 -compressed_bus=false -compression_latency=0 hash_delay=1 latency=10000 lifo=false @@ -128,12 +119,10 @@ prefetch_serial_squash=false prefetch_use_cpu_id=true prefetcher_size=100 prioritizeRequests=false -protocol=Null repl=Null size=2097152 split=false split_size=0 -store_compressed=false subblock_size=0 tgts_per_mshr=5 trace_addr=0 @@ -151,6 +140,9 @@ responder_set=false width=64 port=system.cpu.icache.mem_side system.cpu.dcache.mem_side system.cpu.l2cache.cpu_side +[system.cpu.tracer] +type=ExeTracer + [system.cpu.workload] type=LiveProcess cmd=hello diff --git a/tests/quick/00.hello/ref/alpha/linux/simple-timing/m5stats.txt b/tests/quick/00.hello/ref/alpha/linux/simple-timing/m5stats.txt index 1b70f10b3..db7224c2e 100644 --- a/tests/quick/00.hello/ref/alpha/linux/simple-timing/m5stats.txt +++ b/tests/quick/00.hello/ref/alpha/linux/simple-timing/m5stats.txt @@ -1,12 +1,13 @@ ---------- Begin Simulation Statistics ---------- -host_inst_rate 54390 # Simulator instruction rate (inst/s) -host_seconds 0.10 # Real time elapsed on the host -host_tick_rate 126525357 # Simulator tick rate (ticks/s) +host_inst_rate 65923 # Simulator instruction rate (inst/s) +host_mem_usage 154180 # Number of bytes of host memory used +host_seconds 0.09 # Real time elapsed on the host +host_tick_rate 155349854 # Simulator tick rate (ticks/s) sim_freq 1000000000000 # Frequency of simulated ticks sim_insts 5642 # Number of instructions simulated sim_seconds 0.000013 # Number of seconds simulated -sim_ticks 13168000 # Number of ticks simulated +sim_ticks 13359000 # Number of ticks simulated system.cpu.dcache.ReadReq_accesses 979 # number of ReadReq accesses(hits+misses) system.cpu.dcache.ReadReq_avg_miss_latency 14000 # average ReadReq miss latency system.cpu.dcache.ReadReq_avg_mshr_miss_latency 13000 # average ReadReq mshr miss latency @@ -20,13 +21,13 @@ system.cpu.dcache.ReadReq_mshr_misses 92 # nu system.cpu.dcache.WriteReq_accesses 812 # number of WriteReq accesses(hits+misses) system.cpu.dcache.WriteReq_avg_miss_latency 14000 # average WriteReq miss latency system.cpu.dcache.WriteReq_avg_mshr_miss_latency 13000 # average WriteReq mshr miss latency -system.cpu.dcache.WriteReq_hits 739 # number of WriteReq hits -system.cpu.dcache.WriteReq_miss_latency 1022000 # number of WriteReq miss cycles -system.cpu.dcache.WriteReq_miss_rate 0.089901 # miss rate for WriteReq accesses -system.cpu.dcache.WriteReq_misses 73 # number of WriteReq misses -system.cpu.dcache.WriteReq_mshr_miss_latency 949000 # number of WriteReq MSHR miss cycles -system.cpu.dcache.WriteReq_mshr_miss_rate 0.089901 # mshr miss rate for WriteReq accesses -system.cpu.dcache.WriteReq_mshr_misses 73 # number of WriteReq MSHR misses +system.cpu.dcache.WriteReq_hits 725 # number of WriteReq hits +system.cpu.dcache.WriteReq_miss_latency 1218000 # number of WriteReq miss cycles +system.cpu.dcache.WriteReq_miss_rate 0.107143 # miss rate for WriteReq accesses +system.cpu.dcache.WriteReq_misses 87 # number of WriteReq misses +system.cpu.dcache.WriteReq_mshr_miss_latency 1131000 # number of WriteReq MSHR miss cycles +system.cpu.dcache.WriteReq_mshr_miss_rate 0.107143 # mshr miss rate for WriteReq accesses +system.cpu.dcache.WriteReq_mshr_misses 87 # number of WriteReq MSHR misses system.cpu.dcache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked system.cpu.dcache.avg_blocked_cycles_no_targets # average number of cycles each access was blocked system.cpu.dcache.avg_refs 9.854545 # Average number of references to valid blocks. @@ -38,14 +39,14 @@ system.cpu.dcache.cache_copies 0 # nu system.cpu.dcache.demand_accesses 1791 # number of demand (read+write) accesses system.cpu.dcache.demand_avg_miss_latency 14000 # average overall miss latency system.cpu.dcache.demand_avg_mshr_miss_latency 13000 # average overall mshr miss latency -system.cpu.dcache.demand_hits 1626 # number of demand (read+write) hits -system.cpu.dcache.demand_miss_latency 2310000 # number of demand (read+write) miss cycles -system.cpu.dcache.demand_miss_rate 0.092127 # miss rate for demand accesses -system.cpu.dcache.demand_misses 165 # number of demand (read+write) misses +system.cpu.dcache.demand_hits 1612 # number of demand (read+write) hits +system.cpu.dcache.demand_miss_latency 2506000 # number of demand (read+write) miss cycles +system.cpu.dcache.demand_miss_rate 0.099944 # miss rate for demand accesses +system.cpu.dcache.demand_misses 179 # number of demand (read+write) misses system.cpu.dcache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu.dcache.demand_mshr_miss_latency 2145000 # number of demand (read+write) MSHR miss cycles -system.cpu.dcache.demand_mshr_miss_rate 0.092127 # mshr miss rate for demand accesses -system.cpu.dcache.demand_mshr_misses 165 # number of demand (read+write) MSHR misses +system.cpu.dcache.demand_mshr_miss_latency 2327000 # number of demand (read+write) MSHR miss cycles +system.cpu.dcache.demand_mshr_miss_rate 0.099944 # mshr miss rate for demand accesses +system.cpu.dcache.demand_mshr_misses 179 # number of demand (read+write) MSHR misses system.cpu.dcache.fast_writes 0 # number of fast writes performed system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate @@ -53,14 +54,14 @@ system.cpu.dcache.overall_accesses 1791 # nu system.cpu.dcache.overall_avg_miss_latency 14000 # average overall miss latency system.cpu.dcache.overall_avg_mshr_miss_latency 13000 # average overall mshr miss latency system.cpu.dcache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency -system.cpu.dcache.overall_hits 1626 # number of overall hits -system.cpu.dcache.overall_miss_latency 2310000 # number of overall miss cycles -system.cpu.dcache.overall_miss_rate 0.092127 # miss rate for overall accesses -system.cpu.dcache.overall_misses 165 # number of overall misses +system.cpu.dcache.overall_hits 1612 # number of overall hits +system.cpu.dcache.overall_miss_latency 2506000 # number of overall miss cycles +system.cpu.dcache.overall_miss_rate 0.099944 # miss rate for overall accesses +system.cpu.dcache.overall_misses 179 # number of overall misses system.cpu.dcache.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu.dcache.overall_mshr_miss_latency 2145000 # number of overall MSHR miss cycles -system.cpu.dcache.overall_mshr_miss_rate 0.092127 # mshr miss rate for overall accesses -system.cpu.dcache.overall_mshr_misses 165 # number of overall MSHR misses +system.cpu.dcache.overall_mshr_miss_latency 2327000 # number of overall MSHR miss cycles +system.cpu.dcache.overall_mshr_miss_rate 0.099944 # mshr miss rate for overall accesses +system.cpu.dcache.overall_mshr_misses 179 # number of overall MSHR misses system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.dcache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.cpu.dcache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache @@ -75,18 +76,18 @@ system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss 0 system.cpu.dcache.replacements 0 # number of replacements system.cpu.dcache.sampled_refs 165 # Sample count of references to valid blocks. system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.dcache.tagsinuse 103.640117 # Cycle average of tags in use +system.cpu.dcache.tagsinuse 103.895955 # Cycle average of tags in use system.cpu.dcache.total_refs 1626 # Total number of references to valid blocks. system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.dcache.writebacks 0 # number of writebacks system.cpu.icache.ReadReq_accesses 5643 # number of ReadReq accesses(hits+misses) -system.cpu.icache.ReadReq_avg_miss_latency 13960.288809 # average ReadReq miss latency -system.cpu.icache.ReadReq_avg_mshr_miss_latency 12960.288809 # average ReadReq mshr miss latency +system.cpu.icache.ReadReq_avg_miss_latency 13992.779783 # average ReadReq miss latency +system.cpu.icache.ReadReq_avg_mshr_miss_latency 12992.779783 # average ReadReq mshr miss latency system.cpu.icache.ReadReq_hits 5366 # number of ReadReq hits -system.cpu.icache.ReadReq_miss_latency 3867000 # number of ReadReq miss cycles +system.cpu.icache.ReadReq_miss_latency 3876000 # number of ReadReq miss cycles system.cpu.icache.ReadReq_miss_rate 0.049087 # miss rate for ReadReq accesses system.cpu.icache.ReadReq_misses 277 # number of ReadReq misses -system.cpu.icache.ReadReq_mshr_miss_latency 3590000 # number of ReadReq MSHR miss cycles +system.cpu.icache.ReadReq_mshr_miss_latency 3599000 # number of ReadReq MSHR miss cycles system.cpu.icache.ReadReq_mshr_miss_rate 0.049087 # mshr miss rate for ReadReq accesses system.cpu.icache.ReadReq_mshr_misses 277 # number of ReadReq MSHR misses system.cpu.icache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked @@ -98,29 +99,29 @@ system.cpu.icache.blocked_cycles_no_mshrs 0 # n system.cpu.icache.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu.icache.cache_copies 0 # number of cache copies performed system.cpu.icache.demand_accesses 5643 # number of demand (read+write) accesses -system.cpu.icache.demand_avg_miss_latency 13960.288809 # average overall miss latency -system.cpu.icache.demand_avg_mshr_miss_latency 12960.288809 # average overall mshr miss latency +system.cpu.icache.demand_avg_miss_latency 13992.779783 # average overall miss latency +system.cpu.icache.demand_avg_mshr_miss_latency 12992.779783 # average overall mshr miss latency system.cpu.icache.demand_hits 5366 # number of demand (read+write) hits -system.cpu.icache.demand_miss_latency 3867000 # number of demand (read+write) miss cycles +system.cpu.icache.demand_miss_latency 3876000 # number of demand (read+write) miss cycles system.cpu.icache.demand_miss_rate 0.049087 # miss rate for demand accesses system.cpu.icache.demand_misses 277 # number of demand (read+write) misses system.cpu.icache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu.icache.demand_mshr_miss_latency 3590000 # number of demand (read+write) MSHR miss cycles +system.cpu.icache.demand_mshr_miss_latency 3599000 # number of demand (read+write) MSHR miss cycles system.cpu.icache.demand_mshr_miss_rate 0.049087 # mshr miss rate for demand accesses system.cpu.icache.demand_mshr_misses 277 # number of demand (read+write) MSHR misses system.cpu.icache.fast_writes 0 # number of fast writes performed system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate system.cpu.icache.overall_accesses 5643 # number of overall (read+write) accesses -system.cpu.icache.overall_avg_miss_latency 13960.288809 # average overall miss latency -system.cpu.icache.overall_avg_mshr_miss_latency 12960.288809 # average overall mshr miss latency +system.cpu.icache.overall_avg_miss_latency 13992.779783 # average overall miss latency +system.cpu.icache.overall_avg_mshr_miss_latency 12992.779783 # average overall mshr miss latency system.cpu.icache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency system.cpu.icache.overall_hits 5366 # number of overall hits -system.cpu.icache.overall_miss_latency 3867000 # number of overall miss cycles +system.cpu.icache.overall_miss_latency 3876000 # number of overall miss cycles system.cpu.icache.overall_miss_rate 0.049087 # miss rate for overall accesses system.cpu.icache.overall_misses 277 # number of overall misses system.cpu.icache.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu.icache.overall_mshr_miss_latency 3590000 # number of overall MSHR miss cycles +system.cpu.icache.overall_mshr_miss_latency 3599000 # number of overall MSHR miss cycles system.cpu.icache.overall_mshr_miss_rate 0.049087 # mshr miss rate for overall accesses system.cpu.icache.overall_mshr_misses 277 # number of overall MSHR misses system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles @@ -137,53 +138,72 @@ system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss 0 system.cpu.icache.replacements 0 # number of replacements system.cpu.icache.sampled_refs 277 # Sample count of references to valid blocks. system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.icache.tagsinuse 129.241810 # Cycle average of tags in use +system.cpu.icache.tagsinuse 129.745202 # Cycle average of tags in use system.cpu.icache.total_refs 5366 # Total number of references to valid blocks. system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.icache.writebacks 0 # number of writebacks system.cpu.idle_fraction 0 # Percentage of idle cycles -system.cpu.l2cache.ReadReq_accesses 441 # number of ReadReq accesses(hits+misses) -system.cpu.l2cache.ReadReq_avg_miss_latency 13000 # average ReadReq miss latency +system.cpu.l2cache.ReadExReq_accesses 73 # number of ReadExReq accesses(hits+misses) +system.cpu.l2cache.ReadExReq_avg_miss_latency 12000 # average ReadExReq miss latency +system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency 11000 # average ReadExReq mshr miss latency +system.cpu.l2cache.ReadExReq_miss_latency 876000 # number of ReadExReq miss cycles +system.cpu.l2cache.ReadExReq_miss_rate 1 # miss rate for ReadExReq accesses +system.cpu.l2cache.ReadExReq_misses 73 # number of ReadExReq misses +system.cpu.l2cache.ReadExReq_mshr_miss_latency 803000 # number of ReadExReq MSHR miss cycles +system.cpu.l2cache.ReadExReq_mshr_miss_rate 1 # mshr miss rate for ReadExReq accesses +system.cpu.l2cache.ReadExReq_mshr_misses 73 # number of ReadExReq MSHR misses +system.cpu.l2cache.ReadReq_accesses 369 # number of ReadReq accesses(hits+misses) +system.cpu.l2cache.ReadReq_avg_miss_latency 12000 # average ReadReq miss latency system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 11000 # average ReadReq mshr miss latency -system.cpu.l2cache.ReadReq_miss_latency 5733000 # number of ReadReq miss cycles -system.cpu.l2cache.ReadReq_miss_rate 1 # miss rate for ReadReq accesses -system.cpu.l2cache.ReadReq_misses 441 # number of ReadReq misses -system.cpu.l2cache.ReadReq_mshr_miss_latency 4851000 # number of ReadReq MSHR miss cycles -system.cpu.l2cache.ReadReq_mshr_miss_rate 1 # mshr miss rate for ReadReq accesses -system.cpu.l2cache.ReadReq_mshr_misses 441 # number of ReadReq MSHR misses +system.cpu.l2cache.ReadReq_hits 1 # number of ReadReq hits +system.cpu.l2cache.ReadReq_miss_latency 4416000 # number of ReadReq miss cycles +system.cpu.l2cache.ReadReq_miss_rate 0.997290 # miss rate for ReadReq accesses +system.cpu.l2cache.ReadReq_misses 368 # number of ReadReq misses +system.cpu.l2cache.ReadReq_mshr_miss_latency 4048000 # number of ReadReq MSHR miss cycles +system.cpu.l2cache.ReadReq_mshr_miss_rate 0.997290 # mshr miss rate for ReadReq accesses +system.cpu.l2cache.ReadReq_mshr_misses 368 # number of ReadReq MSHR misses +system.cpu.l2cache.UpgradeReq_accesses 14 # number of UpgradeReq accesses(hits+misses) +system.cpu.l2cache.UpgradeReq_avg_miss_latency 12000 # average UpgradeReq miss latency +system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency 11000 # average UpgradeReq mshr miss latency +system.cpu.l2cache.UpgradeReq_miss_latency 168000 # number of UpgradeReq miss cycles +system.cpu.l2cache.UpgradeReq_miss_rate 1 # miss rate for UpgradeReq accesses +system.cpu.l2cache.UpgradeReq_misses 14 # number of UpgradeReq misses +system.cpu.l2cache.UpgradeReq_mshr_miss_latency 154000 # number of UpgradeReq MSHR miss cycles +system.cpu.l2cache.UpgradeReq_mshr_miss_rate 1 # mshr miss rate for UpgradeReq accesses +system.cpu.l2cache.UpgradeReq_mshr_misses 14 # number of UpgradeReq MSHR misses system.cpu.l2cache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked system.cpu.l2cache.avg_blocked_cycles_no_targets # average number of cycles each access was blocked -system.cpu.l2cache.avg_refs 0 # Average number of references to valid blocks. +system.cpu.l2cache.avg_refs 0.002825 # Average number of references to valid blocks. system.cpu.l2cache.blocked_no_mshrs 0 # number of cycles access was blocked system.cpu.l2cache.blocked_no_targets 0 # number of cycles access was blocked system.cpu.l2cache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked system.cpu.l2cache.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu.l2cache.cache_copies 0 # number of cache copies performed -system.cpu.l2cache.demand_accesses 441 # number of demand (read+write) accesses -system.cpu.l2cache.demand_avg_miss_latency 13000 # average overall miss latency +system.cpu.l2cache.demand_accesses 442 # number of demand (read+write) accesses +system.cpu.l2cache.demand_avg_miss_latency 12000 # average overall miss latency system.cpu.l2cache.demand_avg_mshr_miss_latency 11000 # average overall mshr miss latency -system.cpu.l2cache.demand_hits 0 # number of demand (read+write) hits -system.cpu.l2cache.demand_miss_latency 5733000 # number of demand (read+write) miss cycles -system.cpu.l2cache.demand_miss_rate 1 # miss rate for demand accesses +system.cpu.l2cache.demand_hits 1 # number of demand (read+write) hits +system.cpu.l2cache.demand_miss_latency 5292000 # number of demand (read+write) miss cycles +system.cpu.l2cache.demand_miss_rate 0.997738 # miss rate for demand accesses system.cpu.l2cache.demand_misses 441 # number of demand (read+write) misses system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits system.cpu.l2cache.demand_mshr_miss_latency 4851000 # number of demand (read+write) MSHR miss cycles -system.cpu.l2cache.demand_mshr_miss_rate 1 # mshr miss rate for demand accesses +system.cpu.l2cache.demand_mshr_miss_rate 0.997738 # mshr miss rate for demand accesses system.cpu.l2cache.demand_mshr_misses 441 # number of demand (read+write) MSHR misses system.cpu.l2cache.fast_writes 0 # number of fast writes performed system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.l2cache.overall_accesses 441 # number of overall (read+write) accesses -system.cpu.l2cache.overall_avg_miss_latency 13000 # average overall miss latency +system.cpu.l2cache.overall_accesses 442 # number of overall (read+write) accesses +system.cpu.l2cache.overall_avg_miss_latency 12000 # average overall miss latency system.cpu.l2cache.overall_avg_mshr_miss_latency 11000 # average overall mshr miss latency system.cpu.l2cache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency -system.cpu.l2cache.overall_hits 0 # number of overall hits -system.cpu.l2cache.overall_miss_latency 5733000 # number of overall miss cycles -system.cpu.l2cache.overall_miss_rate 1 # miss rate for overall accesses +system.cpu.l2cache.overall_hits 1 # number of overall hits +system.cpu.l2cache.overall_miss_latency 5292000 # number of overall miss cycles +system.cpu.l2cache.overall_miss_rate 0.997738 # miss rate for overall accesses system.cpu.l2cache.overall_misses 441 # number of overall misses system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits system.cpu.l2cache.overall_mshr_miss_latency 4851000 # number of overall MSHR miss cycles -system.cpu.l2cache.overall_mshr_miss_rate 1 # mshr miss rate for overall accesses +system.cpu.l2cache.overall_mshr_miss_rate 0.997738 # mshr miss rate for overall accesses system.cpu.l2cache.overall_mshr_misses 441 # number of overall MSHR misses system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.l2cache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses @@ -197,14 +217,14 @@ system.cpu.l2cache.prefetcher.num_hwpf_removed_MSHR_hit 0 system.cpu.l2cache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time system.cpu.l2cache.replacements 0 # number of replacements -system.cpu.l2cache.sampled_refs 441 # Sample count of references to valid blocks. +system.cpu.l2cache.sampled_refs 354 # Sample count of references to valid blocks. system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.l2cache.tagsinuse 232.802947 # Cycle average of tags in use -system.cpu.l2cache.total_refs 0 # Total number of references to valid blocks. +system.cpu.l2cache.tagsinuse 179.464793 # Cycle average of tags in use +system.cpu.l2cache.total_refs 1 # Total number of references to valid blocks. system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.l2cache.writebacks 0 # number of writebacks system.cpu.not_idle_fraction 1 # Percentage of non-idle cycles -system.cpu.numCycles 13168000 # number of cpu cycles simulated +system.cpu.numCycles 13359000 # number of cpu cycles simulated system.cpu.num_insts 5642 # Number of instructions executed system.cpu.num_refs 1792 # Number of memory references system.cpu.workload.PROG:num_syscalls 17 # Number of system calls diff --git a/tests/quick/00.hello/ref/alpha/linux/simple-timing/stdout b/tests/quick/00.hello/ref/alpha/linux/simple-timing/stdout index 501ba5063..d25a0624e 100644 --- a/tests/quick/00.hello/ref/alpha/linux/simple-timing/stdout +++ b/tests/quick/00.hello/ref/alpha/linux/simple-timing/stdout @@ -6,9 +6,9 @@ The Regents of The University of Michigan All Rights Reserved -M5 compiled Jun 10 2007 14:06:20 -M5 started Sun Jun 10 14:22:35 2007 -M5 executing on iceaxe -command line: /Users/nate/build/outgoing/build/ALPHA_SE/m5.debug -d /Users/nate/build/outgoing/build/ALPHA_SE/tests/debug/quick/00.hello/alpha/linux/simple-timing tests/run.py quick/00.hello/alpha/linux/simple-timing +M5 compiled Aug 3 2007 03:56:47 +M5 started Fri Aug 3 04:17:13 2007 +M5 executing on zizzer.eecs.umich.edu +command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/quick/00.hello/alpha/linux/simple-timing tests/run.py quick/00.hello/alpha/linux/simple-timing Global frequency set at 1000000000000 ticks per second -Exiting @ tick 13168000 because target called exit() +Exiting @ tick 13359000 because target called exit() diff --git a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.ini b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.ini index 36a50c983..ca7690f17 100644 --- a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.ini +++ b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/config.ini @@ -11,7 +11,7 @@ physmem=system.physmem [system.cpu] type=DerivO3CPU -children=dcache fuPool icache l2cache toL2Bus workload +children=dcache fuPool icache l2cache toL2Bus tracer workload BTBEntries=4096 BTBTagSize=16 LFSTSize=1024 @@ -86,6 +86,7 @@ smtROBPolicy=Partitioned smtROBThreshold=100 squashWidth=8 system=system +tracer=system.cpu.tracer trapLatency=13 wbDepth=1 wbWidth=8 @@ -95,12 +96,9 @@ icache_port=system.cpu.icache.cpu_side [system.cpu.dcache] type=BaseCache -adaptive_compression=false addr_range=0:18446744073709551615 assoc=2 block_size=64 -compressed_bus=false -compression_latency=0 hash_delay=1 latency=1000 lifo=false @@ -118,12 +116,10 @@ prefetch_serial_squash=false prefetch_use_cpu_id=true prefetcher_size=100 prioritizeRequests=false -protocol=Null repl=Null size=262144 split=false split_size=0 -store_compressed=false subblock_size=0 tgts_per_mshr=20 trace_addr=0 @@ -139,11 +135,11 @@ FUList=system.cpu.fuPool.FUList0 system.cpu.fuPool.FUList1 system.cpu.fuPool.FUL [system.cpu.fuPool.FUList0] type=FUDesc -children=opList0 +children=opList count=6 -opList=system.cpu.fuPool.FUList0.opList0 +opList=system.cpu.fuPool.FUList0.opList -[system.cpu.fuPool.FUList0.opList0] +[system.cpu.fuPool.FUList0.opList] type=OpDesc issueLat=1 opClass=IntAlu @@ -217,11 +213,11 @@ opLat=24 [system.cpu.fuPool.FUList4] type=FUDesc -children=opList0 +children=opList count=0 -opList=system.cpu.fuPool.FUList4.opList0 +opList=system.cpu.fuPool.FUList4.opList -[system.cpu.fuPool.FUList4.opList0] +[system.cpu.fuPool.FUList4.opList] type=OpDesc issueLat=1 opClass=MemRead @@ -229,11 +225,11 @@ opLat=1 [system.cpu.fuPool.FUList5] type=FUDesc -children=opList0 +children=opList count=0 -opList=system.cpu.fuPool.FUList5.opList0 +opList=system.cpu.fuPool.FUList5.opList -[system.cpu.fuPool.FUList5.opList0] +[system.cpu.fuPool.FUList5.opList] type=OpDesc issueLat=1 opClass=MemWrite @@ -259,11 +255,11 @@ opLat=1 [system.cpu.fuPool.FUList7] type=FUDesc -children=opList0 +children=opList count=1 -opList=system.cpu.fuPool.FUList7.opList0 +opList=system.cpu.fuPool.FUList7.opList -[system.cpu.fuPool.FUList7.opList0] +[system.cpu.fuPool.FUList7.opList] type=OpDesc issueLat=3 opClass=IprAccess @@ -271,12 +267,9 @@ opLat=3 [system.cpu.icache] type=BaseCache -adaptive_compression=false addr_range=0:18446744073709551615 assoc=2 block_size=64 -compressed_bus=false -compression_latency=0 hash_delay=1 latency=1000 lifo=false @@ -294,12 +287,10 @@ prefetch_serial_squash=false prefetch_use_cpu_id=true prefetcher_size=100 prioritizeRequests=false -protocol=Null repl=Null size=131072 split=false split_size=0 -store_compressed=false subblock_size=0 tgts_per_mshr=20 trace_addr=0 @@ -310,12 +301,9 @@ mem_side=system.cpu.toL2Bus.port[0] [system.cpu.l2cache] type=BaseCache -adaptive_compression=false addr_range=0:18446744073709551615 assoc=2 block_size=64 -compressed_bus=false -compression_latency=0 hash_delay=1 latency=1000 lifo=false @@ -333,12 +321,10 @@ prefetch_serial_squash=false prefetch_use_cpu_id=true prefetcher_size=100 prioritizeRequests=false -protocol=Null repl=Null size=2097152 split=false split_size=0 -store_compressed=false subblock_size=0 tgts_per_mshr=5 trace_addr=0 @@ -356,6 +342,9 @@ responder_set=false width=64 port=system.cpu.icache.mem_side system.cpu.dcache.mem_side system.cpu.l2cache.cpu_side +[system.cpu.tracer] +type=ExeTracer + [system.cpu.workload] type=LiveProcess cmd=hello diff --git a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/m5stats.txt b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/m5stats.txt index d400dcd22..f575843e4 100644 --- a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/m5stats.txt +++ b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/m5stats.txt @@ -1,40 +1,40 @@ ---------- Begin Simulation Statistics ---------- global.BPredUnit.BTBCorrect 0 # Number of correct BTB predictions (this stat may not work properly. -global.BPredUnit.BTBHits 132 # Number of BTB hits -global.BPredUnit.BTBLookups 584 # Number of BTB lookups -global.BPredUnit.RASInCorrect 28 # Number of incorrect RAS predictions. -global.BPredUnit.condIncorrect 208 # Number of conditional branches incorrect -global.BPredUnit.condPredicted 376 # Number of conditional branches predicted -global.BPredUnit.lookups 738 # Number of BP lookups -global.BPredUnit.usedRAS 140 # Number of times the RAS was used to get a target. -host_inst_rate 39805 # Simulator instruction rate (inst/s) -host_mem_usage 153128 # Number of bytes of host memory used -host_seconds 0.06 # Real time elapsed on the host -host_tick_rate 34110715 # Simulator tick rate (ticks/s) -memdepunit.memDep.conflictingLoads 8 # Number of conflicting loads. +global.BPredUnit.BTBHits 146 # Number of BTB hits +global.BPredUnit.BTBLookups 613 # Number of BTB lookups +global.BPredUnit.RASInCorrect 32 # Number of incorrect RAS predictions. +global.BPredUnit.condIncorrect 212 # Number of conditional branches incorrect +global.BPredUnit.condPredicted 393 # Number of conditional branches predicted +global.BPredUnit.lookups 777 # Number of BP lookups +global.BPredUnit.usedRAS 153 # Number of times the RAS was used to get a target. +host_inst_rate 24407 # Simulator instruction rate (inst/s) +host_mem_usage 153952 # Number of bytes of host memory used +host_seconds 0.10 # Real time elapsed on the host +host_tick_rate 19202153 # Simulator tick rate (ticks/s) +memdepunit.memDep.conflictingLoads 7 # Number of conflicting loads. memdepunit.memDep.conflictingStores 7 # Number of conflicting stores. -memdepunit.memDep.insertedLoads 608 # Number of loads inserted to the mem dependence unit. -memdepunit.memDep.insertedStores 357 # Number of stores inserted to the mem dependence unit. +memdepunit.memDep.insertedLoads 635 # Number of loads inserted to the mem dependence unit. +memdepunit.memDep.insertedStores 367 # Number of stores inserted to the mem dependence unit. sim_freq 1000000000000 # Frequency of simulated ticks sim_insts 2387 # Number of instructions simulated sim_seconds 0.000002 # Number of seconds simulated -sim_ticks 2055000 # Number of ticks simulated +sim_ticks 1884000 # Number of ticks simulated system.cpu.commit.COM:branches 396 # Number of branches committed -system.cpu.commit.COM:bw_lim_events 41 # number cycles where commit BW limit reached +system.cpu.commit.COM:bw_lim_events 33 # number cycles where commit BW limit reached system.cpu.commit.COM:bw_limited 0 # number of insts not committed due to BW limits system.cpu.commit.COM:committed_per_cycle.start_dist # Number of insts commited each cycle -system.cpu.commit.COM:committed_per_cycle.samples 3910 +system.cpu.commit.COM:committed_per_cycle.samples 3543 system.cpu.commit.COM:committed_per_cycle.min_value 0 - 0 2950 7544.76% - 1 266 680.31% - 2 336 859.34% - 3 131 335.04% - 4 76 194.37% - 5 65 166.24% - 6 27 69.05% - 7 18 46.04% - 8 41 104.86% + 0 2580 7281.96% + 1 265 747.95% + 2 337 951.17% + 3 138 389.50% + 4 67 189.11% + 5 69 194.75% + 6 32 90.32% + 7 22 62.09% + 8 33 93.14% system.cpu.commit.COM:committed_per_cycle.max_value 8 system.cpu.commit.COM:committed_per_cycle.end_dist @@ -43,70 +43,70 @@ system.cpu.commit.COM:loads 415 # Nu system.cpu.commit.COM:membars 0 # Number of memory barriers committed system.cpu.commit.COM:refs 709 # Number of memory references committed system.cpu.commit.COM:swp_count 0 # Number of s/w prefetches committed -system.cpu.commit.branchMispredicts 128 # The number of times a branch was mispredicted +system.cpu.commit.branchMispredicts 131 # The number of times a branch was mispredicted system.cpu.commit.commitCommittedInsts 2576 # The number of committed instructions system.cpu.commit.commitNonSpecStalls 4 # The number of times commit has been forced to stall to communicate backwards -system.cpu.commit.commitSquashedInsts 978 # The number of squashed insts skipped by commit +system.cpu.commit.commitSquashedInsts 1118 # The number of squashed insts skipped by commit system.cpu.committedInsts 2387 # Number of Instructions Simulated system.cpu.committedInsts_total 2387 # Number of Instructions Simulated -system.cpu.cpi 1.723083 # CPI: Cycles Per Instruction -system.cpu.cpi_total 1.723083 # CPI: Total CPI of All Threads -system.cpu.dcache.ReadReq_accesses 514 # number of ReadReq accesses(hits+misses) -system.cpu.dcache.ReadReq_avg_miss_latency 5391.304348 # average ReadReq miss latency -system.cpu.dcache.ReadReq_avg_mshr_miss_latency 4669.491525 # average ReadReq mshr miss latency -system.cpu.dcache.ReadReq_hits 445 # number of ReadReq hits -system.cpu.dcache.ReadReq_miss_latency 372000 # number of ReadReq miss cycles -system.cpu.dcache.ReadReq_miss_rate 0.134241 # miss rate for ReadReq accesses -system.cpu.dcache.ReadReq_misses 69 # number of ReadReq misses +system.cpu.cpi 1.578969 # CPI: Cycles Per Instruction +system.cpu.cpi_total 1.578969 # CPI: Total CPI of All Threads +system.cpu.dcache.ReadReq_accesses 518 # number of ReadReq accesses(hits+misses) +system.cpu.dcache.ReadReq_avg_miss_latency 6583.333333 # average ReadReq miss latency +system.cpu.dcache.ReadReq_avg_mshr_miss_latency 4891.666667 # average ReadReq mshr miss latency +system.cpu.dcache.ReadReq_hits 458 # number of ReadReq hits +system.cpu.dcache.ReadReq_miss_latency 395000 # number of ReadReq miss cycles +system.cpu.dcache.ReadReq_miss_rate 0.115830 # miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_misses 60 # number of ReadReq misses system.cpu.dcache.ReadReq_mshr_hits 10 # number of ReadReq MSHR hits -system.cpu.dcache.ReadReq_mshr_miss_latency 275500 # number of ReadReq MSHR miss cycles -system.cpu.dcache.ReadReq_mshr_miss_rate 0.114786 # mshr miss rate for ReadReq accesses -system.cpu.dcache.ReadReq_mshr_misses 59 # number of ReadReq MSHR misses -system.cpu.dcache.WriteReq_accesses 294 # number of WriteReq accesses(hits+misses) -system.cpu.dcache.WriteReq_avg_miss_latency 5669.014085 # average WriteReq miss latency -system.cpu.dcache.WriteReq_avg_mshr_miss_latency 5020 # average WriteReq mshr miss latency -system.cpu.dcache.WriteReq_hits 223 # number of WriteReq hits -system.cpu.dcache.WriteReq_miss_latency 402500 # number of WriteReq miss cycles -system.cpu.dcache.WriteReq_miss_rate 0.241497 # miss rate for WriteReq accesses -system.cpu.dcache.WriteReq_misses 71 # number of WriteReq misses -system.cpu.dcache.WriteReq_mshr_hits 46 # number of WriteReq MSHR hits -system.cpu.dcache.WriteReq_mshr_miss_latency 125500 # number of WriteReq MSHR miss cycles -system.cpu.dcache.WriteReq_mshr_miss_rate 0.085034 # mshr miss rate for WriteReq accesses -system.cpu.dcache.WriteReq_mshr_misses 25 # number of WriteReq MSHR misses +system.cpu.dcache.ReadReq_mshr_miss_latency 293500 # number of ReadReq MSHR miss cycles +system.cpu.dcache.ReadReq_mshr_miss_rate 0.115830 # mshr miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_mshr_misses 60 # number of ReadReq MSHR misses +system.cpu.dcache.WriteReq_accesses 239 # number of WriteReq accesses(hits+misses) +system.cpu.dcache.WriteReq_avg_miss_latency 14216.216216 # average WriteReq miss latency +system.cpu.dcache.WriteReq_avg_mshr_miss_latency 5202.702703 # average WriteReq mshr miss latency +system.cpu.dcache.WriteReq_hits 202 # number of WriteReq hits +system.cpu.dcache.WriteReq_miss_latency 526000 # number of WriteReq miss cycles +system.cpu.dcache.WriteReq_miss_rate 0.154812 # miss rate for WriteReq accesses +system.cpu.dcache.WriteReq_misses 37 # number of WriteReq misses +system.cpu.dcache.WriteReq_mshr_hits 55 # number of WriteReq MSHR hits +system.cpu.dcache.WriteReq_mshr_miss_latency 192500 # number of WriteReq MSHR miss cycles +system.cpu.dcache.WriteReq_mshr_miss_rate 0.154812 # mshr miss rate for WriteReq accesses +system.cpu.dcache.WriteReq_mshr_misses 37 # number of WriteReq MSHR misses system.cpu.dcache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked system.cpu.dcache.avg_blocked_cycles_no_targets # average number of cycles each access was blocked -system.cpu.dcache.avg_refs 7.952381 # Average number of references to valid blocks. +system.cpu.dcache.avg_refs 7.905882 # Average number of references to valid blocks. system.cpu.dcache.blocked_no_mshrs 0 # number of cycles access was blocked system.cpu.dcache.blocked_no_targets 0 # number of cycles access was blocked system.cpu.dcache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked system.cpu.dcache.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu.dcache.cache_copies 0 # number of cache copies performed -system.cpu.dcache.demand_accesses 808 # number of demand (read+write) accesses -system.cpu.dcache.demand_avg_miss_latency 5532.142857 # average overall miss latency -system.cpu.dcache.demand_avg_mshr_miss_latency 4773.809524 # average overall mshr miss latency -system.cpu.dcache.demand_hits 668 # number of demand (read+write) hits -system.cpu.dcache.demand_miss_latency 774500 # number of demand (read+write) miss cycles -system.cpu.dcache.demand_miss_rate 0.173267 # miss rate for demand accesses -system.cpu.dcache.demand_misses 140 # number of demand (read+write) misses -system.cpu.dcache.demand_mshr_hits 56 # number of demand (read+write) MSHR hits -system.cpu.dcache.demand_mshr_miss_latency 401000 # number of demand (read+write) MSHR miss cycles -system.cpu.dcache.demand_mshr_miss_rate 0.103960 # mshr miss rate for demand accesses -system.cpu.dcache.demand_mshr_misses 84 # number of demand (read+write) MSHR misses +system.cpu.dcache.demand_accesses 757 # number of demand (read+write) accesses +system.cpu.dcache.demand_avg_miss_latency 9494.845361 # average overall miss latency +system.cpu.dcache.demand_avg_mshr_miss_latency 5010.309278 # average overall mshr miss latency +system.cpu.dcache.demand_hits 660 # number of demand (read+write) hits +system.cpu.dcache.demand_miss_latency 921000 # number of demand (read+write) miss cycles +system.cpu.dcache.demand_miss_rate 0.128137 # miss rate for demand accesses +system.cpu.dcache.demand_misses 97 # number of demand (read+write) misses +system.cpu.dcache.demand_mshr_hits 65 # number of demand (read+write) MSHR hits +system.cpu.dcache.demand_mshr_miss_latency 486000 # number of demand (read+write) MSHR miss cycles +system.cpu.dcache.demand_mshr_miss_rate 0.128137 # mshr miss rate for demand accesses +system.cpu.dcache.demand_mshr_misses 97 # number of demand (read+write) MSHR misses system.cpu.dcache.fast_writes 0 # number of fast writes performed system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.dcache.overall_accesses 808 # number of overall (read+write) accesses -system.cpu.dcache.overall_avg_miss_latency 5532.142857 # average overall miss latency -system.cpu.dcache.overall_avg_mshr_miss_latency 4773.809524 # average overall mshr miss latency +system.cpu.dcache.overall_accesses 757 # number of overall (read+write) accesses +system.cpu.dcache.overall_avg_miss_latency 9494.845361 # average overall miss latency +system.cpu.dcache.overall_avg_mshr_miss_latency 5010.309278 # average overall mshr miss latency system.cpu.dcache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency -system.cpu.dcache.overall_hits 668 # number of overall hits -system.cpu.dcache.overall_miss_latency 774500 # number of overall miss cycles -system.cpu.dcache.overall_miss_rate 0.173267 # miss rate for overall accesses -system.cpu.dcache.overall_misses 140 # number of overall misses -system.cpu.dcache.overall_mshr_hits 56 # number of overall MSHR hits -system.cpu.dcache.overall_mshr_miss_latency 401000 # number of overall MSHR miss cycles -system.cpu.dcache.overall_mshr_miss_rate 0.103960 # mshr miss rate for overall accesses -system.cpu.dcache.overall_mshr_misses 84 # number of overall MSHR misses +system.cpu.dcache.overall_hits 660 # number of overall hits +system.cpu.dcache.overall_miss_latency 921000 # number of overall miss cycles +system.cpu.dcache.overall_miss_rate 0.128137 # miss rate for overall accesses +system.cpu.dcache.overall_misses 97 # number of overall misses +system.cpu.dcache.overall_mshr_hits 65 # number of overall MSHR hits +system.cpu.dcache.overall_mshr_miss_latency 486000 # number of overall MSHR miss cycles +system.cpu.dcache.overall_mshr_miss_rate 0.128137 # mshr miss rate for overall accesses +system.cpu.dcache.overall_mshr_misses 97 # number of overall MSHR misses system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.dcache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.cpu.dcache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache @@ -119,89 +119,90 @@ system.cpu.dcache.prefetcher.num_hwpf_removed_MSHR_hit 0 system.cpu.dcache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time system.cpu.dcache.replacements 0 # number of replacements -system.cpu.dcache.sampled_refs 84 # Sample count of references to valid blocks. +system.cpu.dcache.sampled_refs 85 # Sample count of references to valid blocks. system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.dcache.tagsinuse 51.873008 # Cycle average of tags in use -system.cpu.dcache.total_refs 668 # Total number of references to valid blocks. +system.cpu.dcache.tagsinuse 51.399169 # Cycle average of tags in use +system.cpu.dcache.total_refs 672 # Total number of references to valid blocks. system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.dcache.writebacks 0 # number of writebacks -system.cpu.decode.DECODE:BlockedCycles 95 # Number of cycles decode is blocked -system.cpu.decode.DECODE:BranchMispred 81 # Number of times decode detected a branch misprediction -system.cpu.decode.DECODE:BranchResolved 123 # Number of times decode resolved a branch -system.cpu.decode.DECODE:DecodedInsts 4033 # Number of instructions handled by decode -system.cpu.decode.DECODE:IdleCycles 3045 # Number of cycles decode is idle -system.cpu.decode.DECODE:RunCycles 771 # Number of cycles decode is running -system.cpu.decode.DECODE:SquashCycles 202 # Number of cycles decode is squashing -system.cpu.decode.DECODE:SquashedInsts 298 # Number of squashed instructions handled by decode -system.cpu.fetch.Branches 738 # Number of branches that fetch encountered -system.cpu.fetch.CacheLines 654 # Number of cache lines fetched -system.cpu.fetch.Cycles 1444 # Number of cycles fetch has run and was not squashing or blocked -system.cpu.fetch.IcacheSquashes 120 # Number of outstanding Icache misses that were squashed -system.cpu.fetch.Insts 4685 # Number of instructions fetch has processed -system.cpu.fetch.SquashCycles 218 # Number of cycles fetch has spent squashing -system.cpu.fetch.branchRate 0.179431 # Number of branch fetches per cycle -system.cpu.fetch.icacheStallCycles 654 # Number of cycles fetch is stalled on an Icache miss -system.cpu.fetch.predictedBranches 272 # Number of branches that fetch has predicted taken -system.cpu.fetch.rate 1.139071 # Number of inst fetches per cycle +system.cpu.decode.DECODE:BlockedCycles 87 # Number of cycles decode is blocked +system.cpu.decode.DECODE:BranchMispred 83 # Number of times decode detected a branch misprediction +system.cpu.decode.DECODE:BranchResolved 125 # Number of times decode resolved a branch +system.cpu.decode.DECODE:DecodedInsts 4218 # Number of instructions handled by decode +system.cpu.decode.DECODE:IdleCycles 2648 # Number of cycles decode is idle +system.cpu.decode.DECODE:RunCycles 808 # Number of cycles decode is running +system.cpu.decode.DECODE:SquashCycles 225 # Number of cycles decode is squashing +system.cpu.decode.DECODE:SquashedInsts 304 # Number of squashed instructions handled by decode +system.cpu.decode.DECODE:UnblockCycles 1 # Number of cycles decode is unblocking +system.cpu.fetch.Branches 777 # Number of branches that fetch encountered +system.cpu.fetch.CacheLines 686 # Number of cache lines fetched +system.cpu.fetch.Cycles 1528 # Number of cycles fetch has run and was not squashing or blocked +system.cpu.fetch.IcacheSquashes 107 # Number of outstanding Icache misses that were squashed +system.cpu.fetch.Insts 4951 # Number of instructions fetch has processed +system.cpu.fetch.SquashCycles 223 # Number of cycles fetch has spent squashing +system.cpu.fetch.branchRate 0.206155 # Number of branch fetches per cycle +system.cpu.fetch.icacheStallCycles 686 # Number of cycles fetch is stalled on an Icache miss +system.cpu.fetch.predictedBranches 299 # Number of branches that fetch has predicted taken +system.cpu.fetch.rate 1.313611 # Number of inst fetches per cycle system.cpu.fetch.rateDist.start_dist # Number of instructions fetched each cycle (Total) -system.cpu.fetch.rateDist.samples 4113 +system.cpu.fetch.rateDist.samples 3769 system.cpu.fetch.rateDist.min_value 0 - 0 3325 8084.12% - 1 32 77.80% - 2 80 194.51% - 3 50 121.57% - 4 99 240.70% - 5 52 126.43% - 6 39 94.82% - 7 35 85.10% - 8 401 974.96% + 0 2929 7771.29% + 1 36 95.52% + 2 88 233.48% + 3 54 143.27% + 4 108 286.55% + 5 55 145.93% + 6 40 106.13% + 7 42 111.44% + 8 417 1106.39% system.cpu.fetch.rateDist.max_value 8 system.cpu.fetch.rateDist.end_dist -system.cpu.icache.ReadReq_accesses 654 # number of ReadReq accesses(hits+misses) -system.cpu.icache.ReadReq_avg_miss_latency 5298.507463 # average ReadReq miss latency -system.cpu.icache.ReadReq_avg_mshr_miss_latency 4556.451613 # average ReadReq mshr miss latency -system.cpu.icache.ReadReq_hits 453 # number of ReadReq hits -system.cpu.icache.ReadReq_miss_latency 1065000 # number of ReadReq miss cycles -system.cpu.icache.ReadReq_miss_rate 0.307339 # miss rate for ReadReq accesses -system.cpu.icache.ReadReq_misses 201 # number of ReadReq misses -system.cpu.icache.ReadReq_mshr_hits 15 # number of ReadReq MSHR hits -system.cpu.icache.ReadReq_mshr_miss_latency 847500 # number of ReadReq MSHR miss cycles -system.cpu.icache.ReadReq_mshr_miss_rate 0.284404 # mshr miss rate for ReadReq accesses +system.cpu.icache.ReadReq_accesses 676 # number of ReadReq accesses(hits+misses) +system.cpu.icache.ReadReq_avg_miss_latency 5629.032258 # average ReadReq miss latency +system.cpu.icache.ReadReq_avg_mshr_miss_latency 4489.247312 # average ReadReq mshr miss latency +system.cpu.icache.ReadReq_hits 490 # number of ReadReq hits +system.cpu.icache.ReadReq_miss_latency 1047000 # number of ReadReq miss cycles +system.cpu.icache.ReadReq_miss_rate 0.275148 # miss rate for ReadReq accesses +system.cpu.icache.ReadReq_misses 186 # number of ReadReq misses +system.cpu.icache.ReadReq_mshr_hits 10 # number of ReadReq MSHR hits +system.cpu.icache.ReadReq_mshr_miss_latency 835000 # number of ReadReq MSHR miss cycles +system.cpu.icache.ReadReq_mshr_miss_rate 0.275148 # mshr miss rate for ReadReq accesses system.cpu.icache.ReadReq_mshr_misses 186 # number of ReadReq MSHR misses system.cpu.icache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked system.cpu.icache.avg_blocked_cycles_no_targets # average number of cycles each access was blocked -system.cpu.icache.avg_refs 2.435484 # Average number of references to valid blocks. +system.cpu.icache.avg_refs 2.634409 # Average number of references to valid blocks. system.cpu.icache.blocked_no_mshrs 0 # number of cycles access was blocked system.cpu.icache.blocked_no_targets 0 # number of cycles access was blocked system.cpu.icache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked system.cpu.icache.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu.icache.cache_copies 0 # number of cache copies performed -system.cpu.icache.demand_accesses 654 # number of demand (read+write) accesses -system.cpu.icache.demand_avg_miss_latency 5298.507463 # average overall miss latency -system.cpu.icache.demand_avg_mshr_miss_latency 4556.451613 # average overall mshr miss latency -system.cpu.icache.demand_hits 453 # number of demand (read+write) hits -system.cpu.icache.demand_miss_latency 1065000 # number of demand (read+write) miss cycles -system.cpu.icache.demand_miss_rate 0.307339 # miss rate for demand accesses -system.cpu.icache.demand_misses 201 # number of demand (read+write) misses -system.cpu.icache.demand_mshr_hits 15 # number of demand (read+write) MSHR hits -system.cpu.icache.demand_mshr_miss_latency 847500 # number of demand (read+write) MSHR miss cycles -system.cpu.icache.demand_mshr_miss_rate 0.284404 # mshr miss rate for demand accesses +system.cpu.icache.demand_accesses 676 # number of demand (read+write) accesses +system.cpu.icache.demand_avg_miss_latency 5629.032258 # average overall miss latency +system.cpu.icache.demand_avg_mshr_miss_latency 4489.247312 # average overall mshr miss latency +system.cpu.icache.demand_hits 490 # number of demand (read+write) hits +system.cpu.icache.demand_miss_latency 1047000 # number of demand (read+write) miss cycles +system.cpu.icache.demand_miss_rate 0.275148 # miss rate for demand accesses +system.cpu.icache.demand_misses 186 # number of demand (read+write) misses +system.cpu.icache.demand_mshr_hits 10 # number of demand (read+write) MSHR hits +system.cpu.icache.demand_mshr_miss_latency 835000 # number of demand (read+write) MSHR miss cycles +system.cpu.icache.demand_mshr_miss_rate 0.275148 # mshr miss rate for demand accesses system.cpu.icache.demand_mshr_misses 186 # number of demand (read+write) MSHR misses system.cpu.icache.fast_writes 0 # number of fast writes performed system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.icache.overall_accesses 654 # number of overall (read+write) accesses -system.cpu.icache.overall_avg_miss_latency 5298.507463 # average overall miss latency -system.cpu.icache.overall_avg_mshr_miss_latency 4556.451613 # average overall mshr miss latency +system.cpu.icache.overall_accesses 676 # number of overall (read+write) accesses +system.cpu.icache.overall_avg_miss_latency 5629.032258 # average overall miss latency +system.cpu.icache.overall_avg_mshr_miss_latency 4489.247312 # average overall mshr miss latency system.cpu.icache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency -system.cpu.icache.overall_hits 453 # number of overall hits -system.cpu.icache.overall_miss_latency 1065000 # number of overall miss cycles -system.cpu.icache.overall_miss_rate 0.307339 # miss rate for overall accesses -system.cpu.icache.overall_misses 201 # number of overall misses -system.cpu.icache.overall_mshr_hits 15 # number of overall MSHR hits -system.cpu.icache.overall_mshr_miss_latency 847500 # number of overall MSHR miss cycles -system.cpu.icache.overall_mshr_miss_rate 0.284404 # mshr miss rate for overall accesses +system.cpu.icache.overall_hits 490 # number of overall hits +system.cpu.icache.overall_miss_latency 1047000 # number of overall miss cycles +system.cpu.icache.overall_miss_rate 0.275148 # miss rate for overall accesses +system.cpu.icache.overall_misses 186 # number of overall misses +system.cpu.icache.overall_mshr_hits 10 # number of overall MSHR hits +system.cpu.icache.overall_mshr_miss_latency 835000 # number of overall MSHR miss cycles +system.cpu.icache.overall_mshr_miss_rate 0.275148 # mshr miss rate for overall accesses system.cpu.icache.overall_mshr_misses 186 # number of overall MSHR misses system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.icache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses @@ -217,59 +218,59 @@ system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss 0 system.cpu.icache.replacements 0 # number of replacements system.cpu.icache.sampled_refs 186 # Sample count of references to valid blocks. system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.icache.tagsinuse 106.293956 # Cycle average of tags in use -system.cpu.icache.total_refs 453 # Total number of references to valid blocks. +system.cpu.icache.tagsinuse 104.691657 # Cycle average of tags in use +system.cpu.icache.total_refs 490 # Total number of references to valid blocks. system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.icache.writebacks 0 # number of writebacks -system.cpu.idleCycles 2992 # Total number of cycles that the CPU has spent unscheduled due to idling -system.cpu.iew.EXEC:branches 501 # Number of branches executed -system.cpu.iew.EXEC:nop 234 # number of nop insts executed -system.cpu.iew.EXEC:rate 0.726477 # Inst execution rate -system.cpu.iew.EXEC:refs 878 # number of memory reference insts executed -system.cpu.iew.EXEC:stores 333 # Number of stores executed +system.cpu.idleCycles 998 # Total number of cycles that the CPU has spent unscheduled due to idling +system.cpu.iew.EXEC:branches 516 # Number of branches executed +system.cpu.iew.EXEC:nop 242 # number of nop insts executed +system.cpu.iew.EXEC:rate 0.810295 # Inst execution rate +system.cpu.iew.EXEC:refs 894 # number of memory reference insts executed +system.cpu.iew.EXEC:stores 334 # Number of stores executed system.cpu.iew.EXEC:swp 0 # number of swp insts executed -system.cpu.iew.WB:consumers 1652 # num instructions consuming a value -system.cpu.iew.WB:count 2914 # cumulative count of insts written-back -system.cpu.iew.WB:fanout 0.799637 # average fanout of values written-back +system.cpu.iew.WB:consumers 1725 # num instructions consuming a value +system.cpu.iew.WB:count 2987 # cumulative count of insts written-back +system.cpu.iew.WB:fanout 0.794203 # average fanout of values written-back system.cpu.iew.WB:penalized 0 # number of instrctions required to write to 'other' IQ system.cpu.iew.WB:penalized_rate 0 # fraction of instructions written-back that wrote to 'other' IQ -system.cpu.iew.WB:producers 1321 # num instructions producing a value -system.cpu.iew.WB:rate 0.708485 # insts written-back per cycle -system.cpu.iew.WB:sent 2931 # cumulative count of insts sent to commit -system.cpu.iew.branchMispredicts 135 # Number of branch mispredicts detected at execute +system.cpu.iew.WB:producers 1370 # num instructions producing a value +system.cpu.iew.WB:rate 0.792518 # insts written-back per cycle +system.cpu.iew.WB:sent 3007 # cumulative count of insts sent to commit +system.cpu.iew.branchMispredicts 146 # Number of branch mispredicts detected at execute system.cpu.iew.iewBlockCycles 0 # Number of cycles IEW is blocking -system.cpu.iew.iewDispLoadInsts 608 # Number of dispatched load instructions -system.cpu.iew.iewDispNonSpecInsts 7 # Number of dispatched non-speculative instructions -system.cpu.iew.iewDispSquashedInsts 179 # Number of squashed instructions skipped by dispatch -system.cpu.iew.iewDispStoreInsts 357 # Number of dispatched store instructions -system.cpu.iew.iewDispatchedInsts 3571 # Number of instructions dispatched to IQ -system.cpu.iew.iewExecLoadInsts 545 # Number of load instructions executed -system.cpu.iew.iewExecSquashedInsts 87 # Number of squashed instructions skipped in execute -system.cpu.iew.iewExecutedInsts 2988 # Number of executed instructions +system.cpu.iew.iewDispLoadInsts 635 # Number of dispatched load instructions +system.cpu.iew.iewDispNonSpecInsts 6 # Number of dispatched non-speculative instructions +system.cpu.iew.iewDispSquashedInsts 92 # Number of squashed instructions skipped by dispatch +system.cpu.iew.iewDispStoreInsts 367 # Number of dispatched store instructions +system.cpu.iew.iewDispatchedInsts 3711 # Number of instructions dispatched to IQ +system.cpu.iew.iewExecLoadInsts 560 # Number of load instructions executed +system.cpu.iew.iewExecSquashedInsts 111 # Number of squashed instructions skipped in execute +system.cpu.iew.iewExecutedInsts 3054 # Number of executed instructions system.cpu.iew.iewIQFullEvents 0 # Number of times the IQ has become full, causing a stall system.cpu.iew.iewIdleCycles 0 # Number of cycles IEW is idle system.cpu.iew.iewLSQFullEvents 0 # Number of times the LSQ has become full, causing a stall -system.cpu.iew.iewSquashCycles 202 # Number of cycles IEW is squashing +system.cpu.iew.iewSquashCycles 225 # Number of cycles IEW is squashing system.cpu.iew.iewUnblockCycles 0 # Number of cycles IEW is unblocking system.cpu.iew.lsq.thread.0.blockedLoads 0 # Number of blocked loads due to partial load-store forwarding system.cpu.iew.lsq.thread.0.cacheBlocked 0 # Number of times an access to memory failed due to the cache being blocked -system.cpu.iew.lsq.thread.0.forwLoads 22 # Number of loads that had data forwarded from stores +system.cpu.iew.lsq.thread.0.forwLoads 24 # Number of loads that had data forwarded from stores system.cpu.iew.lsq.thread.0.ignoredResponses 0 # Number of memory responses ignored because the instruction is squashed system.cpu.iew.lsq.thread.0.invAddrLoads 0 # Number of loads ignored due to an invalid address system.cpu.iew.lsq.thread.0.invAddrSwpfs 0 # Number of software prefetches ignored due to an invalid address -system.cpu.iew.lsq.thread.0.memOrderViolation 10 # Number of memory ordering violations +system.cpu.iew.lsq.thread.0.memOrderViolation 12 # Number of memory ordering violations system.cpu.iew.lsq.thread.0.rescheduledLoads 0 # Number of loads that were rescheduled -system.cpu.iew.lsq.thread.0.squashedLoads 193 # Number of loads squashed -system.cpu.iew.lsq.thread.0.squashedStores 63 # Number of stores squashed -system.cpu.iew.memOrderViolationEvents 10 # Number of memory order violations +system.cpu.iew.lsq.thread.0.squashedLoads 220 # Number of loads squashed +system.cpu.iew.lsq.thread.0.squashedStores 73 # Number of stores squashed +system.cpu.iew.memOrderViolationEvents 12 # Number of memory order violations system.cpu.iew.predictedNotTakenIncorrect 98 # Number of branches that were predicted not taken incorrectly -system.cpu.iew.predictedTakenIncorrect 37 # Number of branches that were predicted taken incorrectly -system.cpu.ipc 0.580355 # IPC: Instructions Per Cycle -system.cpu.ipc_total 0.580355 # IPC: Total IPC of All Threads -system.cpu.iq.ISSUE:FU_type_0 3075 # Type of FU issued +system.cpu.iew.predictedTakenIncorrect 48 # Number of branches that were predicted taken incorrectly +system.cpu.ipc 0.633324 # IPC: Instructions Per Cycle +system.cpu.ipc_total 0.633324 # IPC: Total IPC of All Threads +system.cpu.iq.ISSUE:FU_type_0 3165 # Type of FU issued system.cpu.iq.ISSUE:FU_type_0.start_dist No_OpClass 0 0.00% # Type of FU issued - IntAlu 2178 70.83% # Type of FU issued + IntAlu 2243 70.87% # Type of FU issued IntMult 1 0.03% # Type of FU issued IntDiv 0 0.00% # Type of FU issued FloatAdd 0 0.00% # Type of FU issued @@ -278,16 +279,16 @@ system.cpu.iq.ISSUE:FU_type_0.start_dist FloatMult 0 0.00% # Type of FU issued FloatDiv 0 0.00% # Type of FU issued FloatSqrt 0 0.00% # Type of FU issued - MemRead 561 18.24% # Type of FU issued - MemWrite 335 10.89% # Type of FU issued + MemRead 581 18.36% # Type of FU issued + MemWrite 340 10.74% # Type of FU issued IprAccess 0 0.00% # Type of FU issued InstPrefetch 0 0.00% # Type of FU issued system.cpu.iq.ISSUE:FU_type_0.end_dist system.cpu.iq.ISSUE:fu_busy_cnt 35 # FU busy when requested -system.cpu.iq.ISSUE:fu_busy_rate 0.011382 # FU busy rate (busy events/executed inst) +system.cpu.iq.ISSUE:fu_busy_rate 0.011058 # FU busy rate (busy events/executed inst) system.cpu.iq.ISSUE:fu_full.start_dist No_OpClass 0 0.00% # attempts to use FU when none available - IntAlu 2 5.71% # attempts to use FU when none available + IntAlu 1 2.86% # attempts to use FU when none available IntMult 0 0.00% # attempts to use FU when none available IntDiv 0 0.00% # attempts to use FU when none available FloatAdd 0 0.00% # attempts to use FU when none available @@ -297,41 +298,60 @@ system.cpu.iq.ISSUE:fu_full.start_dist FloatDiv 0 0.00% # attempts to use FU when none available FloatSqrt 0 0.00% # attempts to use FU when none available MemRead 12 34.29% # attempts to use FU when none available - MemWrite 21 60.00% # attempts to use FU when none available + MemWrite 22 62.86% # attempts to use FU when none available IprAccess 0 0.00% # attempts to use FU when none available InstPrefetch 0 0.00% # attempts to use FU when none available system.cpu.iq.ISSUE:fu_full.end_dist system.cpu.iq.ISSUE:issued_per_cycle.start_dist # Number of insts issued each cycle -system.cpu.iq.ISSUE:issued_per_cycle.samples 4113 +system.cpu.iq.ISSUE:issued_per_cycle.samples 3769 system.cpu.iq.ISSUE:issued_per_cycle.min_value 0 - 0 2848 6924.39% - 1 479 1164.60% - 2 276 671.04% - 3 213 517.87% - 4 158 384.15% - 5 86 209.09% - 6 34 82.66% - 7 13 31.61% - 8 6 14.59% + 0 2469 6550.81% + 1 494 1310.69% + 2 274 726.98% + 3 234 620.85% + 4 152 403.29% + 5 87 230.83% + 6 40 106.13% + 7 14 37.15% + 8 5 13.27% system.cpu.iq.ISSUE:issued_per_cycle.max_value 8 system.cpu.iq.ISSUE:issued_per_cycle.end_dist -system.cpu.iq.ISSUE:rate 0.747629 # Inst issue rate -system.cpu.iq.iqInstsAdded 3330 # Number of instructions added to the IQ (excludes non-spec) -system.cpu.iq.iqInstsIssued 3075 # Number of instructions issued -system.cpu.iq.iqNonSpecInstsAdded 7 # Number of non-speculative instructions added to the IQ -system.cpu.iq.iqSquashedInstsExamined 790 # Number of squashed instructions iterated over during squash; mainly for profiling -system.cpu.iq.iqSquashedNonSpecRemoved 3 # Number of squashed non-spec instructions that were removed -system.cpu.iq.iqSquashedOperandsExamined 409 # Number of squashed operands that are examined and possibly removed from graph -system.cpu.l2cache.ReadReq_accesses 270 # number of ReadReq accesses(hits+misses) -system.cpu.l2cache.ReadReq_avg_miss_latency 4509.259259 # average ReadReq miss latency -system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 2388.888889 # average ReadReq mshr miss latency -system.cpu.l2cache.ReadReq_miss_latency 1217500 # number of ReadReq miss cycles +system.cpu.iq.ISSUE:rate 0.839745 # Inst issue rate +system.cpu.iq.iqInstsAdded 3463 # Number of instructions added to the IQ (excludes non-spec) +system.cpu.iq.iqInstsIssued 3165 # Number of instructions issued +system.cpu.iq.iqNonSpecInstsAdded 6 # Number of non-speculative instructions added to the IQ +system.cpu.iq.iqSquashedInstsExamined 947 # Number of squashed instructions iterated over during squash; mainly for profiling +system.cpu.iq.iqSquashedInstsIssued 1 # Number of squashed instructions issued +system.cpu.iq.iqSquashedNonSpecRemoved 2 # Number of squashed non-spec instructions that were removed +system.cpu.iq.iqSquashedOperandsExamined 468 # Number of squashed operands that are examined and possibly removed from graph +system.cpu.l2cache.ReadExReq_accesses 25 # number of ReadExReq accesses(hits+misses) +system.cpu.l2cache.ReadExReq_avg_miss_latency 3720 # average ReadExReq miss latency +system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency 2720 # average ReadExReq mshr miss latency +system.cpu.l2cache.ReadExReq_miss_latency 93000 # number of ReadExReq miss cycles +system.cpu.l2cache.ReadExReq_miss_rate 1 # miss rate for ReadExReq accesses +system.cpu.l2cache.ReadExReq_misses 25 # number of ReadExReq misses +system.cpu.l2cache.ReadExReq_mshr_miss_latency 68000 # number of ReadExReq MSHR miss cycles +system.cpu.l2cache.ReadExReq_mshr_miss_rate 1 # mshr miss rate for ReadExReq accesses +system.cpu.l2cache.ReadExReq_mshr_misses 25 # number of ReadExReq MSHR misses +system.cpu.l2cache.ReadReq_accesses 246 # number of ReadReq accesses(hits+misses) +system.cpu.l2cache.ReadReq_avg_miss_latency 3357.723577 # average ReadReq miss latency +system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 2357.723577 # average ReadReq mshr miss latency +system.cpu.l2cache.ReadReq_miss_latency 826000 # number of ReadReq miss cycles system.cpu.l2cache.ReadReq_miss_rate 1 # miss rate for ReadReq accesses -system.cpu.l2cache.ReadReq_misses 270 # number of ReadReq misses -system.cpu.l2cache.ReadReq_mshr_miss_latency 645000 # number of ReadReq MSHR miss cycles +system.cpu.l2cache.ReadReq_misses 246 # number of ReadReq misses +system.cpu.l2cache.ReadReq_mshr_miss_latency 580000 # number of ReadReq MSHR miss cycles system.cpu.l2cache.ReadReq_mshr_miss_rate 1 # mshr miss rate for ReadReq accesses -system.cpu.l2cache.ReadReq_mshr_misses 270 # number of ReadReq MSHR misses +system.cpu.l2cache.ReadReq_mshr_misses 246 # number of ReadReq MSHR misses +system.cpu.l2cache.UpgradeReq_accesses 13 # number of UpgradeReq accesses(hits+misses) +system.cpu.l2cache.UpgradeReq_avg_miss_latency 3230.769231 # average UpgradeReq miss latency +system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency 2230.769231 # average UpgradeReq mshr miss latency +system.cpu.l2cache.UpgradeReq_miss_latency 42000 # number of UpgradeReq miss cycles +system.cpu.l2cache.UpgradeReq_miss_rate 1 # miss rate for UpgradeReq accesses +system.cpu.l2cache.UpgradeReq_misses 13 # number of UpgradeReq misses +system.cpu.l2cache.UpgradeReq_mshr_miss_latency 29000 # number of UpgradeReq MSHR miss cycles +system.cpu.l2cache.UpgradeReq_mshr_miss_rate 1 # mshr miss rate for UpgradeReq accesses +system.cpu.l2cache.UpgradeReq_mshr_misses 13 # number of UpgradeReq MSHR misses system.cpu.l2cache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked system.cpu.l2cache.avg_blocked_cycles_no_targets # average number of cycles each access was blocked system.cpu.l2cache.avg_refs 0 # Average number of references to valid blocks. @@ -340,32 +360,32 @@ system.cpu.l2cache.blocked_no_targets 0 # nu system.cpu.l2cache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked system.cpu.l2cache.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu.l2cache.cache_copies 0 # number of cache copies performed -system.cpu.l2cache.demand_accesses 270 # number of demand (read+write) accesses -system.cpu.l2cache.demand_avg_miss_latency 4509.259259 # average overall miss latency -system.cpu.l2cache.demand_avg_mshr_miss_latency 2388.888889 # average overall mshr miss latency +system.cpu.l2cache.demand_accesses 271 # number of demand (read+write) accesses +system.cpu.l2cache.demand_avg_miss_latency 3391.143911 # average overall miss latency +system.cpu.l2cache.demand_avg_mshr_miss_latency 2391.143911 # average overall mshr miss latency system.cpu.l2cache.demand_hits 0 # number of demand (read+write) hits -system.cpu.l2cache.demand_miss_latency 1217500 # number of demand (read+write) miss cycles +system.cpu.l2cache.demand_miss_latency 919000 # number of demand (read+write) miss cycles system.cpu.l2cache.demand_miss_rate 1 # miss rate for demand accesses -system.cpu.l2cache.demand_misses 270 # number of demand (read+write) misses +system.cpu.l2cache.demand_misses 271 # number of demand (read+write) misses system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu.l2cache.demand_mshr_miss_latency 645000 # number of demand (read+write) MSHR miss cycles +system.cpu.l2cache.demand_mshr_miss_latency 648000 # number of demand (read+write) MSHR miss cycles system.cpu.l2cache.demand_mshr_miss_rate 1 # mshr miss rate for demand accesses -system.cpu.l2cache.demand_mshr_misses 270 # number of demand (read+write) MSHR misses +system.cpu.l2cache.demand_mshr_misses 271 # number of demand (read+write) MSHR misses system.cpu.l2cache.fast_writes 0 # number of fast writes performed system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.l2cache.overall_accesses 270 # number of overall (read+write) accesses -system.cpu.l2cache.overall_avg_miss_latency 4509.259259 # average overall miss latency -system.cpu.l2cache.overall_avg_mshr_miss_latency 2388.888889 # average overall mshr miss latency +system.cpu.l2cache.overall_accesses 271 # number of overall (read+write) accesses +system.cpu.l2cache.overall_avg_miss_latency 3391.143911 # average overall miss latency +system.cpu.l2cache.overall_avg_mshr_miss_latency 2391.143911 # average overall mshr miss latency system.cpu.l2cache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency system.cpu.l2cache.overall_hits 0 # number of overall hits -system.cpu.l2cache.overall_miss_latency 1217500 # number of overall miss cycles +system.cpu.l2cache.overall_miss_latency 919000 # number of overall miss cycles system.cpu.l2cache.overall_miss_rate 1 # miss rate for overall accesses -system.cpu.l2cache.overall_misses 270 # number of overall misses +system.cpu.l2cache.overall_misses 271 # number of overall misses system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu.l2cache.overall_mshr_miss_latency 645000 # number of overall MSHR miss cycles +system.cpu.l2cache.overall_mshr_miss_latency 648000 # number of overall MSHR miss cycles system.cpu.l2cache.overall_mshr_miss_rate 1 # mshr miss rate for overall accesses -system.cpu.l2cache.overall_mshr_misses 270 # number of overall MSHR misses +system.cpu.l2cache.overall_mshr_misses 271 # number of overall MSHR misses system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.l2cache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.cpu.l2cache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache @@ -378,28 +398,28 @@ system.cpu.l2cache.prefetcher.num_hwpf_removed_MSHR_hit 0 system.cpu.l2cache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time system.cpu.l2cache.replacements 0 # number of replacements -system.cpu.l2cache.sampled_refs 270 # Sample count of references to valid blocks. +system.cpu.l2cache.sampled_refs 233 # Sample count of references to valid blocks. system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.l2cache.tagsinuse 158.313436 # Cycle average of tags in use +system.cpu.l2cache.tagsinuse 129.636467 # Cycle average of tags in use system.cpu.l2cache.total_refs 0 # Total number of references to valid blocks. system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.l2cache.writebacks 0 # number of writebacks -system.cpu.numCycles 4113 # number of cpu cycles simulated +system.cpu.numCycles 3769 # number of cpu cycles simulated system.cpu.rename.RENAME:CommittedMaps 1768 # Number of HB maps that are committed -system.cpu.rename.RENAME:IdleCycles 3116 # Number of cycles rename is idle +system.cpu.rename.RENAME:IdleCycles 2724 # Number of cycles rename is idle system.cpu.rename.RENAME:LSQFullEvents 1 # Number of times rename has blocked due to LSQ full -system.cpu.rename.RENAME:RenameLookups 4416 # Number of register rename lookups that rename has made -system.cpu.rename.RENAME:RenamedInsts 3886 # Number of instructions processed by rename -system.cpu.rename.RENAME:RenamedOperands 2777 # Number of destination operands rename has renamed -system.cpu.rename.RENAME:RunCycles 700 # Number of cycles rename is running -system.cpu.rename.RENAME:SquashCycles 202 # Number of cycles rename is squashing -system.cpu.rename.RENAME:UnblockCycles 6 # Number of cycles rename is unblocking -system.cpu.rename.RENAME:UndoneMaps 1009 # Number of HB maps that are undone due to squashing -system.cpu.rename.RENAME:serializeStallCycles 89 # count of cycles rename stalled for serializing inst -system.cpu.rename.RENAME:serializingInsts 9 # count of serializing insts renamed -system.cpu.rename.RENAME:skidInsts 55 # count of insts added to the skid buffer -system.cpu.rename.RENAME:tempSerializingInsts 7 # count of temporary serializing insts renamed -system.cpu.timesIdled 8 # Number of times that the entire CPU went into an idle state and unscheduled itself +system.cpu.rename.RENAME:RenameLookups 4613 # Number of register rename lookups that rename has made +system.cpu.rename.RENAME:RenamedInsts 4068 # Number of instructions processed by rename +system.cpu.rename.RENAME:RenamedOperands 2909 # Number of destination operands rename has renamed +system.cpu.rename.RENAME:RunCycles 733 # Number of cycles rename is running +system.cpu.rename.RENAME:SquashCycles 225 # Number of cycles rename is squashing +system.cpu.rename.RENAME:UnblockCycles 7 # Number of cycles rename is unblocking +system.cpu.rename.RENAME:UndoneMaps 1141 # Number of HB maps that are undone due to squashing +system.cpu.rename.RENAME:serializeStallCycles 80 # count of cycles rename stalled for serializing inst +system.cpu.rename.RENAME:serializingInsts 8 # count of serializing insts renamed +system.cpu.rename.RENAME:skidInsts 52 # count of insts added to the skid buffer +system.cpu.rename.RENAME:tempSerializingInsts 6 # count of temporary serializing insts renamed +system.cpu.timesIdled 2 # Number of times that the entire CPU went into an idle state and unscheduled itself system.cpu.workload.PROG:num_syscalls 4 # Number of system calls ---------- End Simulation Statistics ---------- diff --git a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stdout b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stdout index c276fcaea..79e638bb8 100644 --- a/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stdout +++ b/tests/quick/00.hello/ref/alpha/tru64/o3-timing/stdout @@ -6,9 +6,9 @@ The Regents of The University of Michigan All Rights Reserved -M5 compiled Jun 21 2007 21:25:27 -M5 started Fri Jun 22 00:04:44 2007 +M5 compiled Aug 3 2007 03:56:47 +M5 started Fri Aug 3 04:17:13 2007 M5 executing on zizzer.eecs.umich.edu command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/quick/00.hello/alpha/tru64/o3-timing tests/run.py quick/00.hello/alpha/tru64/o3-timing Global frequency set at 1000000000000 ticks per second -Exiting @ tick 2055000 because target called exit() +Exiting @ tick 1884000 because target called exit() diff --git a/tests/quick/00.hello/ref/alpha/tru64/simple-atomic/config.ini b/tests/quick/00.hello/ref/alpha/tru64/simple-atomic/config.ini index 61db8446a..16ea738bc 100644 --- a/tests/quick/00.hello/ref/alpha/tru64/simple-atomic/config.ini +++ b/tests/quick/00.hello/ref/alpha/tru64/simple-atomic/config.ini @@ -11,7 +11,7 @@ physmem=system.physmem [system.cpu] type=AtomicSimpleCPU -children=workload +children=tracer workload clock=500 cpu_id=0 defer_registration=false @@ -25,11 +25,15 @@ phase=0 progress_interval=0 simulate_stalls=false system=system +tracer=system.cpu.tracer width=1 workload=system.cpu.workload dcache_port=system.membus.port[2] icache_port=system.membus.port[1] +[system.cpu.tracer] +type=ExeTracer + [system.cpu.workload] type=LiveProcess cmd=hello diff --git a/tests/quick/00.hello/ref/alpha/tru64/simple-atomic/m5stats.txt b/tests/quick/00.hello/ref/alpha/tru64/simple-atomic/m5stats.txt index 29351d427..dfc8b7f6b 100644 --- a/tests/quick/00.hello/ref/alpha/tru64/simple-atomic/m5stats.txt +++ b/tests/quick/00.hello/ref/alpha/tru64/simple-atomic/m5stats.txt @@ -1,8 +1,9 @@ ---------- Begin Simulation Statistics ---------- -host_inst_rate 111994 # Simulator instruction rate (inst/s) -host_seconds 0.02 # Real time elapsed on the host -host_tick_rate 55017079 # Simulator tick rate (ticks/s) +host_inst_rate 34280 # Simulator instruction rate (inst/s) +host_mem_usage 147884 # Number of bytes of host memory used +host_seconds 0.08 # Real time elapsed on the host +host_tick_rate 17043200 # Simulator tick rate (ticks/s) sim_freq 1000000000000 # Frequency of simulated ticks sim_insts 2578 # Number of instructions simulated sim_seconds 0.000001 # Number of seconds simulated diff --git a/tests/quick/00.hello/ref/alpha/tru64/simple-atomic/stdout b/tests/quick/00.hello/ref/alpha/tru64/simple-atomic/stdout index f76500526..6e78c47eb 100644 --- a/tests/quick/00.hello/ref/alpha/tru64/simple-atomic/stdout +++ b/tests/quick/00.hello/ref/alpha/tru64/simple-atomic/stdout @@ -6,9 +6,9 @@ The Regents of The University of Michigan All Rights Reserved -M5 compiled Jun 10 2007 14:06:20 -M5 started Sun Jun 10 14:22:37 2007 -M5 executing on iceaxe -command line: /Users/nate/build/outgoing/build/ALPHA_SE/m5.debug -d /Users/nate/build/outgoing/build/ALPHA_SE/tests/debug/quick/00.hello/alpha/tru64/simple-atomic tests/run.py quick/00.hello/alpha/tru64/simple-atomic +M5 compiled Aug 3 2007 03:56:47 +M5 started Fri Aug 3 04:17:14 2007 +M5 executing on zizzer.eecs.umich.edu +command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/quick/00.hello/alpha/tru64/simple-atomic tests/run.py quick/00.hello/alpha/tru64/simple-atomic Global frequency set at 1000000000000 ticks per second Exiting @ tick 1288500 because target called exit() diff --git a/tests/quick/00.hello/ref/alpha/tru64/simple-timing/config.ini b/tests/quick/00.hello/ref/alpha/tru64/simple-timing/config.ini index 5a336ab13..a9adf07b9 100644 --- a/tests/quick/00.hello/ref/alpha/tru64/simple-timing/config.ini +++ b/tests/quick/00.hello/ref/alpha/tru64/simple-timing/config.ini @@ -11,7 +11,7 @@ physmem=system.physmem [system.cpu] type=TimingSimpleCPU -children=dcache icache l2cache toL2Bus workload +children=dcache icache l2cache toL2Bus tracer workload clock=500 cpu_id=0 defer_registration=false @@ -24,17 +24,16 @@ max_loads_any_thread=0 phase=0 progress_interval=0 system=system +tracer=system.cpu.tracer workload=system.cpu.workload dcache_port=system.cpu.dcache.cpu_side icache_port=system.cpu.icache.cpu_side [system.cpu.dcache] type=BaseCache -adaptive_compression=false +addr_range=0:18446744073709551615 assoc=2 block_size=64 -compressed_bus=false -compression_latency=0 hash_delay=1 latency=1000 lifo=false @@ -52,12 +51,10 @@ prefetch_serial_squash=false prefetch_use_cpu_id=true prefetcher_size=100 prioritizeRequests=false -protocol=Null repl=Null size=262144 split=false split_size=0 -store_compressed=false subblock_size=0 tgts_per_mshr=5 trace_addr=0 @@ -68,11 +65,9 @@ mem_side=system.cpu.toL2Bus.port[1] [system.cpu.icache] type=BaseCache -adaptive_compression=false +addr_range=0:18446744073709551615 assoc=2 block_size=64 -compressed_bus=false -compression_latency=0 hash_delay=1 latency=1000 lifo=false @@ -90,12 +85,10 @@ prefetch_serial_squash=false prefetch_use_cpu_id=true prefetcher_size=100 prioritizeRequests=false -protocol=Null repl=Null size=131072 split=false split_size=0 -store_compressed=false subblock_size=0 tgts_per_mshr=5 trace_addr=0 @@ -106,11 +99,9 @@ mem_side=system.cpu.toL2Bus.port[0] [system.cpu.l2cache] type=BaseCache -adaptive_compression=false +addr_range=0:18446744073709551615 assoc=2 block_size=64 -compressed_bus=false -compression_latency=0 hash_delay=1 latency=10000 lifo=false @@ -128,12 +119,10 @@ prefetch_serial_squash=false prefetch_use_cpu_id=true prefetcher_size=100 prioritizeRequests=false -protocol=Null repl=Null size=2097152 split=false split_size=0 -store_compressed=false subblock_size=0 tgts_per_mshr=5 trace_addr=0 @@ -151,6 +140,9 @@ responder_set=false width=64 port=system.cpu.icache.mem_side system.cpu.dcache.mem_side system.cpu.l2cache.cpu_side +[system.cpu.tracer] +type=ExeTracer + [system.cpu.workload] type=LiveProcess cmd=hello diff --git a/tests/quick/00.hello/ref/alpha/tru64/simple-timing/m5stats.txt b/tests/quick/00.hello/ref/alpha/tru64/simple-timing/m5stats.txt index 621520fa3..56479827d 100644 --- a/tests/quick/00.hello/ref/alpha/tru64/simple-timing/m5stats.txt +++ b/tests/quick/00.hello/ref/alpha/tru64/simple-timing/m5stats.txt @@ -1,12 +1,13 @@ ---------- Begin Simulation Statistics ---------- -host_inst_rate 51133 # Simulator instruction rate (inst/s) -host_seconds 0.05 # Real time elapsed on the host -host_tick_rate 127514531 # Simulator tick rate (ticks/s) +host_inst_rate 43962 # Simulator instruction rate (inst/s) +host_mem_usage 153564 # Number of bytes of host memory used +host_seconds 0.06 # Real time elapsed on the host +host_tick_rate 112042683 # Simulator tick rate (ticks/s) sim_freq 1000000000000 # Frequency of simulated ticks sim_insts 2578 # Number of instructions simulated -sim_seconds 0.000006 # Number of seconds simulated -sim_ticks 6472000 # Number of ticks simulated +sim_seconds 0.000007 # Number of seconds simulated +sim_ticks 6615000 # Number of ticks simulated system.cpu.dcache.ReadReq_accesses 415 # number of ReadReq accesses(hits+misses) system.cpu.dcache.ReadReq_avg_miss_latency 14000 # average ReadReq miss latency system.cpu.dcache.ReadReq_avg_mshr_miss_latency 13000 # average ReadReq mshr miss latency @@ -20,13 +21,13 @@ system.cpu.dcache.ReadReq_mshr_misses 55 # nu system.cpu.dcache.WriteReq_accesses 294 # number of WriteReq accesses(hits+misses) system.cpu.dcache.WriteReq_avg_miss_latency 14000 # average WriteReq miss latency system.cpu.dcache.WriteReq_avg_mshr_miss_latency 13000 # average WriteReq mshr miss latency -system.cpu.dcache.WriteReq_hits 267 # number of WriteReq hits -system.cpu.dcache.WriteReq_miss_latency 378000 # number of WriteReq miss cycles -system.cpu.dcache.WriteReq_miss_rate 0.091837 # miss rate for WriteReq accesses -system.cpu.dcache.WriteReq_misses 27 # number of WriteReq misses -system.cpu.dcache.WriteReq_mshr_miss_latency 351000 # number of WriteReq MSHR miss cycles -system.cpu.dcache.WriteReq_mshr_miss_rate 0.091837 # mshr miss rate for WriteReq accesses -system.cpu.dcache.WriteReq_mshr_misses 27 # number of WriteReq MSHR misses +system.cpu.dcache.WriteReq_hits 256 # number of WriteReq hits +system.cpu.dcache.WriteReq_miss_latency 532000 # number of WriteReq miss cycles +system.cpu.dcache.WriteReq_miss_rate 0.129252 # miss rate for WriteReq accesses +system.cpu.dcache.WriteReq_misses 38 # number of WriteReq misses +system.cpu.dcache.WriteReq_mshr_miss_latency 494000 # number of WriteReq MSHR miss cycles +system.cpu.dcache.WriteReq_mshr_miss_rate 0.129252 # mshr miss rate for WriteReq accesses +system.cpu.dcache.WriteReq_mshr_misses 38 # number of WriteReq MSHR misses system.cpu.dcache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked system.cpu.dcache.avg_blocked_cycles_no_targets # average number of cycles each access was blocked system.cpu.dcache.avg_refs 7.646341 # Average number of references to valid blocks. @@ -38,14 +39,14 @@ system.cpu.dcache.cache_copies 0 # nu system.cpu.dcache.demand_accesses 709 # number of demand (read+write) accesses system.cpu.dcache.demand_avg_miss_latency 14000 # average overall miss latency system.cpu.dcache.demand_avg_mshr_miss_latency 13000 # average overall mshr miss latency -system.cpu.dcache.demand_hits 627 # number of demand (read+write) hits -system.cpu.dcache.demand_miss_latency 1148000 # number of demand (read+write) miss cycles -system.cpu.dcache.demand_miss_rate 0.115656 # miss rate for demand accesses -system.cpu.dcache.demand_misses 82 # number of demand (read+write) misses +system.cpu.dcache.demand_hits 616 # number of demand (read+write) hits +system.cpu.dcache.demand_miss_latency 1302000 # number of demand (read+write) miss cycles +system.cpu.dcache.demand_miss_rate 0.131171 # miss rate for demand accesses +system.cpu.dcache.demand_misses 93 # number of demand (read+write) misses system.cpu.dcache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu.dcache.demand_mshr_miss_latency 1066000 # number of demand (read+write) MSHR miss cycles -system.cpu.dcache.demand_mshr_miss_rate 0.115656 # mshr miss rate for demand accesses -system.cpu.dcache.demand_mshr_misses 82 # number of demand (read+write) MSHR misses +system.cpu.dcache.demand_mshr_miss_latency 1209000 # number of demand (read+write) MSHR miss cycles +system.cpu.dcache.demand_mshr_miss_rate 0.131171 # mshr miss rate for demand accesses +system.cpu.dcache.demand_mshr_misses 93 # number of demand (read+write) MSHR misses system.cpu.dcache.fast_writes 0 # number of fast writes performed system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate @@ -53,14 +54,14 @@ system.cpu.dcache.overall_accesses 709 # nu system.cpu.dcache.overall_avg_miss_latency 14000 # average overall miss latency system.cpu.dcache.overall_avg_mshr_miss_latency 13000 # average overall mshr miss latency system.cpu.dcache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency -system.cpu.dcache.overall_hits 627 # number of overall hits -system.cpu.dcache.overall_miss_latency 1148000 # number of overall miss cycles -system.cpu.dcache.overall_miss_rate 0.115656 # miss rate for overall accesses -system.cpu.dcache.overall_misses 82 # number of overall misses +system.cpu.dcache.overall_hits 616 # number of overall hits +system.cpu.dcache.overall_miss_latency 1302000 # number of overall miss cycles +system.cpu.dcache.overall_miss_rate 0.131171 # miss rate for overall accesses +system.cpu.dcache.overall_misses 93 # number of overall misses system.cpu.dcache.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu.dcache.overall_mshr_miss_latency 1066000 # number of overall MSHR miss cycles -system.cpu.dcache.overall_mshr_miss_rate 0.115656 # mshr miss rate for overall accesses -system.cpu.dcache.overall_mshr_misses 82 # number of overall MSHR misses +system.cpu.dcache.overall_mshr_miss_latency 1209000 # number of overall MSHR miss cycles +system.cpu.dcache.overall_mshr_miss_rate 0.131171 # mshr miss rate for overall accesses +system.cpu.dcache.overall_mshr_misses 93 # number of overall MSHR misses system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.dcache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.cpu.dcache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache @@ -75,7 +76,7 @@ system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss 0 system.cpu.dcache.replacements 0 # number of replacements system.cpu.dcache.sampled_refs 82 # Sample count of references to valid blocks. system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.dcache.tagsinuse 50.002941 # Cycle average of tags in use +system.cpu.dcache.tagsinuse 50.044147 # Cycle average of tags in use system.cpu.dcache.total_refs 627 # Total number of references to valid blocks. system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.dcache.writebacks 0 # number of writebacks @@ -137,20 +138,38 @@ system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss 0 system.cpu.icache.replacements 0 # number of replacements system.cpu.icache.sampled_refs 163 # Sample count of references to valid blocks. system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.icache.tagsinuse 86.067224 # Cycle average of tags in use +system.cpu.icache.tagsinuse 86.205303 # Cycle average of tags in use system.cpu.icache.total_refs 2416 # Total number of references to valid blocks. system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.icache.writebacks 0 # number of writebacks system.cpu.idle_fraction 0 # Percentage of idle cycles -system.cpu.l2cache.ReadReq_accesses 245 # number of ReadReq accesses(hits+misses) -system.cpu.l2cache.ReadReq_avg_miss_latency 13000 # average ReadReq miss latency +system.cpu.l2cache.ReadExReq_accesses 27 # number of ReadExReq accesses(hits+misses) +system.cpu.l2cache.ReadExReq_avg_miss_latency 12000 # average ReadExReq miss latency +system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency 11000 # average ReadExReq mshr miss latency +system.cpu.l2cache.ReadExReq_miss_latency 324000 # number of ReadExReq miss cycles +system.cpu.l2cache.ReadExReq_miss_rate 1 # miss rate for ReadExReq accesses +system.cpu.l2cache.ReadExReq_misses 27 # number of ReadExReq misses +system.cpu.l2cache.ReadExReq_mshr_miss_latency 297000 # number of ReadExReq MSHR miss cycles +system.cpu.l2cache.ReadExReq_mshr_miss_rate 1 # mshr miss rate for ReadExReq accesses +system.cpu.l2cache.ReadExReq_mshr_misses 27 # number of ReadExReq MSHR misses +system.cpu.l2cache.ReadReq_accesses 218 # number of ReadReq accesses(hits+misses) +system.cpu.l2cache.ReadReq_avg_miss_latency 12000 # average ReadReq miss latency system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 11000 # average ReadReq mshr miss latency -system.cpu.l2cache.ReadReq_miss_latency 3185000 # number of ReadReq miss cycles +system.cpu.l2cache.ReadReq_miss_latency 2616000 # number of ReadReq miss cycles system.cpu.l2cache.ReadReq_miss_rate 1 # miss rate for ReadReq accesses -system.cpu.l2cache.ReadReq_misses 245 # number of ReadReq misses -system.cpu.l2cache.ReadReq_mshr_miss_latency 2695000 # number of ReadReq MSHR miss cycles +system.cpu.l2cache.ReadReq_misses 218 # number of ReadReq misses +system.cpu.l2cache.ReadReq_mshr_miss_latency 2398000 # number of ReadReq MSHR miss cycles system.cpu.l2cache.ReadReq_mshr_miss_rate 1 # mshr miss rate for ReadReq accesses -system.cpu.l2cache.ReadReq_mshr_misses 245 # number of ReadReq MSHR misses +system.cpu.l2cache.ReadReq_mshr_misses 218 # number of ReadReq MSHR misses +system.cpu.l2cache.UpgradeReq_accesses 11 # number of UpgradeReq accesses(hits+misses) +system.cpu.l2cache.UpgradeReq_avg_miss_latency 12000 # average UpgradeReq miss latency +system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency 11000 # average UpgradeReq mshr miss latency +system.cpu.l2cache.UpgradeReq_miss_latency 132000 # number of UpgradeReq miss cycles +system.cpu.l2cache.UpgradeReq_miss_rate 1 # miss rate for UpgradeReq accesses +system.cpu.l2cache.UpgradeReq_misses 11 # number of UpgradeReq misses +system.cpu.l2cache.UpgradeReq_mshr_miss_latency 121000 # number of UpgradeReq MSHR miss cycles +system.cpu.l2cache.UpgradeReq_mshr_miss_rate 1 # mshr miss rate for UpgradeReq accesses +system.cpu.l2cache.UpgradeReq_mshr_misses 11 # number of UpgradeReq MSHR misses system.cpu.l2cache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked system.cpu.l2cache.avg_blocked_cycles_no_targets # average number of cycles each access was blocked system.cpu.l2cache.avg_refs 0 # Average number of references to valid blocks. @@ -160,10 +179,10 @@ system.cpu.l2cache.blocked_cycles_no_mshrs 0 # system.cpu.l2cache.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu.l2cache.cache_copies 0 # number of cache copies performed system.cpu.l2cache.demand_accesses 245 # number of demand (read+write) accesses -system.cpu.l2cache.demand_avg_miss_latency 13000 # average overall miss latency +system.cpu.l2cache.demand_avg_miss_latency 12000 # average overall miss latency system.cpu.l2cache.demand_avg_mshr_miss_latency 11000 # average overall mshr miss latency system.cpu.l2cache.demand_hits 0 # number of demand (read+write) hits -system.cpu.l2cache.demand_miss_latency 3185000 # number of demand (read+write) miss cycles +system.cpu.l2cache.demand_miss_latency 2940000 # number of demand (read+write) miss cycles system.cpu.l2cache.demand_miss_rate 1 # miss rate for demand accesses system.cpu.l2cache.demand_misses 245 # number of demand (read+write) misses system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits @@ -174,11 +193,11 @@ system.cpu.l2cache.fast_writes 0 # nu system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate system.cpu.l2cache.overall_accesses 245 # number of overall (read+write) accesses -system.cpu.l2cache.overall_avg_miss_latency 13000 # average overall miss latency +system.cpu.l2cache.overall_avg_miss_latency 12000 # average overall miss latency system.cpu.l2cache.overall_avg_mshr_miss_latency 11000 # average overall mshr miss latency system.cpu.l2cache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency system.cpu.l2cache.overall_hits 0 # number of overall hits -system.cpu.l2cache.overall_miss_latency 3185000 # number of overall miss cycles +system.cpu.l2cache.overall_miss_latency 2940000 # number of overall miss cycles system.cpu.l2cache.overall_miss_rate 1 # miss rate for overall accesses system.cpu.l2cache.overall_misses 245 # number of overall misses system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits @@ -197,14 +216,14 @@ system.cpu.l2cache.prefetcher.num_hwpf_removed_MSHR_hit 0 system.cpu.l2cache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time system.cpu.l2cache.replacements 0 # number of replacements -system.cpu.l2cache.sampled_refs 245 # Sample count of references to valid blocks. +system.cpu.l2cache.sampled_refs 207 # Sample count of references to valid blocks. system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.l2cache.tagsinuse 136.108021 # Cycle average of tags in use +system.cpu.l2cache.tagsinuse 109.774164 # Cycle average of tags in use system.cpu.l2cache.total_refs 0 # Total number of references to valid blocks. system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.l2cache.writebacks 0 # number of writebacks system.cpu.not_idle_fraction 1 # Percentage of non-idle cycles -system.cpu.numCycles 6472000 # number of cpu cycles simulated +system.cpu.numCycles 6615000 # number of cpu cycles simulated system.cpu.num_insts 2578 # Number of instructions executed system.cpu.num_refs 710 # Number of memory references system.cpu.workload.PROG:num_syscalls 4 # Number of system calls diff --git a/tests/quick/00.hello/ref/alpha/tru64/simple-timing/stdout b/tests/quick/00.hello/ref/alpha/tru64/simple-timing/stdout index 1c6780cf0..47fca6faf 100644 --- a/tests/quick/00.hello/ref/alpha/tru64/simple-timing/stdout +++ b/tests/quick/00.hello/ref/alpha/tru64/simple-timing/stdout @@ -6,9 +6,9 @@ The Regents of The University of Michigan All Rights Reserved -M5 compiled Jun 10 2007 14:06:20 -M5 started Sun Jun 10 14:22:37 2007 -M5 executing on iceaxe -command line: /Users/nate/build/outgoing/build/ALPHA_SE/m5.debug -d /Users/nate/build/outgoing/build/ALPHA_SE/tests/debug/quick/00.hello/alpha/tru64/simple-timing tests/run.py quick/00.hello/alpha/tru64/simple-timing +M5 compiled Aug 3 2007 03:56:47 +M5 started Fri Aug 3 04:17:14 2007 +M5 executing on zizzer.eecs.umich.edu +command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/quick/00.hello/alpha/tru64/simple-timing tests/run.py quick/00.hello/alpha/tru64/simple-timing Global frequency set at 1000000000000 ticks per second -Exiting @ tick 6472000 because target called exit() +Exiting @ tick 6615000 because target called exit() diff --git a/tests/quick/00.hello/ref/mips/linux/simple-atomic/config.ini b/tests/quick/00.hello/ref/mips/linux/simple-atomic/config.ini index ea3ba751b..c6807e6a7 100644 --- a/tests/quick/00.hello/ref/mips/linux/simple-atomic/config.ini +++ b/tests/quick/00.hello/ref/mips/linux/simple-atomic/config.ini @@ -11,7 +11,7 @@ physmem=system.physmem [system.cpu] type=AtomicSimpleCPU -children=workload +children=tracer workload clock=500 cpu_id=0 defer_registration=false @@ -25,11 +25,15 @@ phase=0 progress_interval=0 simulate_stalls=false system=system +tracer=system.cpu.tracer width=1 workload=system.cpu.workload dcache_port=system.membus.port[2] icache_port=system.membus.port[1] +[system.cpu.tracer] +type=ExeTracer + [system.cpu.workload] type=LiveProcess cmd=hello @@ -53,7 +57,7 @@ bus_id=0 clock=1000 responder_set=false width=64 -port=system.physmem.port system.cpu.icache_port system.cpu.dcache_port +port=system.physmem.port[0] system.cpu.icache_port system.cpu.dcache_port [system.physmem] type=PhysicalMemory diff --git a/tests/quick/00.hello/ref/mips/linux/simple-atomic/m5stats.txt b/tests/quick/00.hello/ref/mips/linux/simple-atomic/m5stats.txt index 6a0c251b5..98d540d90 100644 --- a/tests/quick/00.hello/ref/mips/linux/simple-atomic/m5stats.txt +++ b/tests/quick/00.hello/ref/mips/linux/simple-atomic/m5stats.txt @@ -1,9 +1,9 @@ ---------- Begin Simulation Statistics ---------- -host_inst_rate 535701 # Simulator instruction rate (inst/s) -host_mem_usage 148368 # Number of bytes of host memory used -host_seconds 0.01 # Real time elapsed on the host -host_tick_rate 257653061 # Simulator tick rate (ticks/s) +host_inst_rate 25511 # Simulator instruction rate (inst/s) +host_mem_usage 149560 # Number of bytes of host memory used +host_seconds 0.22 # Real time elapsed on the host +host_tick_rate 12728361 # Simulator tick rate (ticks/s) sim_freq 1000000000000 # Frequency of simulated ticks sim_insts 5657 # Number of instructions simulated sim_seconds 0.000003 # Number of seconds simulated diff --git a/tests/quick/00.hello/ref/mips/linux/simple-atomic/stdout b/tests/quick/00.hello/ref/mips/linux/simple-atomic/stdout index 7fb23e5a5..3919c7c81 100644 --- a/tests/quick/00.hello/ref/mips/linux/simple-atomic/stdout +++ b/tests/quick/00.hello/ref/mips/linux/simple-atomic/stdout @@ -6,8 +6,8 @@ The Regents of The University of Michigan All Rights Reserved -M5 compiled May 15 2007 12:54:05 -M5 started Tue May 15 12:54:07 2007 +M5 compiled Aug 3 2007 04:06:41 +M5 started Fri Aug 3 04:31:09 2007 M5 executing on zizzer.eecs.umich.edu command line: build/MIPS_SE/m5.fast -d build/MIPS_SE/tests/fast/quick/00.hello/mips/linux/simple-atomic tests/run.py quick/00.hello/mips/linux/simple-atomic Global frequency set at 1000000000000 ticks per second diff --git a/tests/quick/00.hello/ref/mips/linux/simple-timing/config.ini b/tests/quick/00.hello/ref/mips/linux/simple-timing/config.ini index a5d4e6583..c52036289 100644 --- a/tests/quick/00.hello/ref/mips/linux/simple-timing/config.ini +++ b/tests/quick/00.hello/ref/mips/linux/simple-timing/config.ini @@ -11,7 +11,7 @@ physmem=system.physmem [system.cpu] type=TimingSimpleCPU -children=dcache icache l2cache toL2Bus workload +children=dcache icache l2cache toL2Bus tracer workload clock=500 cpu_id=0 defer_registration=false @@ -24,17 +24,16 @@ max_loads_any_thread=0 phase=0 progress_interval=0 system=system +tracer=system.cpu.tracer workload=system.cpu.workload dcache_port=system.cpu.dcache.cpu_side icache_port=system.cpu.icache.cpu_side [system.cpu.dcache] type=BaseCache -adaptive_compression=false +addr_range=0:18446744073709551615 assoc=2 block_size=64 -compressed_bus=false -compression_latency=0 hash_delay=1 latency=1000 lifo=false @@ -44,7 +43,7 @@ prefetch_access=false prefetch_cache_check_push=true prefetch_data_accesses_only=false prefetch_degree=1 -prefetch_latency=10 +prefetch_latency=10000 prefetch_miss=false prefetch_past_page=false prefetch_policy=none @@ -52,12 +51,10 @@ prefetch_serial_squash=false prefetch_use_cpu_id=true prefetcher_size=100 prioritizeRequests=false -protocol=Null repl=Null size=262144 split=false split_size=0 -store_compressed=false subblock_size=0 tgts_per_mshr=5 trace_addr=0 @@ -68,11 +65,9 @@ mem_side=system.cpu.toL2Bus.port[1] [system.cpu.icache] type=BaseCache -adaptive_compression=false +addr_range=0:18446744073709551615 assoc=2 block_size=64 -compressed_bus=false -compression_latency=0 hash_delay=1 latency=1000 lifo=false @@ -82,7 +77,7 @@ prefetch_access=false prefetch_cache_check_push=true prefetch_data_accesses_only=false prefetch_degree=1 -prefetch_latency=10 +prefetch_latency=10000 prefetch_miss=false prefetch_past_page=false prefetch_policy=none @@ -90,12 +85,10 @@ prefetch_serial_squash=false prefetch_use_cpu_id=true prefetcher_size=100 prioritizeRequests=false -protocol=Null repl=Null size=131072 split=false split_size=0 -store_compressed=false subblock_size=0 tgts_per_mshr=5 trace_addr=0 @@ -106,11 +99,9 @@ mem_side=system.cpu.toL2Bus.port[0] [system.cpu.l2cache] type=BaseCache -adaptive_compression=false +addr_range=0:18446744073709551615 assoc=2 block_size=64 -compressed_bus=false -compression_latency=0 hash_delay=1 latency=10000 lifo=false @@ -120,7 +111,7 @@ prefetch_access=false prefetch_cache_check_push=true prefetch_data_accesses_only=false prefetch_degree=1 -prefetch_latency=10 +prefetch_latency=100000 prefetch_miss=false prefetch_past_page=false prefetch_policy=none @@ -128,12 +119,10 @@ prefetch_serial_squash=false prefetch_use_cpu_id=true prefetcher_size=100 prioritizeRequests=false -protocol=Null repl=Null size=2097152 split=false split_size=0 -store_compressed=false subblock_size=0 tgts_per_mshr=5 trace_addr=0 @@ -151,6 +140,9 @@ responder_set=false width=64 port=system.cpu.icache.mem_side system.cpu.dcache.mem_side system.cpu.l2cache.cpu_side +[system.cpu.tracer] +type=ExeTracer + [system.cpu.workload] type=LiveProcess cmd=hello @@ -174,7 +166,7 @@ bus_id=0 clock=1000 responder_set=false width=64 -port=system.physmem.port system.cpu.l2cache.mem_side +port=system.physmem.port[0] system.cpu.l2cache.mem_side [system.physmem] type=PhysicalMemory diff --git a/tests/quick/00.hello/ref/mips/linux/simple-timing/m5stats.txt b/tests/quick/00.hello/ref/mips/linux/simple-timing/m5stats.txt index 41bb7c8b7..985175cad 100644 --- a/tests/quick/00.hello/ref/mips/linux/simple-timing/m5stats.txt +++ b/tests/quick/00.hello/ref/mips/linux/simple-timing/m5stats.txt @@ -1,13 +1,13 @@ ---------- Begin Simulation Statistics ---------- -host_inst_rate 273338 # Simulator instruction rate (inst/s) -host_mem_usage 153844 # Number of bytes of host memory used -host_seconds 0.02 # Real time elapsed on the host -host_tick_rate 633390216 # Simulator tick rate (ticks/s) +host_inst_rate 45085 # Simulator instruction rate (inst/s) +host_mem_usage 155088 # Number of bytes of host memory used +host_seconds 0.13 # Real time elapsed on the host +host_tick_rate 101545982 # Simulator tick rate (ticks/s) sim_freq 1000000000000 # Frequency of simulated ticks sim_insts 5657 # Number of instructions simulated -sim_seconds 0.000013 # Number of seconds simulated -sim_ticks 13362000 # Number of ticks simulated +sim_seconds 0.000014 # Number of seconds simulated +sim_ticks 13544000 # Number of ticks simulated system.cpu.dcache.ReadReq_accesses 1130 # number of ReadReq accesses(hits+misses) system.cpu.dcache.ReadReq_avg_miss_latency 14000 # average ReadReq miss latency system.cpu.dcache.ReadReq_avg_mshr_miss_latency 13000 # average ReadReq mshr miss latency @@ -21,13 +21,13 @@ system.cpu.dcache.ReadReq_mshr_misses 82 # nu system.cpu.dcache.WriteReq_accesses 924 # number of WriteReq accesses(hits+misses) system.cpu.dcache.WriteReq_avg_miss_latency 14000 # average WriteReq miss latency system.cpu.dcache.WriteReq_avg_mshr_miss_latency 13000 # average WriteReq mshr miss latency -system.cpu.dcache.WriteReq_hits 874 # number of WriteReq hits -system.cpu.dcache.WriteReq_miss_latency 700000 # number of WriteReq miss cycles -system.cpu.dcache.WriteReq_miss_rate 0.054113 # miss rate for WriteReq accesses -system.cpu.dcache.WriteReq_misses 50 # number of WriteReq misses -system.cpu.dcache.WriteReq_mshr_miss_latency 650000 # number of WriteReq MSHR miss cycles -system.cpu.dcache.WriteReq_mshr_miss_rate 0.054113 # mshr miss rate for WriteReq accesses -system.cpu.dcache.WriteReq_mshr_misses 50 # number of WriteReq MSHR misses +system.cpu.dcache.WriteReq_hits 860 # number of WriteReq hits +system.cpu.dcache.WriteReq_miss_latency 896000 # number of WriteReq miss cycles +system.cpu.dcache.WriteReq_miss_rate 0.069264 # miss rate for WriteReq accesses +system.cpu.dcache.WriteReq_misses 64 # number of WriteReq misses +system.cpu.dcache.WriteReq_mshr_miss_latency 832000 # number of WriteReq MSHR miss cycles +system.cpu.dcache.WriteReq_mshr_miss_rate 0.069264 # mshr miss rate for WriteReq accesses +system.cpu.dcache.WriteReq_mshr_misses 64 # number of WriteReq MSHR misses system.cpu.dcache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked system.cpu.dcache.avg_blocked_cycles_no_targets # average number of cycles each access was blocked system.cpu.dcache.avg_refs 14.560606 # Average number of references to valid blocks. @@ -39,14 +39,14 @@ system.cpu.dcache.cache_copies 0 # nu system.cpu.dcache.demand_accesses 2054 # number of demand (read+write) accesses system.cpu.dcache.demand_avg_miss_latency 14000 # average overall miss latency system.cpu.dcache.demand_avg_mshr_miss_latency 13000 # average overall mshr miss latency -system.cpu.dcache.demand_hits 1922 # number of demand (read+write) hits -system.cpu.dcache.demand_miss_latency 1848000 # number of demand (read+write) miss cycles -system.cpu.dcache.demand_miss_rate 0.064265 # miss rate for demand accesses -system.cpu.dcache.demand_misses 132 # number of demand (read+write) misses +system.cpu.dcache.demand_hits 1908 # number of demand (read+write) hits +system.cpu.dcache.demand_miss_latency 2044000 # number of demand (read+write) miss cycles +system.cpu.dcache.demand_miss_rate 0.071081 # miss rate for demand accesses +system.cpu.dcache.demand_misses 146 # number of demand (read+write) misses system.cpu.dcache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu.dcache.demand_mshr_miss_latency 1716000 # number of demand (read+write) MSHR miss cycles -system.cpu.dcache.demand_mshr_miss_rate 0.064265 # mshr miss rate for demand accesses -system.cpu.dcache.demand_mshr_misses 132 # number of demand (read+write) MSHR misses +system.cpu.dcache.demand_mshr_miss_latency 1898000 # number of demand (read+write) MSHR miss cycles +system.cpu.dcache.demand_mshr_miss_rate 0.071081 # mshr miss rate for demand accesses +system.cpu.dcache.demand_mshr_misses 146 # number of demand (read+write) MSHR misses system.cpu.dcache.fast_writes 0 # number of fast writes performed system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate @@ -54,14 +54,14 @@ system.cpu.dcache.overall_accesses 2054 # nu system.cpu.dcache.overall_avg_miss_latency 14000 # average overall miss latency system.cpu.dcache.overall_avg_mshr_miss_latency 13000 # average overall mshr miss latency system.cpu.dcache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency -system.cpu.dcache.overall_hits 1922 # number of overall hits -system.cpu.dcache.overall_miss_latency 1848000 # number of overall miss cycles -system.cpu.dcache.overall_miss_rate 0.064265 # miss rate for overall accesses -system.cpu.dcache.overall_misses 132 # number of overall misses +system.cpu.dcache.overall_hits 1908 # number of overall hits +system.cpu.dcache.overall_miss_latency 2044000 # number of overall miss cycles +system.cpu.dcache.overall_miss_rate 0.071081 # miss rate for overall accesses +system.cpu.dcache.overall_misses 146 # number of overall misses system.cpu.dcache.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu.dcache.overall_mshr_miss_latency 1716000 # number of overall MSHR miss cycles -system.cpu.dcache.overall_mshr_miss_rate 0.064265 # mshr miss rate for overall accesses -system.cpu.dcache.overall_mshr_misses 132 # number of overall MSHR misses +system.cpu.dcache.overall_mshr_miss_latency 1898000 # number of overall MSHR miss cycles +system.cpu.dcache.overall_mshr_miss_rate 0.071081 # mshr miss rate for overall accesses +system.cpu.dcache.overall_mshr_misses 146 # number of overall MSHR misses system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.dcache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.cpu.dcache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache @@ -76,7 +76,7 @@ system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss 0 system.cpu.dcache.replacements 0 # number of replacements system.cpu.dcache.sampled_refs 132 # Sample count of references to valid blocks. system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.dcache.tagsinuse 85.283494 # Cycle average of tags in use +system.cpu.dcache.tagsinuse 85.440937 # Cycle average of tags in use system.cpu.dcache.total_refs 1922 # Total number of references to valid blocks. system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.dcache.writebacks 0 # number of writebacks @@ -138,34 +138,52 @@ system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss 0 system.cpu.icache.replacements 13 # number of replacements system.cpu.icache.sampled_refs 303 # Sample count of references to valid blocks. system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.icache.tagsinuse 136.309471 # Cycle average of tags in use +system.cpu.icache.tagsinuse 136.727640 # Cycle average of tags in use system.cpu.icache.total_refs 5355 # Total number of references to valid blocks. system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.icache.writebacks 0 # number of writebacks system.cpu.idle_fraction 0 # Percentage of idle cycles -system.cpu.l2cache.ReadReq_accesses 435 # number of ReadReq accesses(hits+misses) -system.cpu.l2cache.ReadReq_avg_miss_latency 13000 # average ReadReq miss latency +system.cpu.l2cache.ReadExReq_accesses 50 # number of ReadExReq accesses(hits+misses) +system.cpu.l2cache.ReadExReq_avg_miss_latency 12000 # average ReadExReq miss latency +system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency 11000 # average ReadExReq mshr miss latency +system.cpu.l2cache.ReadExReq_miss_latency 600000 # number of ReadExReq miss cycles +system.cpu.l2cache.ReadExReq_miss_rate 1 # miss rate for ReadExReq accesses +system.cpu.l2cache.ReadExReq_misses 50 # number of ReadExReq misses +system.cpu.l2cache.ReadExReq_mshr_miss_latency 550000 # number of ReadExReq MSHR miss cycles +system.cpu.l2cache.ReadExReq_mshr_miss_rate 1 # mshr miss rate for ReadExReq accesses +system.cpu.l2cache.ReadExReq_mshr_misses 50 # number of ReadExReq MSHR misses +system.cpu.l2cache.ReadReq_accesses 385 # number of ReadReq accesses(hits+misses) +system.cpu.l2cache.ReadReq_avg_miss_latency 12000 # average ReadReq miss latency system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 11000 # average ReadReq mshr miss latency system.cpu.l2cache.ReadReq_hits 2 # number of ReadReq hits -system.cpu.l2cache.ReadReq_miss_latency 5629000 # number of ReadReq miss cycles -system.cpu.l2cache.ReadReq_miss_rate 0.995402 # miss rate for ReadReq accesses -system.cpu.l2cache.ReadReq_misses 433 # number of ReadReq misses -system.cpu.l2cache.ReadReq_mshr_miss_latency 4763000 # number of ReadReq MSHR miss cycles -system.cpu.l2cache.ReadReq_mshr_miss_rate 0.995402 # mshr miss rate for ReadReq accesses -system.cpu.l2cache.ReadReq_mshr_misses 433 # number of ReadReq MSHR misses +system.cpu.l2cache.ReadReq_miss_latency 4596000 # number of ReadReq miss cycles +system.cpu.l2cache.ReadReq_miss_rate 0.994805 # miss rate for ReadReq accesses +system.cpu.l2cache.ReadReq_misses 383 # number of ReadReq misses +system.cpu.l2cache.ReadReq_mshr_miss_latency 4213000 # number of ReadReq MSHR miss cycles +system.cpu.l2cache.ReadReq_mshr_miss_rate 0.994805 # mshr miss rate for ReadReq accesses +system.cpu.l2cache.ReadReq_mshr_misses 383 # number of ReadReq MSHR misses +system.cpu.l2cache.UpgradeReq_accesses 14 # number of UpgradeReq accesses(hits+misses) +system.cpu.l2cache.UpgradeReq_avg_miss_latency 12000 # average UpgradeReq miss latency +system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency 11000 # average UpgradeReq mshr miss latency +system.cpu.l2cache.UpgradeReq_miss_latency 168000 # number of UpgradeReq miss cycles +system.cpu.l2cache.UpgradeReq_miss_rate 1 # miss rate for UpgradeReq accesses +system.cpu.l2cache.UpgradeReq_misses 14 # number of UpgradeReq misses +system.cpu.l2cache.UpgradeReq_mshr_miss_latency 154000 # number of UpgradeReq MSHR miss cycles +system.cpu.l2cache.UpgradeReq_mshr_miss_rate 1 # mshr miss rate for UpgradeReq accesses +system.cpu.l2cache.UpgradeReq_mshr_misses 14 # number of UpgradeReq MSHR misses system.cpu.l2cache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked system.cpu.l2cache.avg_blocked_cycles_no_targets # average number of cycles each access was blocked -system.cpu.l2cache.avg_refs 0.004619 # Average number of references to valid blocks. +system.cpu.l2cache.avg_refs 0.005420 # Average number of references to valid blocks. system.cpu.l2cache.blocked_no_mshrs 0 # number of cycles access was blocked system.cpu.l2cache.blocked_no_targets 0 # number of cycles access was blocked system.cpu.l2cache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked system.cpu.l2cache.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu.l2cache.cache_copies 0 # number of cache copies performed system.cpu.l2cache.demand_accesses 435 # number of demand (read+write) accesses -system.cpu.l2cache.demand_avg_miss_latency 13000 # average overall miss latency +system.cpu.l2cache.demand_avg_miss_latency 12000 # average overall miss latency system.cpu.l2cache.demand_avg_mshr_miss_latency 11000 # average overall mshr miss latency system.cpu.l2cache.demand_hits 2 # number of demand (read+write) hits -system.cpu.l2cache.demand_miss_latency 5629000 # number of demand (read+write) miss cycles +system.cpu.l2cache.demand_miss_latency 5196000 # number of demand (read+write) miss cycles system.cpu.l2cache.demand_miss_rate 0.995402 # miss rate for demand accesses system.cpu.l2cache.demand_misses 433 # number of demand (read+write) misses system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits @@ -176,11 +194,11 @@ system.cpu.l2cache.fast_writes 0 # nu system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate system.cpu.l2cache.overall_accesses 435 # number of overall (read+write) accesses -system.cpu.l2cache.overall_avg_miss_latency 13000 # average overall miss latency +system.cpu.l2cache.overall_avg_miss_latency 12000 # average overall miss latency system.cpu.l2cache.overall_avg_mshr_miss_latency 11000 # average overall mshr miss latency system.cpu.l2cache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency system.cpu.l2cache.overall_hits 2 # number of overall hits -system.cpu.l2cache.overall_miss_latency 5629000 # number of overall miss cycles +system.cpu.l2cache.overall_miss_latency 5196000 # number of overall miss cycles system.cpu.l2cache.overall_miss_rate 0.995402 # miss rate for overall accesses system.cpu.l2cache.overall_misses 433 # number of overall misses system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits @@ -199,14 +217,14 @@ system.cpu.l2cache.prefetcher.num_hwpf_removed_MSHR_hit 0 system.cpu.l2cache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time system.cpu.l2cache.replacements 0 # number of replacements -system.cpu.l2cache.sampled_refs 433 # Sample count of references to valid blocks. +system.cpu.l2cache.sampled_refs 369 # Sample count of references to valid blocks. system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.l2cache.tagsinuse 222.872415 # Cycle average of tags in use +system.cpu.l2cache.tagsinuse 184.077317 # Cycle average of tags in use system.cpu.l2cache.total_refs 2 # Total number of references to valid blocks. system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.l2cache.writebacks 0 # number of writebacks system.cpu.not_idle_fraction 1 # Percentage of non-idle cycles -system.cpu.numCycles 13362000 # number of cpu cycles simulated +system.cpu.numCycles 13544000 # number of cpu cycles simulated system.cpu.num_insts 5657 # Number of instructions executed system.cpu.num_refs 2055 # Number of memory references system.cpu.workload.PROG:num_syscalls 13 # Number of system calls diff --git a/tests/quick/00.hello/ref/mips/linux/simple-timing/stdout b/tests/quick/00.hello/ref/mips/linux/simple-timing/stdout index 6b688641a..c24f82c4f 100644 --- a/tests/quick/00.hello/ref/mips/linux/simple-timing/stdout +++ b/tests/quick/00.hello/ref/mips/linux/simple-timing/stdout @@ -6,9 +6,9 @@ The Regents of The University of Michigan All Rights Reserved -M5 compiled May 15 2007 12:54:05 -M5 started Tue May 15 12:54:07 2007 +M5 compiled Aug 3 2007 04:06:41 +M5 started Fri Aug 3 04:31:10 2007 M5 executing on zizzer.eecs.umich.edu command line: build/MIPS_SE/m5.fast -d build/MIPS_SE/tests/fast/quick/00.hello/mips/linux/simple-timing tests/run.py quick/00.hello/mips/linux/simple-timing Global frequency set at 1000000000000 ticks per second -Exiting @ tick 13362000 because target called exit() +Exiting @ tick 13544000 because target called exit() diff --git a/tests/quick/00.hello/ref/sparc/linux/simple-atomic/config.ini b/tests/quick/00.hello/ref/sparc/linux/simple-atomic/config.ini index 0e142e6ce..d7237a4af 100644 --- a/tests/quick/00.hello/ref/sparc/linux/simple-atomic/config.ini +++ b/tests/quick/00.hello/ref/sparc/linux/simple-atomic/config.ini @@ -11,7 +11,7 @@ physmem=system.physmem [system.cpu] type=AtomicSimpleCPU -children=workload +children=tracer workload clock=500 cpu_id=0 defer_registration=false @@ -25,11 +25,15 @@ phase=0 progress_interval=0 simulate_stalls=false system=system +tracer=system.cpu.tracer width=1 workload=system.cpu.workload dcache_port=system.membus.port[2] icache_port=system.membus.port[1] +[system.cpu.tracer] +type=ExeTracer + [system.cpu.workload] type=LiveProcess cmd=hello @@ -53,7 +57,7 @@ bus_id=0 clock=1000 responder_set=false width=64 -port=system.physmem.port system.cpu.icache_port system.cpu.dcache_port +port=system.physmem.port[0] system.cpu.icache_port system.cpu.dcache_port [system.physmem] type=PhysicalMemory diff --git a/tests/quick/00.hello/ref/sparc/linux/simple-atomic/m5stats.txt b/tests/quick/00.hello/ref/sparc/linux/simple-atomic/m5stats.txt index 8e0baaf8b..ab2e76d2a 100644 --- a/tests/quick/00.hello/ref/sparc/linux/simple-atomic/m5stats.txt +++ b/tests/quick/00.hello/ref/sparc/linux/simple-atomic/m5stats.txt @@ -1,9 +1,9 @@ ---------- Begin Simulation Statistics ---------- -host_inst_rate 439375 # Simulator instruction rate (inst/s) -host_mem_usage 149124 # Number of bytes of host memory used -host_seconds 0.01 # Real time elapsed on the host -host_tick_rate 211870315 # Simulator tick rate (ticks/s) +host_inst_rate 15625 # Simulator instruction rate (inst/s) +host_mem_usage 149968 # Number of bytes of host memory used +host_seconds 0.31 # Real time elapsed on the host +host_tick_rate 7799892 # Simulator tick rate (ticks/s) sim_freq 1000000000000 # Frequency of simulated ticks sim_insts 4863 # Number of instructions simulated sim_seconds 0.000002 # Number of seconds simulated diff --git a/tests/quick/00.hello/ref/sparc/linux/simple-atomic/stdout b/tests/quick/00.hello/ref/sparc/linux/simple-atomic/stdout index 9e1770f92..40d1acccc 100644 --- a/tests/quick/00.hello/ref/sparc/linux/simple-atomic/stdout +++ b/tests/quick/00.hello/ref/sparc/linux/simple-atomic/stdout @@ -5,8 +5,8 @@ The Regents of The University of Michigan All Rights Reserved -M5 compiled May 15 2007 13:02:31 -M5 started Tue May 15 17:00:05 2007 +M5 compiled Aug 3 2007 04:11:25 +M5 started Fri Aug 3 04:31:18 2007 M5 executing on zizzer.eecs.umich.edu command line: build/SPARC_SE/m5.fast -d build/SPARC_SE/tests/fast/quick/00.hello/sparc/linux/simple-atomic tests/run.py quick/00.hello/sparc/linux/simple-atomic Global frequency set at 1000000000000 ticks per second diff --git a/tests/quick/00.hello/ref/sparc/linux/simple-timing/config.ini b/tests/quick/00.hello/ref/sparc/linux/simple-timing/config.ini index fdb2bc3c9..4a945c9a3 100644 --- a/tests/quick/00.hello/ref/sparc/linux/simple-timing/config.ini +++ b/tests/quick/00.hello/ref/sparc/linux/simple-timing/config.ini @@ -11,7 +11,7 @@ physmem=system.physmem [system.cpu] type=TimingSimpleCPU -children=dcache icache l2cache toL2Bus workload +children=dcache icache l2cache toL2Bus tracer workload clock=500 cpu_id=0 defer_registration=false @@ -24,17 +24,16 @@ max_loads_any_thread=0 phase=0 progress_interval=0 system=system +tracer=system.cpu.tracer workload=system.cpu.workload dcache_port=system.cpu.dcache.cpu_side icache_port=system.cpu.icache.cpu_side [system.cpu.dcache] type=BaseCache -adaptive_compression=false +addr_range=0:18446744073709551615 assoc=2 block_size=64 -compressed_bus=false -compression_latency=0 hash_delay=1 latency=1000 lifo=false @@ -44,7 +43,7 @@ prefetch_access=false prefetch_cache_check_push=true prefetch_data_accesses_only=false prefetch_degree=1 -prefetch_latency=10 +prefetch_latency=10000 prefetch_miss=false prefetch_past_page=false prefetch_policy=none @@ -52,12 +51,10 @@ prefetch_serial_squash=false prefetch_use_cpu_id=true prefetcher_size=100 prioritizeRequests=false -protocol=Null repl=Null size=262144 split=false split_size=0 -store_compressed=false subblock_size=0 tgts_per_mshr=5 trace_addr=0 @@ -68,11 +65,9 @@ mem_side=system.cpu.toL2Bus.port[1] [system.cpu.icache] type=BaseCache -adaptive_compression=false +addr_range=0:18446744073709551615 assoc=2 block_size=64 -compressed_bus=false -compression_latency=0 hash_delay=1 latency=1000 lifo=false @@ -82,7 +77,7 @@ prefetch_access=false prefetch_cache_check_push=true prefetch_data_accesses_only=false prefetch_degree=1 -prefetch_latency=10 +prefetch_latency=10000 prefetch_miss=false prefetch_past_page=false prefetch_policy=none @@ -90,12 +85,10 @@ prefetch_serial_squash=false prefetch_use_cpu_id=true prefetcher_size=100 prioritizeRequests=false -protocol=Null repl=Null size=131072 split=false split_size=0 -store_compressed=false subblock_size=0 tgts_per_mshr=5 trace_addr=0 @@ -106,11 +99,9 @@ mem_side=system.cpu.toL2Bus.port[0] [system.cpu.l2cache] type=BaseCache -adaptive_compression=false +addr_range=0:18446744073709551615 assoc=2 block_size=64 -compressed_bus=false -compression_latency=0 hash_delay=1 latency=10000 lifo=false @@ -120,7 +111,7 @@ prefetch_access=false prefetch_cache_check_push=true prefetch_data_accesses_only=false prefetch_degree=1 -prefetch_latency=10 +prefetch_latency=100000 prefetch_miss=false prefetch_past_page=false prefetch_policy=none @@ -128,12 +119,10 @@ prefetch_serial_squash=false prefetch_use_cpu_id=true prefetcher_size=100 prioritizeRequests=false -protocol=Null repl=Null size=2097152 split=false split_size=0 -store_compressed=false subblock_size=0 tgts_per_mshr=5 trace_addr=0 @@ -151,6 +140,9 @@ responder_set=false width=64 port=system.cpu.icache.mem_side system.cpu.dcache.mem_side system.cpu.l2cache.cpu_side +[system.cpu.tracer] +type=ExeTracer + [system.cpu.workload] type=LiveProcess cmd=hello @@ -174,7 +166,7 @@ bus_id=0 clock=1000 responder_set=false width=64 -port=system.physmem.port system.cpu.l2cache.mem_side +port=system.physmem.port[0] system.cpu.l2cache.mem_side [system.physmem] type=PhysicalMemory diff --git a/tests/quick/00.hello/ref/sparc/linux/simple-timing/m5stats.txt b/tests/quick/00.hello/ref/sparc/linux/simple-timing/m5stats.txt index 839307810..7810c3335 100644 --- a/tests/quick/00.hello/ref/sparc/linux/simple-timing/m5stats.txt +++ b/tests/quick/00.hello/ref/sparc/linux/simple-timing/m5stats.txt @@ -1,33 +1,33 @@ ---------- Begin Simulation Statistics ---------- -host_inst_rate 239687 # Simulator instruction rate (inst/s) -host_mem_usage 154512 # Number of bytes of host memory used -host_seconds 0.02 # Real time elapsed on the host -host_tick_rate 542234464 # Simulator tick rate (ticks/s) +host_inst_rate 36222 # Simulator instruction rate (inst/s) +host_mem_usage 155556 # Number of bytes of host memory used +host_seconds 0.13 # Real time elapsed on the host +host_tick_rate 84966253 # Simulator tick rate (ticks/s) sim_freq 1000000000000 # Frequency of simulated ticks sim_insts 4863 # Number of instructions simulated sim_seconds 0.000011 # Number of seconds simulated -sim_ticks 11221000 # Number of ticks simulated +sim_ticks 11443000 # Number of ticks simulated system.cpu.dcache.ReadReq_accesses 608 # number of ReadReq accesses(hits+misses) -system.cpu.dcache.ReadReq_avg_miss_latency 13796.296296 # average ReadReq miss latency -system.cpu.dcache.ReadReq_avg_mshr_miss_latency 12796.296296 # average ReadReq mshr miss latency +system.cpu.dcache.ReadReq_avg_miss_latency 13962.962963 # average ReadReq miss latency +system.cpu.dcache.ReadReq_avg_mshr_miss_latency 12962.962963 # average ReadReq mshr miss latency system.cpu.dcache.ReadReq_hits 554 # number of ReadReq hits -system.cpu.dcache.ReadReq_miss_latency 745000 # number of ReadReq miss cycles +system.cpu.dcache.ReadReq_miss_latency 754000 # number of ReadReq miss cycles system.cpu.dcache.ReadReq_miss_rate 0.088816 # miss rate for ReadReq accesses system.cpu.dcache.ReadReq_misses 54 # number of ReadReq misses -system.cpu.dcache.ReadReq_mshr_miss_latency 691000 # number of ReadReq MSHR miss cycles +system.cpu.dcache.ReadReq_mshr_miss_latency 700000 # number of ReadReq MSHR miss cycles system.cpu.dcache.ReadReq_mshr_miss_rate 0.088816 # mshr miss rate for ReadReq accesses system.cpu.dcache.ReadReq_mshr_misses 54 # number of ReadReq MSHR misses system.cpu.dcache.WriteReq_accesses 661 # number of WriteReq accesses(hits+misses) system.cpu.dcache.WriteReq_avg_miss_latency 14000 # average WriteReq miss latency system.cpu.dcache.WriteReq_avg_mshr_miss_latency 13000 # average WriteReq mshr miss latency -system.cpu.dcache.WriteReq_hits 577 # number of WriteReq hits -system.cpu.dcache.WriteReq_miss_latency 1176000 # number of WriteReq miss cycles -system.cpu.dcache.WriteReq_miss_rate 0.127080 # miss rate for WriteReq accesses -system.cpu.dcache.WriteReq_misses 84 # number of WriteReq misses -system.cpu.dcache.WriteReq_mshr_miss_latency 1092000 # number of WriteReq MSHR miss cycles -system.cpu.dcache.WriteReq_mshr_miss_rate 0.127080 # mshr miss rate for WriteReq accesses -system.cpu.dcache.WriteReq_mshr_misses 84 # number of WriteReq MSHR misses +system.cpu.dcache.WriteReq_hits 562 # number of WriteReq hits +system.cpu.dcache.WriteReq_miss_latency 1386000 # number of WriteReq miss cycles +system.cpu.dcache.WriteReq_miss_rate 0.149773 # miss rate for WriteReq accesses +system.cpu.dcache.WriteReq_misses 99 # number of WriteReq misses +system.cpu.dcache.WriteReq_mshr_miss_latency 1287000 # number of WriteReq MSHR miss cycles +system.cpu.dcache.WriteReq_mshr_miss_rate 0.149773 # mshr miss rate for WriteReq accesses +system.cpu.dcache.WriteReq_mshr_misses 99 # number of WriteReq MSHR misses system.cpu.dcache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked system.cpu.dcache.avg_blocked_cycles_no_targets # average number of cycles each access was blocked system.cpu.dcache.avg_refs 8.195652 # Average number of references to valid blocks. @@ -37,31 +37,31 @@ system.cpu.dcache.blocked_cycles_no_mshrs 0 # n system.cpu.dcache.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu.dcache.cache_copies 0 # number of cache copies performed system.cpu.dcache.demand_accesses 1269 # number of demand (read+write) accesses -system.cpu.dcache.demand_avg_miss_latency 13920.289855 # average overall miss latency -system.cpu.dcache.demand_avg_mshr_miss_latency 12920.289855 # average overall mshr miss latency -system.cpu.dcache.demand_hits 1131 # number of demand (read+write) hits -system.cpu.dcache.demand_miss_latency 1921000 # number of demand (read+write) miss cycles -system.cpu.dcache.demand_miss_rate 0.108747 # miss rate for demand accesses -system.cpu.dcache.demand_misses 138 # number of demand (read+write) misses +system.cpu.dcache.demand_avg_miss_latency 13986.928105 # average overall miss latency +system.cpu.dcache.demand_avg_mshr_miss_latency 12986.928105 # average overall mshr miss latency +system.cpu.dcache.demand_hits 1116 # number of demand (read+write) hits +system.cpu.dcache.demand_miss_latency 2140000 # number of demand (read+write) miss cycles +system.cpu.dcache.demand_miss_rate 0.120567 # miss rate for demand accesses +system.cpu.dcache.demand_misses 153 # number of demand (read+write) misses system.cpu.dcache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu.dcache.demand_mshr_miss_latency 1783000 # number of demand (read+write) MSHR miss cycles -system.cpu.dcache.demand_mshr_miss_rate 0.108747 # mshr miss rate for demand accesses -system.cpu.dcache.demand_mshr_misses 138 # number of demand (read+write) MSHR misses +system.cpu.dcache.demand_mshr_miss_latency 1987000 # number of demand (read+write) MSHR miss cycles +system.cpu.dcache.demand_mshr_miss_rate 0.120567 # mshr miss rate for demand accesses +system.cpu.dcache.demand_mshr_misses 153 # number of demand (read+write) MSHR misses system.cpu.dcache.fast_writes 0 # number of fast writes performed system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate system.cpu.dcache.overall_accesses 1269 # number of overall (read+write) accesses -system.cpu.dcache.overall_avg_miss_latency 13920.289855 # average overall miss latency -system.cpu.dcache.overall_avg_mshr_miss_latency 12920.289855 # average overall mshr miss latency +system.cpu.dcache.overall_avg_miss_latency 13986.928105 # average overall miss latency +system.cpu.dcache.overall_avg_mshr_miss_latency 12986.928105 # average overall mshr miss latency system.cpu.dcache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency -system.cpu.dcache.overall_hits 1131 # number of overall hits -system.cpu.dcache.overall_miss_latency 1921000 # number of overall miss cycles -system.cpu.dcache.overall_miss_rate 0.108747 # miss rate for overall accesses -system.cpu.dcache.overall_misses 138 # number of overall misses +system.cpu.dcache.overall_hits 1116 # number of overall hits +system.cpu.dcache.overall_miss_latency 2140000 # number of overall miss cycles +system.cpu.dcache.overall_miss_rate 0.120567 # miss rate for overall accesses +system.cpu.dcache.overall_misses 153 # number of overall misses system.cpu.dcache.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu.dcache.overall_mshr_miss_latency 1783000 # number of overall MSHR miss cycles -system.cpu.dcache.overall_mshr_miss_rate 0.108747 # mshr miss rate for overall accesses -system.cpu.dcache.overall_mshr_misses 138 # number of overall MSHR misses +system.cpu.dcache.overall_mshr_miss_latency 1987000 # number of overall MSHR miss cycles +system.cpu.dcache.overall_mshr_miss_rate 0.120567 # mshr miss rate for overall accesses +system.cpu.dcache.overall_mshr_misses 153 # number of overall MSHR misses system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.dcache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.cpu.dcache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache @@ -76,18 +76,18 @@ system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss 0 system.cpu.dcache.replacements 0 # number of replacements system.cpu.dcache.sampled_refs 138 # Sample count of references to valid blocks. system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.dcache.tagsinuse 83.705022 # Cycle average of tags in use +system.cpu.dcache.tagsinuse 83.865949 # Cycle average of tags in use system.cpu.dcache.total_refs 1131 # Total number of references to valid blocks. system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.dcache.writebacks 0 # number of writebacks system.cpu.icache.ReadReq_accesses 4864 # number of ReadReq accesses(hits+misses) -system.cpu.icache.ReadReq_avg_miss_latency 13914.062500 # average ReadReq miss latency -system.cpu.icache.ReadReq_avg_mshr_miss_latency 12914.062500 # average ReadReq mshr miss latency +system.cpu.icache.ReadReq_avg_miss_latency 13984.375000 # average ReadReq miss latency +system.cpu.icache.ReadReq_avg_mshr_miss_latency 12984.375000 # average ReadReq mshr miss latency system.cpu.icache.ReadReq_hits 4608 # number of ReadReq hits -system.cpu.icache.ReadReq_miss_latency 3562000 # number of ReadReq miss cycles +system.cpu.icache.ReadReq_miss_latency 3580000 # number of ReadReq miss cycles system.cpu.icache.ReadReq_miss_rate 0.052632 # miss rate for ReadReq accesses system.cpu.icache.ReadReq_misses 256 # number of ReadReq misses -system.cpu.icache.ReadReq_mshr_miss_latency 3306000 # number of ReadReq MSHR miss cycles +system.cpu.icache.ReadReq_mshr_miss_latency 3324000 # number of ReadReq MSHR miss cycles system.cpu.icache.ReadReq_mshr_miss_rate 0.052632 # mshr miss rate for ReadReq accesses system.cpu.icache.ReadReq_mshr_misses 256 # number of ReadReq MSHR misses system.cpu.icache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked @@ -99,29 +99,29 @@ system.cpu.icache.blocked_cycles_no_mshrs 0 # n system.cpu.icache.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu.icache.cache_copies 0 # number of cache copies performed system.cpu.icache.demand_accesses 4864 # number of demand (read+write) accesses -system.cpu.icache.demand_avg_miss_latency 13914.062500 # average overall miss latency -system.cpu.icache.demand_avg_mshr_miss_latency 12914.062500 # average overall mshr miss latency +system.cpu.icache.demand_avg_miss_latency 13984.375000 # average overall miss latency +system.cpu.icache.demand_avg_mshr_miss_latency 12984.375000 # average overall mshr miss latency system.cpu.icache.demand_hits 4608 # number of demand (read+write) hits -system.cpu.icache.demand_miss_latency 3562000 # number of demand (read+write) miss cycles +system.cpu.icache.demand_miss_latency 3580000 # number of demand (read+write) miss cycles system.cpu.icache.demand_miss_rate 0.052632 # miss rate for demand accesses system.cpu.icache.demand_misses 256 # number of demand (read+write) misses system.cpu.icache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu.icache.demand_mshr_miss_latency 3306000 # number of demand (read+write) MSHR miss cycles +system.cpu.icache.demand_mshr_miss_latency 3324000 # number of demand (read+write) MSHR miss cycles system.cpu.icache.demand_mshr_miss_rate 0.052632 # mshr miss rate for demand accesses system.cpu.icache.demand_mshr_misses 256 # number of demand (read+write) MSHR misses system.cpu.icache.fast_writes 0 # number of fast writes performed system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate system.cpu.icache.overall_accesses 4864 # number of overall (read+write) accesses -system.cpu.icache.overall_avg_miss_latency 13914.062500 # average overall miss latency -system.cpu.icache.overall_avg_mshr_miss_latency 12914.062500 # average overall mshr miss latency +system.cpu.icache.overall_avg_miss_latency 13984.375000 # average overall miss latency +system.cpu.icache.overall_avg_mshr_miss_latency 12984.375000 # average overall mshr miss latency system.cpu.icache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency system.cpu.icache.overall_hits 4608 # number of overall hits -system.cpu.icache.overall_miss_latency 3562000 # number of overall miss cycles +system.cpu.icache.overall_miss_latency 3580000 # number of overall miss cycles system.cpu.icache.overall_miss_rate 0.052632 # miss rate for overall accesses system.cpu.icache.overall_misses 256 # number of overall misses system.cpu.icache.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu.icache.overall_mshr_miss_latency 3306000 # number of overall MSHR miss cycles +system.cpu.icache.overall_mshr_miss_latency 3324000 # number of overall MSHR miss cycles system.cpu.icache.overall_mshr_miss_rate 0.052632 # mshr miss rate for overall accesses system.cpu.icache.overall_mshr_misses 256 # number of overall MSHR misses system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles @@ -138,53 +138,72 @@ system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss 0 system.cpu.icache.replacements 0 # number of replacements system.cpu.icache.sampled_refs 256 # Sample count of references to valid blocks. system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.icache.tagsinuse 114.172725 # Cycle average of tags in use +system.cpu.icache.tagsinuse 114.646434 # Cycle average of tags in use system.cpu.icache.total_refs 4608 # Total number of references to valid blocks. system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.icache.writebacks 0 # number of writebacks system.cpu.idle_fraction 0 # Percentage of idle cycles -system.cpu.l2cache.ReadReq_accesses 391 # number of ReadReq accesses(hits+misses) -system.cpu.l2cache.ReadReq_avg_miss_latency 13000 # average ReadReq miss latency +system.cpu.l2cache.ReadExReq_accesses 84 # number of ReadExReq accesses(hits+misses) +system.cpu.l2cache.ReadExReq_avg_miss_latency 12000 # average ReadExReq miss latency +system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency 11000 # average ReadExReq mshr miss latency +system.cpu.l2cache.ReadExReq_miss_latency 1008000 # number of ReadExReq miss cycles +system.cpu.l2cache.ReadExReq_miss_rate 1 # miss rate for ReadExReq accesses +system.cpu.l2cache.ReadExReq_misses 84 # number of ReadExReq misses +system.cpu.l2cache.ReadExReq_mshr_miss_latency 924000 # number of ReadExReq MSHR miss cycles +system.cpu.l2cache.ReadExReq_mshr_miss_rate 1 # mshr miss rate for ReadExReq accesses +system.cpu.l2cache.ReadExReq_mshr_misses 84 # number of ReadExReq MSHR misses +system.cpu.l2cache.ReadReq_accesses 310 # number of ReadReq accesses(hits+misses) +system.cpu.l2cache.ReadReq_avg_miss_latency 12000 # average ReadReq miss latency system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 11000 # average ReadReq mshr miss latency -system.cpu.l2cache.ReadReq_miss_latency 5083000 # number of ReadReq miss cycles -system.cpu.l2cache.ReadReq_miss_rate 1 # miss rate for ReadReq accesses -system.cpu.l2cache.ReadReq_misses 391 # number of ReadReq misses -system.cpu.l2cache.ReadReq_mshr_miss_latency 4301000 # number of ReadReq MSHR miss cycles -system.cpu.l2cache.ReadReq_mshr_miss_rate 1 # mshr miss rate for ReadReq accesses -system.cpu.l2cache.ReadReq_mshr_misses 391 # number of ReadReq MSHR misses +system.cpu.l2cache.ReadReq_hits 3 # number of ReadReq hits +system.cpu.l2cache.ReadReq_miss_latency 3684000 # number of ReadReq miss cycles +system.cpu.l2cache.ReadReq_miss_rate 0.990323 # miss rate for ReadReq accesses +system.cpu.l2cache.ReadReq_misses 307 # number of ReadReq misses +system.cpu.l2cache.ReadReq_mshr_miss_latency 3377000 # number of ReadReq MSHR miss cycles +system.cpu.l2cache.ReadReq_mshr_miss_rate 0.990323 # mshr miss rate for ReadReq accesses +system.cpu.l2cache.ReadReq_mshr_misses 307 # number of ReadReq MSHR misses +system.cpu.l2cache.UpgradeReq_accesses 15 # number of UpgradeReq accesses(hits+misses) +system.cpu.l2cache.UpgradeReq_avg_miss_latency 12000 # average UpgradeReq miss latency +system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency 11000 # average UpgradeReq mshr miss latency +system.cpu.l2cache.UpgradeReq_miss_latency 180000 # number of UpgradeReq miss cycles +system.cpu.l2cache.UpgradeReq_miss_rate 1 # miss rate for UpgradeReq accesses +system.cpu.l2cache.UpgradeReq_misses 15 # number of UpgradeReq misses +system.cpu.l2cache.UpgradeReq_mshr_miss_latency 165000 # number of UpgradeReq MSHR miss cycles +system.cpu.l2cache.UpgradeReq_mshr_miss_rate 1 # mshr miss rate for UpgradeReq accesses +system.cpu.l2cache.UpgradeReq_mshr_misses 15 # number of UpgradeReq MSHR misses system.cpu.l2cache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked system.cpu.l2cache.avg_blocked_cycles_no_targets # average number of cycles each access was blocked -system.cpu.l2cache.avg_refs 0 # Average number of references to valid blocks. +system.cpu.l2cache.avg_refs 0.010274 # Average number of references to valid blocks. system.cpu.l2cache.blocked_no_mshrs 0 # number of cycles access was blocked system.cpu.l2cache.blocked_no_targets 0 # number of cycles access was blocked system.cpu.l2cache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked system.cpu.l2cache.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu.l2cache.cache_copies 0 # number of cache copies performed -system.cpu.l2cache.demand_accesses 391 # number of demand (read+write) accesses -system.cpu.l2cache.demand_avg_miss_latency 13000 # average overall miss latency +system.cpu.l2cache.demand_accesses 394 # number of demand (read+write) accesses +system.cpu.l2cache.demand_avg_miss_latency 12000 # average overall miss latency system.cpu.l2cache.demand_avg_mshr_miss_latency 11000 # average overall mshr miss latency -system.cpu.l2cache.demand_hits 0 # number of demand (read+write) hits -system.cpu.l2cache.demand_miss_latency 5083000 # number of demand (read+write) miss cycles -system.cpu.l2cache.demand_miss_rate 1 # miss rate for demand accesses +system.cpu.l2cache.demand_hits 3 # number of demand (read+write) hits +system.cpu.l2cache.demand_miss_latency 4692000 # number of demand (read+write) miss cycles +system.cpu.l2cache.demand_miss_rate 0.992386 # miss rate for demand accesses system.cpu.l2cache.demand_misses 391 # number of demand (read+write) misses system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits system.cpu.l2cache.demand_mshr_miss_latency 4301000 # number of demand (read+write) MSHR miss cycles -system.cpu.l2cache.demand_mshr_miss_rate 1 # mshr miss rate for demand accesses +system.cpu.l2cache.demand_mshr_miss_rate 0.992386 # mshr miss rate for demand accesses system.cpu.l2cache.demand_mshr_misses 391 # number of demand (read+write) MSHR misses system.cpu.l2cache.fast_writes 0 # number of fast writes performed system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.l2cache.overall_accesses 391 # number of overall (read+write) accesses -system.cpu.l2cache.overall_avg_miss_latency 13000 # average overall miss latency +system.cpu.l2cache.overall_accesses 394 # number of overall (read+write) accesses +system.cpu.l2cache.overall_avg_miss_latency 12000 # average overall miss latency system.cpu.l2cache.overall_avg_mshr_miss_latency 11000 # average overall mshr miss latency system.cpu.l2cache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency -system.cpu.l2cache.overall_hits 0 # number of overall hits -system.cpu.l2cache.overall_miss_latency 5083000 # number of overall miss cycles -system.cpu.l2cache.overall_miss_rate 1 # miss rate for overall accesses +system.cpu.l2cache.overall_hits 3 # number of overall hits +system.cpu.l2cache.overall_miss_latency 4692000 # number of overall miss cycles +system.cpu.l2cache.overall_miss_rate 0.992386 # miss rate for overall accesses system.cpu.l2cache.overall_misses 391 # number of overall misses system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits system.cpu.l2cache.overall_mshr_miss_latency 4301000 # number of overall MSHR miss cycles -system.cpu.l2cache.overall_mshr_miss_rate 1 # mshr miss rate for overall accesses +system.cpu.l2cache.overall_mshr_miss_rate 0.992386 # mshr miss rate for overall accesses system.cpu.l2cache.overall_mshr_misses 391 # number of overall MSHR misses system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.l2cache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses @@ -198,14 +217,14 @@ system.cpu.l2cache.prefetcher.num_hwpf_removed_MSHR_hit 0 system.cpu.l2cache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time system.cpu.l2cache.replacements 0 # number of replacements -system.cpu.l2cache.sampled_refs 391 # Sample count of references to valid blocks. +system.cpu.l2cache.sampled_refs 292 # Sample count of references to valid blocks. system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.l2cache.tagsinuse 196.304892 # Cycle average of tags in use -system.cpu.l2cache.total_refs 0 # Total number of references to valid blocks. +system.cpu.l2cache.tagsinuse 133.135118 # Cycle average of tags in use +system.cpu.l2cache.total_refs 3 # Total number of references to valid blocks. system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.l2cache.writebacks 0 # number of writebacks system.cpu.not_idle_fraction 1 # Percentage of non-idle cycles -system.cpu.numCycles 11221000 # number of cpu cycles simulated +system.cpu.numCycles 11443000 # number of cpu cycles simulated system.cpu.num_insts 4863 # Number of instructions executed system.cpu.num_refs 1269 # Number of memory references system.cpu.workload.PROG:num_syscalls 11 # Number of system calls diff --git a/tests/quick/00.hello/ref/sparc/linux/simple-timing/stdout b/tests/quick/00.hello/ref/sparc/linux/simple-timing/stdout index 65bf4abca..1b34d79bb 100644 --- a/tests/quick/00.hello/ref/sparc/linux/simple-timing/stdout +++ b/tests/quick/00.hello/ref/sparc/linux/simple-timing/stdout @@ -5,9 +5,9 @@ The Regents of The University of Michigan All Rights Reserved -M5 compiled May 15 2007 13:02:31 -M5 started Tue May 15 17:00:05 2007 +M5 compiled Aug 3 2007 04:11:25 +M5 started Fri Aug 3 04:31:19 2007 M5 executing on zizzer.eecs.umich.edu command line: build/SPARC_SE/m5.fast -d build/SPARC_SE/tests/fast/quick/00.hello/sparc/linux/simple-timing tests/run.py quick/00.hello/sparc/linux/simple-timing Global frequency set at 1000000000000 ticks per second -Exiting @ tick 11221000 because target called exit() +Exiting @ tick 11443000 because target called exit() diff --git a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.ini b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.ini index f03824f95..fea709a4d 100644 --- a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.ini +++ b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/config.ini @@ -11,7 +11,7 @@ physmem=system.physmem [system.cpu] type=DerivO3CPU -children=dcache fuPool icache l2cache toL2Bus workload0 workload1 +children=dcache fuPool icache l2cache toL2Bus tracer workload0 workload1 BTBEntries=4096 BTBTagSize=16 LFSTSize=1024 @@ -86,6 +86,7 @@ smtROBPolicy=Partitioned smtROBThreshold=100 squashWidth=8 system=system +tracer=system.cpu.tracer trapLatency=13 wbDepth=1 wbWidth=8 @@ -95,12 +96,9 @@ icache_port=system.cpu.icache.cpu_side [system.cpu.dcache] type=BaseCache -adaptive_compression=false addr_range=0:18446744073709551615 assoc=2 block_size=64 -compressed_bus=false -compression_latency=0 hash_delay=1 latency=1000 lifo=false @@ -118,12 +116,10 @@ prefetch_serial_squash=false prefetch_use_cpu_id=true prefetcher_size=100 prioritizeRequests=false -protocol=Null repl=Null size=262144 split=false split_size=0 -store_compressed=false subblock_size=0 tgts_per_mshr=20 trace_addr=0 @@ -139,11 +135,11 @@ FUList=system.cpu.fuPool.FUList0 system.cpu.fuPool.FUList1 system.cpu.fuPool.FUL [system.cpu.fuPool.FUList0] type=FUDesc -children=opList0 +children=opList count=6 -opList=system.cpu.fuPool.FUList0.opList0 +opList=system.cpu.fuPool.FUList0.opList -[system.cpu.fuPool.FUList0.opList0] +[system.cpu.fuPool.FUList0.opList] type=OpDesc issueLat=1 opClass=IntAlu @@ -217,11 +213,11 @@ opLat=24 [system.cpu.fuPool.FUList4] type=FUDesc -children=opList0 +children=opList count=0 -opList=system.cpu.fuPool.FUList4.opList0 +opList=system.cpu.fuPool.FUList4.opList -[system.cpu.fuPool.FUList4.opList0] +[system.cpu.fuPool.FUList4.opList] type=OpDesc issueLat=1 opClass=MemRead @@ -229,11 +225,11 @@ opLat=1 [system.cpu.fuPool.FUList5] type=FUDesc -children=opList0 +children=opList count=0 -opList=system.cpu.fuPool.FUList5.opList0 +opList=system.cpu.fuPool.FUList5.opList -[system.cpu.fuPool.FUList5.opList0] +[system.cpu.fuPool.FUList5.opList] type=OpDesc issueLat=1 opClass=MemWrite @@ -259,11 +255,11 @@ opLat=1 [system.cpu.fuPool.FUList7] type=FUDesc -children=opList0 +children=opList count=1 -opList=system.cpu.fuPool.FUList7.opList0 +opList=system.cpu.fuPool.FUList7.opList -[system.cpu.fuPool.FUList7.opList0] +[system.cpu.fuPool.FUList7.opList] type=OpDesc issueLat=3 opClass=IprAccess @@ -271,12 +267,9 @@ opLat=3 [system.cpu.icache] type=BaseCache -adaptive_compression=false addr_range=0:18446744073709551615 assoc=2 block_size=64 -compressed_bus=false -compression_latency=0 hash_delay=1 latency=1000 lifo=false @@ -294,12 +287,10 @@ prefetch_serial_squash=false prefetch_use_cpu_id=true prefetcher_size=100 prioritizeRequests=false -protocol=Null repl=Null size=131072 split=false split_size=0 -store_compressed=false subblock_size=0 tgts_per_mshr=20 trace_addr=0 @@ -310,12 +301,9 @@ mem_side=system.cpu.toL2Bus.port[0] [system.cpu.l2cache] type=BaseCache -adaptive_compression=false addr_range=0:18446744073709551615 assoc=2 block_size=64 -compressed_bus=false -compression_latency=0 hash_delay=1 latency=1000 lifo=false @@ -333,12 +321,10 @@ prefetch_serial_squash=false prefetch_use_cpu_id=true prefetcher_size=100 prioritizeRequests=false -protocol=Null repl=Null size=2097152 split=false split_size=0 -store_compressed=false subblock_size=0 tgts_per_mshr=5 trace_addr=0 @@ -356,6 +342,9 @@ responder_set=false width=64 port=system.cpu.icache.mem_side system.cpu.dcache.mem_side system.cpu.l2cache.cpu_side +[system.cpu.tracer] +type=ExeTracer + [system.cpu.workload0] type=LiveProcess cmd=hello diff --git a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/m5stats.txt b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/m5stats.txt index 39a686d6b..259a48483 100644 --- a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/m5stats.txt +++ b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/m5stats.txt @@ -2,47 +2,47 @@ ---------- Begin Simulation Statistics ---------- global.BPredUnit.BTBCorrect 0 # Number of correct BTB predictions (this stat may not work properly. global.BPredUnit.BTBHits 696 # Number of BTB hits -global.BPredUnit.BTBLookups 3414 # Number of BTB lookups -global.BPredUnit.RASInCorrect 125 # Number of incorrect RAS predictions. -global.BPredUnit.condIncorrect 1124 # Number of conditional branches incorrect -global.BPredUnit.condPredicted 2315 # Number of conditional branches predicted -global.BPredUnit.lookups 3940 # Number of BP lookups -global.BPredUnit.usedRAS 525 # Number of times the RAS was used to get a target. -host_inst_rate 52706 # Simulator instruction rate (inst/s) -host_mem_usage 154396 # Number of bytes of host memory used -host_seconds 0.21 # Real time elapsed on the host -host_tick_rate 25698682 # Simulator tick rate (ticks/s) -memdepunit.memDep.conflictingLoads 16 # Number of conflicting loads. -memdepunit.memDep.conflictingLoads 16 # Number of conflicting loads. -memdepunit.memDep.conflictingStores 53 # Number of conflicting stores. -memdepunit.memDep.conflictingStores 59 # Number of conflicting stores. -memdepunit.memDep.insertedLoads 1934 # Number of loads inserted to the mem dependence unit. -memdepunit.memDep.insertedLoads 1903 # Number of loads inserted to the mem dependence unit. -memdepunit.memDep.insertedStores 1082 # Number of stores inserted to the mem dependence unit. -memdepunit.memDep.insertedStores 1090 # Number of stores inserted to the mem dependence unit. +global.BPredUnit.BTBLookups 3444 # Number of BTB lookups +global.BPredUnit.RASInCorrect 120 # Number of incorrect RAS predictions. +global.BPredUnit.condIncorrect 1098 # Number of conditional branches incorrect +global.BPredUnit.condPredicted 2319 # Number of conditional branches predicted +global.BPredUnit.lookups 3987 # Number of BP lookups +global.BPredUnit.usedRAS 539 # Number of times the RAS was used to get a target. +host_inst_rate 43485 # Simulator instruction rate (inst/s) +host_mem_usage 155152 # Number of bytes of host memory used +host_seconds 0.26 # Real time elapsed on the host +host_tick_rate 19795862 # Simulator tick rate (ticks/s) +memdepunit.memDep.conflictingLoads 10 # Number of conflicting loads. +memdepunit.memDep.conflictingLoads 10 # Number of conflicting loads. +memdepunit.memDep.conflictingStores 48 # Number of conflicting stores. +memdepunit.memDep.conflictingStores 52 # Number of conflicting stores. +memdepunit.memDep.insertedLoads 1908 # Number of loads inserted to the mem dependence unit. +memdepunit.memDep.insertedLoads 1873 # Number of loads inserted to the mem dependence unit. +memdepunit.memDep.insertedStores 1098 # Number of stores inserted to the mem dependence unit. +memdepunit.memDep.insertedStores 1086 # Number of stores inserted to the mem dependence unit. sim_freq 1000000000000 # Frequency of simulated ticks sim_insts 11247 # Number of instructions simulated sim_seconds 0.000005 # Number of seconds simulated -sim_ticks 5491500 # Number of ticks simulated +sim_ticks 5126000 # Number of ticks simulated system.cpu.commit.COM:branches 1724 # Number of branches committed system.cpu.commit.COM:branches_0 862 # Number of branches committed system.cpu.commit.COM:branches_1 862 # Number of branches committed -system.cpu.commit.COM:bw_lim_events 168 # number cycles where commit BW limit reached +system.cpu.commit.COM:bw_lim_events 164 # number cycles where commit BW limit reached system.cpu.commit.COM:bw_limited 0 # number of insts not committed due to BW limits system.cpu.commit.COM:bw_limited_0 0 # number of insts not committed due to BW limits system.cpu.commit.COM:bw_limited_1 0 # number of insts not committed due to BW limits system.cpu.commit.COM:committed_per_cycle.start_dist # Number of insts commited each cycle -system.cpu.commit.COM:committed_per_cycle.samples 10926 +system.cpu.commit.COM:committed_per_cycle.samples 10208 system.cpu.commit.COM:committed_per_cycle.min_value 0 - 0 6353 5814.57% - 1 2078 1901.89% - 2 996 911.59% - 3 472 432.00% - 4 296 270.91% - 5 241 220.57% - 6 192 175.73% - 7 130 118.98% - 8 168 153.76% + 0 5629 5514.30% + 1 2071 2028.80% + 2 984 963.95% + 3 471 461.40% + 4 357 349.73% + 5 228 223.35% + 6 179 175.35% + 7 125 122.45% + 8 164 160.66% system.cpu.commit.COM:committed_per_cycle.max_value 8 system.cpu.commit.COM:committed_per_cycle.end_dist @@ -61,133 +61,133 @@ system.cpu.commit.COM:refs_1 1791 # Nu system.cpu.commit.COM:swp_count 0 # Number of s/w prefetches committed system.cpu.commit.COM:swp_count_0 0 # Number of s/w prefetches committed system.cpu.commit.COM:swp_count_1 0 # Number of s/w prefetches committed -system.cpu.commit.branchMispredicts 885 # The number of times a branch was mispredicted +system.cpu.commit.branchMispredicts 852 # The number of times a branch was mispredicted system.cpu.commit.commitCommittedInsts 11281 # The number of committed instructions system.cpu.commit.commitNonSpecStalls 34 # The number of times commit has been forced to stall to communicate backwards -system.cpu.commit.commitSquashedInsts 7777 # The number of squashed insts skipped by commit +system.cpu.commit.commitSquashedInsts 7556 # The number of squashed insts skipped by commit system.cpu.committedInsts_0 5623 # Number of Instructions Simulated system.cpu.committedInsts_1 5624 # Number of Instructions Simulated system.cpu.committedInsts_total 11247 # Number of Instructions Simulated -system.cpu.cpi_0 1.952872 # CPI: Cycles Per Instruction -system.cpu.cpi_1 1.952525 # CPI: Cycles Per Instruction -system.cpu.cpi_total 0.976349 # CPI: Total CPI of All Threads -system.cpu.dcache.ReadReq_accesses 2981 # number of ReadReq accesses(hits+misses) -system.cpu.dcache.ReadReq_accesses_0 2981 # number of ReadReq accesses(hits+misses) -system.cpu.dcache.ReadReq_avg_miss_latency_0 7040.892193 # average ReadReq miss latency -system.cpu.dcache.ReadReq_avg_mshr_miss_latency_0 6979.591837 # average ReadReq mshr miss latency -system.cpu.dcache.ReadReq_hits 2712 # number of ReadReq hits -system.cpu.dcache.ReadReq_hits_0 2712 # number of ReadReq hits -system.cpu.dcache.ReadReq_miss_latency 1894000 # number of ReadReq miss cycles -system.cpu.dcache.ReadReq_miss_latency_0 1894000 # number of ReadReq miss cycles -system.cpu.dcache.ReadReq_miss_rate_0 0.090238 # miss rate for ReadReq accesses -system.cpu.dcache.ReadReq_misses 269 # number of ReadReq misses -system.cpu.dcache.ReadReq_misses_0 269 # number of ReadReq misses -system.cpu.dcache.ReadReq_mshr_hits 73 # number of ReadReq MSHR hits -system.cpu.dcache.ReadReq_mshr_hits_0 73 # number of ReadReq MSHR hits -system.cpu.dcache.ReadReq_mshr_miss_latency 1368000 # number of ReadReq MSHR miss cycles -system.cpu.dcache.ReadReq_mshr_miss_latency_0 1368000 # number of ReadReq MSHR miss cycles -system.cpu.dcache.ReadReq_mshr_miss_rate_0 0.065750 # mshr miss rate for ReadReq accesses -system.cpu.dcache.ReadReq_mshr_misses 196 # number of ReadReq MSHR misses -system.cpu.dcache.ReadReq_mshr_misses_0 196 # number of ReadReq MSHR misses -system.cpu.dcache.WriteReq_accesses 1624 # number of WriteReq accesses(hits+misses) -system.cpu.dcache.WriteReq_accesses_0 1624 # number of WriteReq accesses(hits+misses) -system.cpu.dcache.WriteReq_avg_miss_latency_0 5306.613226 # average WriteReq miss latency -system.cpu.dcache.WriteReq_avg_mshr_miss_latency_0 5852.739726 # average WriteReq mshr miss latency -system.cpu.dcache.WriteReq_hits 1125 # number of WriteReq hits -system.cpu.dcache.WriteReq_hits_0 1125 # number of WriteReq hits -system.cpu.dcache.WriteReq_miss_latency 2648000 # number of WriteReq miss cycles -system.cpu.dcache.WriteReq_miss_latency_0 2648000 # number of WriteReq miss cycles -system.cpu.dcache.WriteReq_miss_rate_0 0.307266 # miss rate for WriteReq accesses -system.cpu.dcache.WriteReq_misses 499 # number of WriteReq misses -system.cpu.dcache.WriteReq_misses_0 499 # number of WriteReq misses -system.cpu.dcache.WriteReq_mshr_hits 353 # number of WriteReq MSHR hits -system.cpu.dcache.WriteReq_mshr_hits_0 353 # number of WriteReq MSHR hits -system.cpu.dcache.WriteReq_mshr_miss_latency 854500 # number of WriteReq MSHR miss cycles -system.cpu.dcache.WriteReq_mshr_miss_latency_0 854500 # number of WriteReq MSHR miss cycles -system.cpu.dcache.WriteReq_mshr_miss_rate_0 0.089901 # mshr miss rate for WriteReq accesses -system.cpu.dcache.WriteReq_mshr_misses 146 # number of WriteReq MSHR misses -system.cpu.dcache.WriteReq_mshr_misses_0 146 # number of WriteReq MSHR misses +system.cpu.cpi_0 1.822870 # CPI: Cycles Per Instruction +system.cpu.cpi_1 1.822546 # CPI: Cycles Per Instruction +system.cpu.cpi_total 0.911354 # CPI: Total CPI of All Threads +system.cpu.dcache.ReadReq_accesses 2898 # number of ReadReq accesses(hits+misses) +system.cpu.dcache.ReadReq_accesses_0 2898 # number of ReadReq accesses(hits+misses) +system.cpu.dcache.ReadReq_avg_miss_latency_0 10388.059701 # average ReadReq miss latency +system.cpu.dcache.ReadReq_avg_mshr_miss_latency_0 7328.358209 # average ReadReq mshr miss latency +system.cpu.dcache.ReadReq_hits 2697 # number of ReadReq hits +system.cpu.dcache.ReadReq_hits_0 2697 # number of ReadReq hits +system.cpu.dcache.ReadReq_miss_latency 2088000 # number of ReadReq miss cycles +system.cpu.dcache.ReadReq_miss_latency_0 2088000 # number of ReadReq miss cycles +system.cpu.dcache.ReadReq_miss_rate_0 0.069358 # miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_misses 201 # number of ReadReq misses +system.cpu.dcache.ReadReq_misses_0 201 # number of ReadReq misses +system.cpu.dcache.ReadReq_mshr_hits 74 # number of ReadReq MSHR hits +system.cpu.dcache.ReadReq_mshr_hits_0 74 # number of ReadReq MSHR hits +system.cpu.dcache.ReadReq_mshr_miss_latency 1473000 # number of ReadReq MSHR miss cycles +system.cpu.dcache.ReadReq_mshr_miss_latency_0 1473000 # number of ReadReq MSHR miss cycles +system.cpu.dcache.ReadReq_mshr_miss_rate_0 0.069358 # mshr miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_mshr_misses 201 # number of ReadReq MSHR misses +system.cpu.dcache.ReadReq_mshr_misses_0 201 # number of ReadReq MSHR misses +system.cpu.dcache.WriteReq_accesses 1265 # number of WriteReq accesses(hits+misses) +system.cpu.dcache.WriteReq_accesses_0 1265 # number of WriteReq accesses(hits+misses) +system.cpu.dcache.WriteReq_avg_miss_latency_0 16353.448276 # average WriteReq miss latency +system.cpu.dcache.WriteReq_avg_mshr_miss_latency_0 5663.793103 # average WriteReq mshr miss latency +system.cpu.dcache.WriteReq_hits 1091 # number of WriteReq hits +system.cpu.dcache.WriteReq_hits_0 1091 # number of WriteReq hits +system.cpu.dcache.WriteReq_miss_latency 2845500 # number of WriteReq miss cycles +system.cpu.dcache.WriteReq_miss_latency_0 2845500 # number of WriteReq miss cycles +system.cpu.dcache.WriteReq_miss_rate_0 0.137549 # miss rate for WriteReq accesses +system.cpu.dcache.WriteReq_misses 174 # number of WriteReq misses +system.cpu.dcache.WriteReq_misses_0 174 # number of WriteReq misses +system.cpu.dcache.WriteReq_mshr_hits 359 # number of WriteReq MSHR hits +system.cpu.dcache.WriteReq_mshr_hits_0 359 # number of WriteReq MSHR hits +system.cpu.dcache.WriteReq_mshr_miss_latency 985500 # number of WriteReq MSHR miss cycles +system.cpu.dcache.WriteReq_mshr_miss_latency_0 985500 # number of WriteReq MSHR miss cycles +system.cpu.dcache.WriteReq_mshr_miss_rate_0 0.137549 # mshr miss rate for WriteReq accesses +system.cpu.dcache.WriteReq_mshr_misses 174 # number of WriteReq MSHR misses +system.cpu.dcache.WriteReq_mshr_misses_0 174 # number of WriteReq MSHR misses system.cpu.dcache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked system.cpu.dcache.avg_blocked_cycles_no_targets # average number of cycles each access was blocked -system.cpu.dcache.avg_refs 11.219298 # Average number of references to valid blocks. +system.cpu.dcache.avg_refs 10.997118 # Average number of references to valid blocks. system.cpu.dcache.blocked_no_mshrs 0 # number of cycles access was blocked system.cpu.dcache.blocked_no_targets 0 # number of cycles access was blocked system.cpu.dcache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked system.cpu.dcache.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu.dcache.cache_copies 0 # number of cache copies performed -system.cpu.dcache.demand_accesses 4605 # number of demand (read+write) accesses -system.cpu.dcache.demand_accesses_0 4605 # number of demand (read+write) accesses +system.cpu.dcache.demand_accesses 4163 # number of demand (read+write) accesses +system.cpu.dcache.demand_accesses_0 4163 # number of demand (read+write) accesses system.cpu.dcache.demand_accesses_1 0 # number of demand (read+write) accesses system.cpu.dcache.demand_avg_miss_latency # average overall miss latency -system.cpu.dcache.demand_avg_miss_latency_0 5914.062500 # average overall miss latency +system.cpu.dcache.demand_avg_miss_latency_0 13156 # average overall miss latency system.cpu.dcache.demand_avg_miss_latency_1 # average overall miss latency system.cpu.dcache.demand_avg_mshr_miss_latency # average overall mshr miss latency -system.cpu.dcache.demand_avg_mshr_miss_latency_0 6498.538012 # average overall mshr miss latency +system.cpu.dcache.demand_avg_mshr_miss_latency_0 6556 # average overall mshr miss latency system.cpu.dcache.demand_avg_mshr_miss_latency_1 # average overall mshr miss latency -system.cpu.dcache.demand_hits 3837 # number of demand (read+write) hits -system.cpu.dcache.demand_hits_0 3837 # number of demand (read+write) hits +system.cpu.dcache.demand_hits 3788 # number of demand (read+write) hits +system.cpu.dcache.demand_hits_0 3788 # number of demand (read+write) hits system.cpu.dcache.demand_hits_1 0 # number of demand (read+write) hits -system.cpu.dcache.demand_miss_latency 4542000 # number of demand (read+write) miss cycles -system.cpu.dcache.demand_miss_latency_0 4542000 # number of demand (read+write) miss cycles +system.cpu.dcache.demand_miss_latency 4933500 # number of demand (read+write) miss cycles +system.cpu.dcache.demand_miss_latency_0 4933500 # number of demand (read+write) miss cycles system.cpu.dcache.demand_miss_latency_1 0 # number of demand (read+write) miss cycles system.cpu.dcache.demand_miss_rate # miss rate for demand accesses -system.cpu.dcache.demand_miss_rate_0 0.166775 # miss rate for demand accesses +system.cpu.dcache.demand_miss_rate_0 0.090079 # miss rate for demand accesses system.cpu.dcache.demand_miss_rate_1 # miss rate for demand accesses -system.cpu.dcache.demand_misses 768 # number of demand (read+write) misses -system.cpu.dcache.demand_misses_0 768 # number of demand (read+write) misses +system.cpu.dcache.demand_misses 375 # number of demand (read+write) misses +system.cpu.dcache.demand_misses_0 375 # number of demand (read+write) misses system.cpu.dcache.demand_misses_1 0 # number of demand (read+write) misses -system.cpu.dcache.demand_mshr_hits 426 # number of demand (read+write) MSHR hits -system.cpu.dcache.demand_mshr_hits_0 426 # number of demand (read+write) MSHR hits +system.cpu.dcache.demand_mshr_hits 433 # number of demand (read+write) MSHR hits +system.cpu.dcache.demand_mshr_hits_0 433 # number of demand (read+write) MSHR hits system.cpu.dcache.demand_mshr_hits_1 0 # number of demand (read+write) MSHR hits -system.cpu.dcache.demand_mshr_miss_latency 2222500 # number of demand (read+write) MSHR miss cycles -system.cpu.dcache.demand_mshr_miss_latency_0 2222500 # number of demand (read+write) MSHR miss cycles +system.cpu.dcache.demand_mshr_miss_latency 2458500 # number of demand (read+write) MSHR miss cycles +system.cpu.dcache.demand_mshr_miss_latency_0 2458500 # number of demand (read+write) MSHR miss cycles system.cpu.dcache.demand_mshr_miss_latency_1 0 # number of demand (read+write) MSHR miss cycles system.cpu.dcache.demand_mshr_miss_rate # mshr miss rate for demand accesses -system.cpu.dcache.demand_mshr_miss_rate_0 0.074267 # mshr miss rate for demand accesses +system.cpu.dcache.demand_mshr_miss_rate_0 0.090079 # mshr miss rate for demand accesses system.cpu.dcache.demand_mshr_miss_rate_1 # mshr miss rate for demand accesses -system.cpu.dcache.demand_mshr_misses 342 # number of demand (read+write) MSHR misses -system.cpu.dcache.demand_mshr_misses_0 342 # number of demand (read+write) MSHR misses +system.cpu.dcache.demand_mshr_misses 375 # number of demand (read+write) MSHR misses +system.cpu.dcache.demand_mshr_misses_0 375 # number of demand (read+write) MSHR misses system.cpu.dcache.demand_mshr_misses_1 0 # number of demand (read+write) MSHR misses system.cpu.dcache.fast_writes 0 # number of fast writes performed system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.dcache.mshr_cap_events_0 0 # number of times MSHR cap was activated system.cpu.dcache.mshr_cap_events_1 0 # number of times MSHR cap was activated system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.dcache.overall_accesses 4605 # number of overall (read+write) accesses -system.cpu.dcache.overall_accesses_0 4605 # number of overall (read+write) accesses +system.cpu.dcache.overall_accesses 4163 # number of overall (read+write) accesses +system.cpu.dcache.overall_accesses_0 4163 # number of overall (read+write) accesses system.cpu.dcache.overall_accesses_1 0 # number of overall (read+write) accesses system.cpu.dcache.overall_avg_miss_latency # average overall miss latency -system.cpu.dcache.overall_avg_miss_latency_0 5914.062500 # average overall miss latency +system.cpu.dcache.overall_avg_miss_latency_0 13156 # average overall miss latency system.cpu.dcache.overall_avg_miss_latency_1 # average overall miss latency system.cpu.dcache.overall_avg_mshr_miss_latency # average overall mshr miss latency -system.cpu.dcache.overall_avg_mshr_miss_latency_0 6498.538012 # average overall mshr miss latency +system.cpu.dcache.overall_avg_mshr_miss_latency_0 6556 # average overall mshr miss latency system.cpu.dcache.overall_avg_mshr_miss_latency_1 # average overall mshr miss latency system.cpu.dcache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency system.cpu.dcache.overall_avg_mshr_uncacheable_latency_0 # average overall mshr uncacheable latency system.cpu.dcache.overall_avg_mshr_uncacheable_latency_1 # average overall mshr uncacheable latency -system.cpu.dcache.overall_hits 3837 # number of overall hits -system.cpu.dcache.overall_hits_0 3837 # number of overall hits +system.cpu.dcache.overall_hits 3788 # number of overall hits +system.cpu.dcache.overall_hits_0 3788 # number of overall hits system.cpu.dcache.overall_hits_1 0 # number of overall hits -system.cpu.dcache.overall_miss_latency 4542000 # number of overall miss cycles -system.cpu.dcache.overall_miss_latency_0 4542000 # number of overall miss cycles +system.cpu.dcache.overall_miss_latency 4933500 # number of overall miss cycles +system.cpu.dcache.overall_miss_latency_0 4933500 # number of overall miss cycles system.cpu.dcache.overall_miss_latency_1 0 # number of overall miss cycles system.cpu.dcache.overall_miss_rate # miss rate for overall accesses -system.cpu.dcache.overall_miss_rate_0 0.166775 # miss rate for overall accesses +system.cpu.dcache.overall_miss_rate_0 0.090079 # miss rate for overall accesses system.cpu.dcache.overall_miss_rate_1 # miss rate for overall accesses -system.cpu.dcache.overall_misses 768 # number of overall misses -system.cpu.dcache.overall_misses_0 768 # number of overall misses +system.cpu.dcache.overall_misses 375 # number of overall misses +system.cpu.dcache.overall_misses_0 375 # number of overall misses system.cpu.dcache.overall_misses_1 0 # number of overall misses -system.cpu.dcache.overall_mshr_hits 426 # number of overall MSHR hits -system.cpu.dcache.overall_mshr_hits_0 426 # number of overall MSHR hits +system.cpu.dcache.overall_mshr_hits 433 # number of overall MSHR hits +system.cpu.dcache.overall_mshr_hits_0 433 # number of overall MSHR hits system.cpu.dcache.overall_mshr_hits_1 0 # number of overall MSHR hits -system.cpu.dcache.overall_mshr_miss_latency 2222500 # number of overall MSHR miss cycles -system.cpu.dcache.overall_mshr_miss_latency_0 2222500 # number of overall MSHR miss cycles +system.cpu.dcache.overall_mshr_miss_latency 2458500 # number of overall MSHR miss cycles +system.cpu.dcache.overall_mshr_miss_latency_0 2458500 # number of overall MSHR miss cycles system.cpu.dcache.overall_mshr_miss_latency_1 0 # number of overall MSHR miss cycles system.cpu.dcache.overall_mshr_miss_rate # mshr miss rate for overall accesses -system.cpu.dcache.overall_mshr_miss_rate_0 0.074267 # mshr miss rate for overall accesses +system.cpu.dcache.overall_mshr_miss_rate_0 0.090079 # mshr miss rate for overall accesses system.cpu.dcache.overall_mshr_miss_rate_1 # mshr miss rate for overall accesses -system.cpu.dcache.overall_mshr_misses 342 # number of overall MSHR misses -system.cpu.dcache.overall_mshr_misses_0 342 # number of overall MSHR misses +system.cpu.dcache.overall_mshr_misses 375 # number of overall MSHR misses +system.cpu.dcache.overall_mshr_misses_0 375 # number of overall MSHR misses system.cpu.dcache.overall_mshr_misses_1 0 # number of overall MSHR misses system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.dcache.overall_mshr_uncacheable_latency_0 0 # number of overall MSHR uncacheable cycles @@ -207,149 +207,149 @@ system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss 0 system.cpu.dcache.replacements 0 # number of replacements system.cpu.dcache.replacements_0 0 # number of replacements system.cpu.dcache.replacements_1 0 # number of replacements -system.cpu.dcache.sampled_refs 342 # Sample count of references to valid blocks. +system.cpu.dcache.sampled_refs 347 # Sample count of references to valid blocks. system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions system.cpu.dcache.soft_prefetch_mshr_full_0 0 # number of mshr full events for SW prefetching instrutions system.cpu.dcache.soft_prefetch_mshr_full_1 0 # number of mshr full events for SW prefetching instrutions -system.cpu.dcache.tagsinuse 221.287284 # Cycle average of tags in use -system.cpu.dcache.total_refs 3837 # Total number of references to valid blocks. +system.cpu.dcache.tagsinuse 222.253048 # Cycle average of tags in use +system.cpu.dcache.total_refs 3816 # Total number of references to valid blocks. system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.dcache.writebacks 0 # number of writebacks system.cpu.dcache.writebacks_0 0 # number of writebacks system.cpu.dcache.writebacks_1 0 # number of writebacks -system.cpu.decode.DECODE:BlockedCycles 1876 # Number of cycles decode is blocked -system.cpu.decode.DECODE:BranchMispred 246 # Number of times decode detected a branch misprediction -system.cpu.decode.DECODE:BranchResolved 345 # Number of times decode resolved a branch -system.cpu.decode.DECODE:DecodedInsts 21769 # Number of instructions handled by decode -system.cpu.decode.DECODE:IdleCycles 14522 # Number of cycles decode is idle -system.cpu.decode.DECODE:RunCycles 3673 # Number of cycles decode is running -system.cpu.decode.DECODE:SquashCycles 1511 # Number of cycles decode is squashing -system.cpu.decode.DECODE:SquashedInsts 346 # Number of squashed instructions handled by decode -system.cpu.decode.DECODE:UnblockCycles 145 # Number of cycles decode is unblocking -system.cpu.fetch.Branches 3940 # Number of branches that fetch encountered -system.cpu.fetch.CacheLines 3009 # Number of cache lines fetched -system.cpu.fetch.Cycles 6972 # Number of cycles fetch has run and was not squashing or blocked -system.cpu.fetch.IcacheSquashes 537 # Number of outstanding Icache misses that were squashed -system.cpu.fetch.Insts 23897 # Number of instructions fetch has processed -system.cpu.fetch.SquashCycles 1189 # Number of cycles fetch has spent squashing -system.cpu.fetch.branchRate 0.358802 # Number of branch fetches per cycle -system.cpu.fetch.icacheStallCycles 3009 # Number of cycles fetch is stalled on an Icache miss -system.cpu.fetch.predictedBranches 1221 # Number of branches that fetch has predicted taken -system.cpu.fetch.rate 2.176213 # Number of inst fetches per cycle +system.cpu.decode.DECODE:BlockedCycles 1825 # Number of cycles decode is blocked +system.cpu.decode.DECODE:BranchMispred 258 # Number of times decode detected a branch misprediction +system.cpu.decode.DECODE:BranchResolved 356 # Number of times decode resolved a branch +system.cpu.decode.DECODE:DecodedInsts 21887 # Number of instructions handled by decode +system.cpu.decode.DECODE:IdleCycles 13153 # Number of cycles decode is idle +system.cpu.decode.DECODE:RunCycles 3652 # Number of cycles decode is running +system.cpu.decode.DECODE:SquashCycles 1444 # Number of cycles decode is squashing +system.cpu.decode.DECODE:SquashedInsts 304 # Number of squashed instructions handled by decode +system.cpu.decode.DECODE:UnblockCycles 187 # Number of cycles decode is unblocking +system.cpu.fetch.Branches 3987 # Number of branches that fetch encountered +system.cpu.fetch.CacheLines 2956 # Number of cache lines fetched +system.cpu.fetch.Cycles 6947 # Number of cycles fetch has run and was not squashing or blocked +system.cpu.fetch.IcacheSquashes 423 # Number of outstanding Icache misses that were squashed +system.cpu.fetch.Insts 24040 # Number of instructions fetch has processed +system.cpu.fetch.SquashCycles 1159 # Number of cycles fetch has spent squashing +system.cpu.fetch.branchRate 0.388976 # Number of branch fetches per cycle +system.cpu.fetch.icacheStallCycles 2956 # Number of cycles fetch is stalled on an Icache miss +system.cpu.fetch.predictedBranches 1235 # Number of branches that fetch has predicted taken +system.cpu.fetch.rate 2.345366 # Number of inst fetches per cycle system.cpu.fetch.rateDist.start_dist # Number of instructions fetched each cycle (Total) -system.cpu.fetch.rateDist.samples 10981 +system.cpu.fetch.rateDist.samples 10250 system.cpu.fetch.rateDist.min_value 0 - 0 7019 6391.95% - 1 293 266.82% - 2 225 204.90% - 3 260 236.77% - 4 345 314.18% - 5 288 262.27% - 6 304 276.84% - 7 246 224.02% - 8 2001 1822.24% + 0 6260 6107.32% + 1 296 288.78% + 2 229 223.41% + 3 268 261.46% + 4 338 329.76% + 5 294 286.83% + 6 303 295.61% + 7 254 247.80% + 8 2008 1959.02% system.cpu.fetch.rateDist.max_value 8 system.cpu.fetch.rateDist.end_dist -system.cpu.icache.ReadReq_accesses 3009 # number of ReadReq accesses(hits+misses) -system.cpu.icache.ReadReq_accesses_0 3009 # number of ReadReq accesses(hits+misses) -system.cpu.icache.ReadReq_avg_miss_latency_0 5911.144578 # average ReadReq miss latency -system.cpu.icache.ReadReq_avg_mshr_miss_latency_0 5119.774920 # average ReadReq mshr miss latency -system.cpu.icache.ReadReq_hits 2345 # number of ReadReq hits -system.cpu.icache.ReadReq_hits_0 2345 # number of ReadReq hits -system.cpu.icache.ReadReq_miss_latency 3925000 # number of ReadReq miss cycles -system.cpu.icache.ReadReq_miss_latency_0 3925000 # number of ReadReq miss cycles -system.cpu.icache.ReadReq_miss_rate_0 0.220671 # miss rate for ReadReq accesses -system.cpu.icache.ReadReq_misses 664 # number of ReadReq misses -system.cpu.icache.ReadReq_misses_0 664 # number of ReadReq misses -system.cpu.icache.ReadReq_mshr_hits 42 # number of ReadReq MSHR hits -system.cpu.icache.ReadReq_mshr_hits_0 42 # number of ReadReq MSHR hits -system.cpu.icache.ReadReq_mshr_miss_latency 3184500 # number of ReadReq MSHR miss cycles -system.cpu.icache.ReadReq_mshr_miss_latency_0 3184500 # number of ReadReq MSHR miss cycles -system.cpu.icache.ReadReq_mshr_miss_rate_0 0.206713 # mshr miss rate for ReadReq accesses -system.cpu.icache.ReadReq_mshr_misses 622 # number of ReadReq MSHR misses -system.cpu.icache.ReadReq_mshr_misses_0 622 # number of ReadReq MSHR misses +system.cpu.icache.ReadReq_accesses 2906 # number of ReadReq accesses(hits+misses) +system.cpu.icache.ReadReq_accesses_0 2906 # number of ReadReq accesses(hits+misses) +system.cpu.icache.ReadReq_avg_miss_latency_0 6488.691438 # average ReadReq miss latency +system.cpu.icache.ReadReq_avg_mshr_miss_latency_0 5218.093700 # average ReadReq mshr miss latency +system.cpu.icache.ReadReq_hits 2287 # number of ReadReq hits +system.cpu.icache.ReadReq_hits_0 2287 # number of ReadReq hits +system.cpu.icache.ReadReq_miss_latency 4016500 # number of ReadReq miss cycles +system.cpu.icache.ReadReq_miss_latency_0 4016500 # number of ReadReq miss cycles +system.cpu.icache.ReadReq_miss_rate_0 0.213008 # miss rate for ReadReq accesses +system.cpu.icache.ReadReq_misses 619 # number of ReadReq misses +system.cpu.icache.ReadReq_misses_0 619 # number of ReadReq misses +system.cpu.icache.ReadReq_mshr_hits 50 # number of ReadReq MSHR hits +system.cpu.icache.ReadReq_mshr_hits_0 50 # number of ReadReq MSHR hits +system.cpu.icache.ReadReq_mshr_miss_latency 3230000 # number of ReadReq MSHR miss cycles +system.cpu.icache.ReadReq_mshr_miss_latency_0 3230000 # number of ReadReq MSHR miss cycles +system.cpu.icache.ReadReq_mshr_miss_rate_0 0.213008 # mshr miss rate for ReadReq accesses +system.cpu.icache.ReadReq_mshr_misses 619 # number of ReadReq MSHR misses +system.cpu.icache.ReadReq_mshr_misses_0 619 # number of ReadReq MSHR misses system.cpu.icache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked system.cpu.icache.avg_blocked_cycles_no_targets # average number of cycles each access was blocked -system.cpu.icache.avg_refs 3.770096 # Average number of references to valid blocks. +system.cpu.icache.avg_refs 3.694669 # Average number of references to valid blocks. system.cpu.icache.blocked_no_mshrs 0 # number of cycles access was blocked system.cpu.icache.blocked_no_targets 0 # number of cycles access was blocked system.cpu.icache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked system.cpu.icache.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu.icache.cache_copies 0 # number of cache copies performed -system.cpu.icache.demand_accesses 3009 # number of demand (read+write) accesses -system.cpu.icache.demand_accesses_0 3009 # number of demand (read+write) accesses +system.cpu.icache.demand_accesses 2906 # number of demand (read+write) accesses +system.cpu.icache.demand_accesses_0 2906 # number of demand (read+write) accesses system.cpu.icache.demand_accesses_1 0 # number of demand (read+write) accesses system.cpu.icache.demand_avg_miss_latency # average overall miss latency -system.cpu.icache.demand_avg_miss_latency_0 5911.144578 # average overall miss latency +system.cpu.icache.demand_avg_miss_latency_0 6488.691438 # average overall miss latency system.cpu.icache.demand_avg_miss_latency_1 # average overall miss latency system.cpu.icache.demand_avg_mshr_miss_latency # average overall mshr miss latency -system.cpu.icache.demand_avg_mshr_miss_latency_0 5119.774920 # average overall mshr miss latency +system.cpu.icache.demand_avg_mshr_miss_latency_0 5218.093700 # average overall mshr miss latency system.cpu.icache.demand_avg_mshr_miss_latency_1 # average overall mshr miss latency -system.cpu.icache.demand_hits 2345 # number of demand (read+write) hits -system.cpu.icache.demand_hits_0 2345 # number of demand (read+write) hits +system.cpu.icache.demand_hits 2287 # number of demand (read+write) hits +system.cpu.icache.demand_hits_0 2287 # number of demand (read+write) hits system.cpu.icache.demand_hits_1 0 # number of demand (read+write) hits -system.cpu.icache.demand_miss_latency 3925000 # number of demand (read+write) miss cycles -system.cpu.icache.demand_miss_latency_0 3925000 # number of demand (read+write) miss cycles +system.cpu.icache.demand_miss_latency 4016500 # number of demand (read+write) miss cycles +system.cpu.icache.demand_miss_latency_0 4016500 # number of demand (read+write) miss cycles system.cpu.icache.demand_miss_latency_1 0 # number of demand (read+write) miss cycles system.cpu.icache.demand_miss_rate # miss rate for demand accesses -system.cpu.icache.demand_miss_rate_0 0.220671 # miss rate for demand accesses +system.cpu.icache.demand_miss_rate_0 0.213008 # miss rate for demand accesses system.cpu.icache.demand_miss_rate_1 # miss rate for demand accesses -system.cpu.icache.demand_misses 664 # number of demand (read+write) misses -system.cpu.icache.demand_misses_0 664 # number of demand (read+write) misses +system.cpu.icache.demand_misses 619 # number of demand (read+write) misses +system.cpu.icache.demand_misses_0 619 # number of demand (read+write) misses system.cpu.icache.demand_misses_1 0 # number of demand (read+write) misses -system.cpu.icache.demand_mshr_hits 42 # number of demand (read+write) MSHR hits -system.cpu.icache.demand_mshr_hits_0 42 # number of demand (read+write) MSHR hits +system.cpu.icache.demand_mshr_hits 50 # number of demand (read+write) MSHR hits +system.cpu.icache.demand_mshr_hits_0 50 # number of demand (read+write) MSHR hits system.cpu.icache.demand_mshr_hits_1 0 # number of demand (read+write) MSHR hits -system.cpu.icache.demand_mshr_miss_latency 3184500 # number of demand (read+write) MSHR miss cycles -system.cpu.icache.demand_mshr_miss_latency_0 3184500 # number of demand (read+write) MSHR miss cycles +system.cpu.icache.demand_mshr_miss_latency 3230000 # number of demand (read+write) MSHR miss cycles +system.cpu.icache.demand_mshr_miss_latency_0 3230000 # number of demand (read+write) MSHR miss cycles system.cpu.icache.demand_mshr_miss_latency_1 0 # number of demand (read+write) MSHR miss cycles system.cpu.icache.demand_mshr_miss_rate # mshr miss rate for demand accesses -system.cpu.icache.demand_mshr_miss_rate_0 0.206713 # mshr miss rate for demand accesses +system.cpu.icache.demand_mshr_miss_rate_0 0.213008 # mshr miss rate for demand accesses system.cpu.icache.demand_mshr_miss_rate_1 # mshr miss rate for demand accesses -system.cpu.icache.demand_mshr_misses 622 # number of demand (read+write) MSHR misses -system.cpu.icache.demand_mshr_misses_0 622 # number of demand (read+write) MSHR misses +system.cpu.icache.demand_mshr_misses 619 # number of demand (read+write) MSHR misses +system.cpu.icache.demand_mshr_misses_0 619 # number of demand (read+write) MSHR misses system.cpu.icache.demand_mshr_misses_1 0 # number of demand (read+write) MSHR misses system.cpu.icache.fast_writes 0 # number of fast writes performed system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.icache.mshr_cap_events_0 0 # number of times MSHR cap was activated system.cpu.icache.mshr_cap_events_1 0 # number of times MSHR cap was activated system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.icache.overall_accesses 3009 # number of overall (read+write) accesses -system.cpu.icache.overall_accesses_0 3009 # number of overall (read+write) accesses +system.cpu.icache.overall_accesses 2906 # number of overall (read+write) accesses +system.cpu.icache.overall_accesses_0 2906 # number of overall (read+write) accesses system.cpu.icache.overall_accesses_1 0 # number of overall (read+write) accesses system.cpu.icache.overall_avg_miss_latency # average overall miss latency -system.cpu.icache.overall_avg_miss_latency_0 5911.144578 # average overall miss latency +system.cpu.icache.overall_avg_miss_latency_0 6488.691438 # average overall miss latency system.cpu.icache.overall_avg_miss_latency_1 # average overall miss latency system.cpu.icache.overall_avg_mshr_miss_latency # average overall mshr miss latency -system.cpu.icache.overall_avg_mshr_miss_latency_0 5119.774920 # average overall mshr miss latency +system.cpu.icache.overall_avg_mshr_miss_latency_0 5218.093700 # average overall mshr miss latency system.cpu.icache.overall_avg_mshr_miss_latency_1 # average overall mshr miss latency system.cpu.icache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency system.cpu.icache.overall_avg_mshr_uncacheable_latency_0 # average overall mshr uncacheable latency system.cpu.icache.overall_avg_mshr_uncacheable_latency_1 # average overall mshr uncacheable latency -system.cpu.icache.overall_hits 2345 # number of overall hits -system.cpu.icache.overall_hits_0 2345 # number of overall hits +system.cpu.icache.overall_hits 2287 # number of overall hits +system.cpu.icache.overall_hits_0 2287 # number of overall hits system.cpu.icache.overall_hits_1 0 # number of overall hits -system.cpu.icache.overall_miss_latency 3925000 # number of overall miss cycles -system.cpu.icache.overall_miss_latency_0 3925000 # number of overall miss cycles +system.cpu.icache.overall_miss_latency 4016500 # number of overall miss cycles +system.cpu.icache.overall_miss_latency_0 4016500 # number of overall miss cycles system.cpu.icache.overall_miss_latency_1 0 # number of overall miss cycles system.cpu.icache.overall_miss_rate # miss rate for overall accesses -system.cpu.icache.overall_miss_rate_0 0.220671 # miss rate for overall accesses +system.cpu.icache.overall_miss_rate_0 0.213008 # miss rate for overall accesses system.cpu.icache.overall_miss_rate_1 # miss rate for overall accesses -system.cpu.icache.overall_misses 664 # number of overall misses -system.cpu.icache.overall_misses_0 664 # number of overall misses +system.cpu.icache.overall_misses 619 # number of overall misses +system.cpu.icache.overall_misses_0 619 # number of overall misses system.cpu.icache.overall_misses_1 0 # number of overall misses -system.cpu.icache.overall_mshr_hits 42 # number of overall MSHR hits -system.cpu.icache.overall_mshr_hits_0 42 # number of overall MSHR hits +system.cpu.icache.overall_mshr_hits 50 # number of overall MSHR hits +system.cpu.icache.overall_mshr_hits_0 50 # number of overall MSHR hits system.cpu.icache.overall_mshr_hits_1 0 # number of overall MSHR hits -system.cpu.icache.overall_mshr_miss_latency 3184500 # number of overall MSHR miss cycles -system.cpu.icache.overall_mshr_miss_latency_0 3184500 # number of overall MSHR miss cycles +system.cpu.icache.overall_mshr_miss_latency 3230000 # number of overall MSHR miss cycles +system.cpu.icache.overall_mshr_miss_latency_0 3230000 # number of overall MSHR miss cycles system.cpu.icache.overall_mshr_miss_latency_1 0 # number of overall MSHR miss cycles system.cpu.icache.overall_mshr_miss_rate # mshr miss rate for overall accesses -system.cpu.icache.overall_mshr_miss_rate_0 0.206713 # mshr miss rate for overall accesses +system.cpu.icache.overall_mshr_miss_rate_0 0.213008 # mshr miss rate for overall accesses system.cpu.icache.overall_mshr_miss_rate_1 # mshr miss rate for overall accesses -system.cpu.icache.overall_mshr_misses 622 # number of overall MSHR misses -system.cpu.icache.overall_mshr_misses_0 622 # number of overall MSHR misses +system.cpu.icache.overall_mshr_misses 619 # number of overall MSHR misses +system.cpu.icache.overall_mshr_misses_0 619 # number of overall MSHR misses system.cpu.icache.overall_mshr_misses_1 0 # number of overall MSHR misses system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.icache.overall_mshr_uncacheable_latency_0 0 # number of overall MSHR uncacheable cycles @@ -369,104 +369,104 @@ system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss 0 system.cpu.icache.replacements 9 # number of replacements system.cpu.icache.replacements_0 9 # number of replacements system.cpu.icache.replacements_1 0 # number of replacements -system.cpu.icache.sampled_refs 622 # Sample count of references to valid blocks. +system.cpu.icache.sampled_refs 619 # Sample count of references to valid blocks. system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions system.cpu.icache.soft_prefetch_mshr_full_0 0 # number of mshr full events for SW prefetching instrutions system.cpu.icache.soft_prefetch_mshr_full_1 0 # number of mshr full events for SW prefetching instrutions -system.cpu.icache.tagsinuse 323.196356 # Cycle average of tags in use -system.cpu.icache.total_refs 2345 # Total number of references to valid blocks. +system.cpu.icache.tagsinuse 320.555850 # Cycle average of tags in use +system.cpu.icache.total_refs 2287 # Total number of references to valid blocks. system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.icache.writebacks 0 # number of writebacks system.cpu.icache.writebacks_0 0 # number of writebacks system.cpu.icache.writebacks_1 0 # number of writebacks system.cpu.idleCycles 2997 # Total number of cycles that the CPU has spent unscheduled due to idling -system.cpu.iew.EXEC:branches 2377 # Number of branches executed -system.cpu.iew.EXEC:branches_0 1192 # Number of branches executed -system.cpu.iew.EXEC:branches_1 1185 # Number of branches executed -system.cpu.iew.EXEC:nop 72 # number of nop insts executed +system.cpu.iew.EXEC:branches 2346 # Number of branches executed +system.cpu.iew.EXEC:branches_0 1165 # Number of branches executed +system.cpu.iew.EXEC:branches_1 1181 # Number of branches executed +system.cpu.iew.EXEC:nop 71 # number of nop insts executed system.cpu.iew.EXEC:nop_0 37 # number of nop insts executed -system.cpu.iew.EXEC:nop_1 35 # number of nop insts executed -system.cpu.iew.EXEC:rate 1.419725 # Inst execution rate -system.cpu.iew.EXEC:refs 5002 # number of memory reference insts executed -system.cpu.iew.EXEC:refs_0 2507 # number of memory reference insts executed -system.cpu.iew.EXEC:refs_1 2495 # number of memory reference insts executed -system.cpu.iew.EXEC:stores 1874 # Number of stores executed -system.cpu.iew.EXEC:stores_0 933 # Number of stores executed -system.cpu.iew.EXEC:stores_1 941 # Number of stores executed +system.cpu.iew.EXEC:nop_1 34 # number of nop insts executed +system.cpu.iew.EXEC:rate 1.504390 # Inst execution rate +system.cpu.iew.EXEC:refs 4985 # number of memory reference insts executed +system.cpu.iew.EXEC:refs_0 2501 # number of memory reference insts executed +system.cpu.iew.EXEC:refs_1 2484 # number of memory reference insts executed +system.cpu.iew.EXEC:stores 1875 # Number of stores executed +system.cpu.iew.EXEC:stores_0 943 # Number of stores executed +system.cpu.iew.EXEC:stores_1 932 # Number of stores executed system.cpu.iew.EXEC:swp 0 # number of swp insts executed system.cpu.iew.EXEC:swp_0 0 # number of swp insts executed system.cpu.iew.EXEC:swp_1 0 # number of swp insts executed -system.cpu.iew.WB:consumers 10260 # num instructions consuming a value -system.cpu.iew.WB:consumers_0 5135 # num instructions consuming a value -system.cpu.iew.WB:consumers_1 5125 # num instructions consuming a value -system.cpu.iew.WB:count 14994 # cumulative count of insts written-back -system.cpu.iew.WB:count_0 7526 # cumulative count of insts written-back -system.cpu.iew.WB:count_1 7468 # cumulative count of insts written-back -system.cpu.iew.WB:fanout 1.530607 # average fanout of values written-back -system.cpu.iew.WB:fanout_0 0.763778 # average fanout of values written-back -system.cpu.iew.WB:fanout_1 0.766829 # average fanout of values written-back +system.cpu.iew.WB:consumers 10076 # num instructions consuming a value +system.cpu.iew.WB:consumers_0 5067 # num instructions consuming a value +system.cpu.iew.WB:consumers_1 5009 # num instructions consuming a value +system.cpu.iew.WB:count 14858 # cumulative count of insts written-back +system.cpu.iew.WB:count_0 7442 # cumulative count of insts written-back +system.cpu.iew.WB:count_1 7416 # cumulative count of insts written-back +system.cpu.iew.WB:fanout 1.533770 # average fanout of values written-back +system.cpu.iew.WB:fanout_0 0.764555 # average fanout of values written-back +system.cpu.iew.WB:fanout_1 0.769215 # average fanout of values written-back system.cpu.iew.WB:penalized 0 # number of instrctions required to write to 'other' IQ system.cpu.iew.WB:penalized_0 0 # number of instrctions required to write to 'other' IQ system.cpu.iew.WB:penalized_1 0 # number of instrctions required to write to 'other' IQ system.cpu.iew.WB:penalized_rate 0 # fraction of instructions written-back that wrote to 'other' IQ system.cpu.iew.WB:penalized_rate_0 0 # fraction of instructions written-back that wrote to 'other' IQ system.cpu.iew.WB:penalized_rate_1 0 # fraction of instructions written-back that wrote to 'other' IQ -system.cpu.iew.WB:producers 7852 # num instructions producing a value -system.cpu.iew.WB:producers_0 3922 # num instructions producing a value -system.cpu.iew.WB:producers_1 3930 # num instructions producing a value -system.cpu.iew.WB:rate 1.365449 # insts written-back per cycle -system.cpu.iew.WB:rate_0 0.685366 # insts written-back per cycle -system.cpu.iew.WB:rate_1 0.680084 # insts written-back per cycle -system.cpu.iew.WB:sent 15132 # cumulative count of insts sent to commit -system.cpu.iew.WB:sent_0 7582 # cumulative count of insts sent to commit -system.cpu.iew.WB:sent_1 7550 # cumulative count of insts sent to commit -system.cpu.iew.branchMispredicts 958 # Number of branch mispredicts detected at execute -system.cpu.iew.iewBlockCycles 6 # Number of cycles IEW is blocking -system.cpu.iew.iewDispLoadInsts 3837 # Number of dispatched load instructions +system.cpu.iew.WB:producers 7727 # num instructions producing a value +system.cpu.iew.WB:producers_0 3874 # num instructions producing a value +system.cpu.iew.WB:producers_1 3853 # num instructions producing a value +system.cpu.iew.WB:rate 1.449561 # insts written-back per cycle +system.cpu.iew.WB:rate_0 0.726049 # insts written-back per cycle +system.cpu.iew.WB:rate_1 0.723512 # insts written-back per cycle +system.cpu.iew.WB:sent 14990 # cumulative count of insts sent to commit +system.cpu.iew.WB:sent_0 7512 # cumulative count of insts sent to commit +system.cpu.iew.WB:sent_1 7478 # cumulative count of insts sent to commit +system.cpu.iew.branchMispredicts 976 # Number of branch mispredicts detected at execute +system.cpu.iew.iewBlockCycles 8 # Number of cycles IEW is blocking +system.cpu.iew.iewDispLoadInsts 3781 # Number of dispatched load instructions system.cpu.iew.iewDispNonSpecInsts 42 # Number of dispatched non-speculative instructions -system.cpu.iew.iewDispSquashedInsts 445 # Number of squashed instructions skipped by dispatch -system.cpu.iew.iewDispStoreInsts 2172 # Number of dispatched store instructions -system.cpu.iew.iewDispatchedInsts 19086 # Number of instructions dispatched to IQ -system.cpu.iew.iewExecLoadInsts 3128 # Number of load instructions executed -system.cpu.iew.iewExecLoadInsts_0 1574 # Number of load instructions executed -system.cpu.iew.iewExecLoadInsts_1 1554 # Number of load instructions executed -system.cpu.iew.iewExecSquashedInsts 852 # Number of squashed instructions skipped in execute -system.cpu.iew.iewExecutedInsts 15590 # Number of executed instructions -system.cpu.iew.iewIQFullEvents 0 # Number of times the IQ has become full, causing a stall +system.cpu.iew.iewDispSquashedInsts 481 # Number of squashed instructions skipped by dispatch +system.cpu.iew.iewDispStoreInsts 2184 # Number of dispatched store instructions +system.cpu.iew.iewDispatchedInsts 18854 # Number of instructions dispatched to IQ +system.cpu.iew.iewExecLoadInsts 3110 # Number of load instructions executed +system.cpu.iew.iewExecLoadInsts_0 1558 # Number of load instructions executed +system.cpu.iew.iewExecLoadInsts_1 1552 # Number of load instructions executed +system.cpu.iew.iewExecSquashedInsts 865 # Number of squashed instructions skipped in execute +system.cpu.iew.iewExecutedInsts 15420 # Number of executed instructions +system.cpu.iew.iewIQFullEvents 2 # Number of times the IQ has become full, causing a stall system.cpu.iew.iewIdleCycles 0 # Number of cycles IEW is idle system.cpu.iew.iewLSQFullEvents 0 # Number of times the LSQ has become full, causing a stall -system.cpu.iew.iewSquashCycles 1511 # Number of cycles IEW is squashing -system.cpu.iew.iewUnblockCycles 0 # Number of cycles IEW is unblocking +system.cpu.iew.iewSquashCycles 1444 # Number of cycles IEW is squashing +system.cpu.iew.iewUnblockCycles 2 # Number of cycles IEW is unblocking system.cpu.iew.lsq.thread.0.blockedLoads 0 # Number of blocked loads due to partial load-store forwarding system.cpu.iew.lsq.thread.0.cacheBlocked 0 # Number of times an access to memory failed due to the cache being blocked -system.cpu.iew.lsq.thread.0.forwLoads 43 # Number of loads that had data forwarded from stores -system.cpu.iew.lsq.thread.0.ignoredResponses 4 # Number of memory responses ignored because the instruction is squashed +system.cpu.iew.lsq.thread.0.forwLoads 37 # Number of loads that had data forwarded from stores +system.cpu.iew.lsq.thread.0.ignoredResponses 0 # Number of memory responses ignored because the instruction is squashed system.cpu.iew.lsq.thread.0.invAddrLoads 0 # Number of loads ignored due to an invalid address system.cpu.iew.lsq.thread.0.invAddrSwpfs 0 # Number of software prefetches ignored due to an invalid address -system.cpu.iew.lsq.thread.0.memOrderViolation 64 # Number of memory ordering violations +system.cpu.iew.lsq.thread.0.memOrderViolation 65 # Number of memory ordering violations system.cpu.iew.lsq.thread.0.rescheduledLoads 1 # Number of loads that were rescheduled -system.cpu.iew.lsq.thread.0.squashedLoads 955 # Number of loads squashed -system.cpu.iew.lsq.thread.0.squashedStores 270 # Number of stores squashed +system.cpu.iew.lsq.thread.0.squashedLoads 929 # Number of loads squashed +system.cpu.iew.lsq.thread.0.squashedStores 286 # Number of stores squashed system.cpu.iew.lsq.thread.1.blockedLoads 0 # Number of blocked loads due to partial load-store forwarding system.cpu.iew.lsq.thread.1.cacheBlocked 0 # Number of times an access to memory failed due to the cache being blocked -system.cpu.iew.lsq.thread.1.forwLoads 42 # Number of loads that had data forwarded from stores -system.cpu.iew.lsq.thread.1.ignoredResponses 2 # Number of memory responses ignored because the instruction is squashed +system.cpu.iew.lsq.thread.1.forwLoads 44 # Number of loads that had data forwarded from stores +system.cpu.iew.lsq.thread.1.ignoredResponses 3 # Number of memory responses ignored because the instruction is squashed system.cpu.iew.lsq.thread.1.invAddrLoads 0 # Number of loads ignored due to an invalid address system.cpu.iew.lsq.thread.1.invAddrSwpfs 0 # Number of software prefetches ignored due to an invalid address -system.cpu.iew.lsq.thread.1.memOrderViolation 58 # Number of memory ordering violations +system.cpu.iew.lsq.thread.1.memOrderViolation 65 # Number of memory ordering violations system.cpu.iew.lsq.thread.1.rescheduledLoads 1 # Number of loads that were rescheduled -system.cpu.iew.lsq.thread.1.squashedLoads 924 # Number of loads squashed -system.cpu.iew.lsq.thread.1.squashedStores 278 # Number of stores squashed -system.cpu.iew.memOrderViolationEvents 122 # Number of memory order violations -system.cpu.iew.predictedNotTakenIncorrect 767 # Number of branches that were predicted not taken incorrectly -system.cpu.iew.predictedTakenIncorrect 191 # Number of branches that were predicted taken incorrectly -system.cpu.ipc_0 0.512066 # IPC: Instructions Per Cycle -system.cpu.ipc_1 0.512157 # IPC: Instructions Per Cycle -system.cpu.ipc_total 1.024224 # IPC: Total IPC of All Threads -system.cpu.iq.ISSUE:FU_type_0 8235 # Type of FU issued +system.cpu.iew.lsq.thread.1.squashedLoads 894 # Number of loads squashed +system.cpu.iew.lsq.thread.1.squashedStores 274 # Number of stores squashed +system.cpu.iew.memOrderViolationEvents 130 # Number of memory order violations +system.cpu.iew.predictedNotTakenIncorrect 783 # Number of branches that were predicted not taken incorrectly +system.cpu.iew.predictedTakenIncorrect 193 # Number of branches that were predicted taken incorrectly +system.cpu.ipc_0 0.548585 # IPC: Instructions Per Cycle +system.cpu.ipc_1 0.548683 # IPC: Instructions Per Cycle +system.cpu.ipc_total 1.097268 # IPC: Total IPC of All Threads +system.cpu.iq.ISSUE:FU_type_0 8178 # Type of FU issued system.cpu.iq.ISSUE:FU_type_0.start_dist No_OpClass 2 0.02% # Type of FU issued - IntAlu 5567 67.60% # Type of FU issued + IntAlu 5509 67.36% # Type of FU issued IntMult 1 0.01% # Type of FU issued IntDiv 0 0.00% # Type of FU issued FloatAdd 2 0.02% # Type of FU issued @@ -475,15 +475,15 @@ system.cpu.iq.ISSUE:FU_type_0.start_dist FloatMult 0 0.00% # Type of FU issued FloatDiv 0 0.00% # Type of FU issued FloatSqrt 0 0.00% # Type of FU issued - MemRead 1702 20.67% # Type of FU issued - MemWrite 961 11.67% # Type of FU issued + MemRead 1692 20.69% # Type of FU issued + MemWrite 972 11.89% # Type of FU issued IprAccess 0 0.00% # Type of FU issued InstPrefetch 0 0.00% # Type of FU issued system.cpu.iq.ISSUE:FU_type_0.end_dist -system.cpu.iq.ISSUE:FU_type_1 8207 # Type of FU issued +system.cpu.iq.ISSUE:FU_type_1 8107 # Type of FU issued system.cpu.iq.ISSUE:FU_type_1.start_dist No_OpClass 2 0.02% # Type of FU issued - IntAlu 5547 67.59% # Type of FU issued + IntAlu 5452 67.25% # Type of FU issued IntMult 1 0.01% # Type of FU issued IntDiv 0 0.00% # Type of FU issued FloatAdd 2 0.02% # Type of FU issued @@ -492,15 +492,15 @@ system.cpu.iq.ISSUE:FU_type_1.start_dist FloatMult 0 0.00% # Type of FU issued FloatDiv 0 0.00% # Type of FU issued FloatSqrt 0 0.00% # Type of FU issued - MemRead 1690 20.59% # Type of FU issued - MemWrite 965 11.76% # Type of FU issued + MemRead 1681 20.74% # Type of FU issued + MemWrite 969 11.95% # Type of FU issued IprAccess 0 0.00% # Type of FU issued InstPrefetch 0 0.00% # Type of FU issued system.cpu.iq.ISSUE:FU_type_1.end_dist -system.cpu.iq.ISSUE:FU_type 16442 # Type of FU issued +system.cpu.iq.ISSUE:FU_type 16285 # Type of FU issued system.cpu.iq.ISSUE:FU_type.start_dist No_OpClass 4 0.02% # Type of FU issued - IntAlu 11114 67.60% # Type of FU issued + IntAlu 10961 67.31% # Type of FU issued IntMult 2 0.01% # Type of FU issued IntDiv 0 0.00% # Type of FU issued FloatAdd 4 0.02% # Type of FU issued @@ -509,20 +509,20 @@ system.cpu.iq.ISSUE:FU_type.start_dist FloatMult 0 0.00% # Type of FU issued FloatDiv 0 0.00% # Type of FU issued FloatSqrt 0 0.00% # Type of FU issued - MemRead 3392 20.63% # Type of FU issued - MemWrite 1926 11.71% # Type of FU issued + MemRead 3373 20.71% # Type of FU issued + MemWrite 1941 11.92% # Type of FU issued IprAccess 0 0.00% # Type of FU issued InstPrefetch 0 0.00% # Type of FU issued system.cpu.iq.ISSUE:FU_type.end_dist -system.cpu.iq.ISSUE:fu_busy_cnt 189 # FU busy when requested -system.cpu.iq.ISSUE:fu_busy_cnt_0 98 # FU busy when requested -system.cpu.iq.ISSUE:fu_busy_cnt_1 91 # FU busy when requested -system.cpu.iq.ISSUE:fu_busy_rate 0.011495 # FU busy rate (busy events/executed inst) -system.cpu.iq.ISSUE:fu_busy_rate_0 0.005960 # FU busy rate (busy events/executed inst) -system.cpu.iq.ISSUE:fu_busy_rate_1 0.005535 # FU busy rate (busy events/executed inst) +system.cpu.iq.ISSUE:fu_busy_cnt 180 # FU busy when requested +system.cpu.iq.ISSUE:fu_busy_cnt_0 92 # FU busy when requested +system.cpu.iq.ISSUE:fu_busy_cnt_1 88 # FU busy when requested +system.cpu.iq.ISSUE:fu_busy_rate 0.011053 # FU busy rate (busy events/executed inst) +system.cpu.iq.ISSUE:fu_busy_rate_0 0.005649 # FU busy rate (busy events/executed inst) +system.cpu.iq.ISSUE:fu_busy_rate_1 0.005404 # FU busy rate (busy events/executed inst) system.cpu.iq.ISSUE:fu_full.start_dist No_OpClass 0 0.00% # attempts to use FU when none available - IntAlu 14 7.41% # attempts to use FU when none available + IntAlu 7 3.89% # attempts to use FU when none available IntMult 0 0.00% # attempts to use FU when none available IntDiv 0 0.00% # attempts to use FU when none available FloatAdd 0 0.00% # attempts to use FU when none available @@ -531,75 +531,103 @@ system.cpu.iq.ISSUE:fu_full.start_dist FloatMult 0 0.00% # attempts to use FU when none available FloatDiv 0 0.00% # attempts to use FU when none available FloatSqrt 0 0.00% # attempts to use FU when none available - MemRead 107 56.61% # attempts to use FU when none available - MemWrite 68 35.98% # attempts to use FU when none available + MemRead 112 62.22% # attempts to use FU when none available + MemWrite 61 33.89% # attempts to use FU when none available IprAccess 0 0.00% # attempts to use FU when none available InstPrefetch 0 0.00% # attempts to use FU when none available system.cpu.iq.ISSUE:fu_full.end_dist system.cpu.iq.ISSUE:issued_per_cycle.start_dist # Number of insts issued each cycle -system.cpu.iq.ISSUE:issued_per_cycle.samples 10981 +system.cpu.iq.ISSUE:issued_per_cycle.samples 10250 system.cpu.iq.ISSUE:issued_per_cycle.min_value 0 - 0 4775 4348.42% - 1 1817 1654.68% - 2 1638 1491.67% - 3 1107 1008.10% - 4 745 678.44% - 5 490 446.23% - 6 287 261.36% - 7 100 91.07% - 8 22 20.03% + 0 4091 3991.22% + 1 1777 1733.66% + 2 1632 1592.20% + 3 1101 1074.15% + 4 778 759.02% + 5 523 510.24% + 6 249 242.93% + 7 72 70.24% + 8 27 26.34% system.cpu.iq.ISSUE:issued_per_cycle.max_value 8 system.cpu.iq.ISSUE:issued_per_cycle.end_dist -system.cpu.iq.ISSUE:rate 1.497314 # Inst issue rate -system.cpu.iq.iqInstsAdded 18972 # Number of instructions added to the IQ (excludes non-spec) -system.cpu.iq.iqInstsIssued 16442 # Number of instructions issued +system.cpu.iq.ISSUE:rate 1.588780 # Inst issue rate +system.cpu.iq.iqInstsAdded 18741 # Number of instructions added to the IQ (excludes non-spec) +system.cpu.iq.iqInstsIssued 16285 # Number of instructions issued system.cpu.iq.iqNonSpecInstsAdded 42 # Number of non-speculative instructions added to the IQ -system.cpu.iq.iqSquashedInstsExamined 6918 # Number of squashed instructions iterated over during squash; mainly for profiling -system.cpu.iq.iqSquashedInstsIssued 63 # Number of squashed instructions issued +system.cpu.iq.iqSquashedInstsExamined 6728 # Number of squashed instructions iterated over during squash; mainly for profiling +system.cpu.iq.iqSquashedInstsIssued 34 # Number of squashed instructions issued system.cpu.iq.iqSquashedNonSpecRemoved 8 # Number of squashed non-spec instructions that were removed -system.cpu.iq.iqSquashedOperandsExamined 4274 # Number of squashed operands that are examined and possibly removed from graph -system.cpu.l2cache.ReadReq_accesses 962 # number of ReadReq accesses(hits+misses) -system.cpu.l2cache.ReadReq_accesses_0 962 # number of ReadReq accesses(hits+misses) -system.cpu.l2cache.ReadReq_avg_miss_latency_0 5208.636837 # average ReadReq miss latency -system.cpu.l2cache.ReadReq_avg_mshr_miss_latency_0 2724.765869 # average ReadReq mshr miss latency -system.cpu.l2cache.ReadReq_hits 1 # number of ReadReq hits -system.cpu.l2cache.ReadReq_hits_0 1 # number of ReadReq hits -system.cpu.l2cache.ReadReq_miss_latency 5005500 # number of ReadReq miss cycles -system.cpu.l2cache.ReadReq_miss_latency_0 5005500 # number of ReadReq miss cycles -system.cpu.l2cache.ReadReq_miss_rate_0 0.998960 # miss rate for ReadReq accesses -system.cpu.l2cache.ReadReq_misses 961 # number of ReadReq misses -system.cpu.l2cache.ReadReq_misses_0 961 # number of ReadReq misses -system.cpu.l2cache.ReadReq_mshr_miss_latency 2618500 # number of ReadReq MSHR miss cycles -system.cpu.l2cache.ReadReq_mshr_miss_latency_0 2618500 # number of ReadReq MSHR miss cycles -system.cpu.l2cache.ReadReq_mshr_miss_rate_0 0.998960 # mshr miss rate for ReadReq accesses -system.cpu.l2cache.ReadReq_mshr_misses 961 # number of ReadReq MSHR misses -system.cpu.l2cache.ReadReq_mshr_misses_0 961 # number of ReadReq MSHR misses +system.cpu.iq.iqSquashedOperandsExamined 4160 # Number of squashed operands that are examined and possibly removed from graph +system.cpu.l2cache.ReadExReq_accesses 146 # number of ReadExReq accesses(hits+misses) +system.cpu.l2cache.ReadExReq_accesses_0 146 # number of ReadExReq accesses(hits+misses) +system.cpu.l2cache.ReadExReq_avg_miss_latency_0 3893.835616 # average ReadExReq miss latency +system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency_0 2893.835616 # average ReadExReq mshr miss latency +system.cpu.l2cache.ReadExReq_miss_latency 568500 # number of ReadExReq miss cycles +system.cpu.l2cache.ReadExReq_miss_latency_0 568500 # number of ReadExReq miss cycles +system.cpu.l2cache.ReadExReq_miss_rate_0 1 # miss rate for ReadExReq accesses +system.cpu.l2cache.ReadExReq_misses 146 # number of ReadExReq misses +system.cpu.l2cache.ReadExReq_misses_0 146 # number of ReadExReq misses +system.cpu.l2cache.ReadExReq_mshr_miss_latency 422500 # number of ReadExReq MSHR miss cycles +system.cpu.l2cache.ReadExReq_mshr_miss_latency_0 422500 # number of ReadExReq MSHR miss cycles +system.cpu.l2cache.ReadExReq_mshr_miss_rate_0 1 # mshr miss rate for ReadExReq accesses +system.cpu.l2cache.ReadExReq_mshr_misses 146 # number of ReadExReq MSHR misses +system.cpu.l2cache.ReadExReq_mshr_misses_0 146 # number of ReadExReq MSHR misses +system.cpu.l2cache.ReadReq_accesses 820 # number of ReadReq accesses(hits+misses) +system.cpu.l2cache.ReadReq_accesses_0 820 # number of ReadReq accesses(hits+misses) +system.cpu.l2cache.ReadReq_avg_miss_latency_0 3880.368098 # average ReadReq miss latency +system.cpu.l2cache.ReadReq_avg_mshr_miss_latency_0 2880.368098 # average ReadReq mshr miss latency +system.cpu.l2cache.ReadReq_hits 5 # number of ReadReq hits +system.cpu.l2cache.ReadReq_hits_0 5 # number of ReadReq hits +system.cpu.l2cache.ReadReq_miss_latency 3162500 # number of ReadReq miss cycles +system.cpu.l2cache.ReadReq_miss_latency_0 3162500 # number of ReadReq miss cycles +system.cpu.l2cache.ReadReq_miss_rate_0 0.993902 # miss rate for ReadReq accesses +system.cpu.l2cache.ReadReq_misses 815 # number of ReadReq misses +system.cpu.l2cache.ReadReq_misses_0 815 # number of ReadReq misses +system.cpu.l2cache.ReadReq_mshr_miss_latency 2347500 # number of ReadReq MSHR miss cycles +system.cpu.l2cache.ReadReq_mshr_miss_latency_0 2347500 # number of ReadReq MSHR miss cycles +system.cpu.l2cache.ReadReq_mshr_miss_rate_0 0.993902 # mshr miss rate for ReadReq accesses +system.cpu.l2cache.ReadReq_mshr_misses 815 # number of ReadReq MSHR misses +system.cpu.l2cache.ReadReq_mshr_misses_0 815 # number of ReadReq MSHR misses +system.cpu.l2cache.UpgradeReq_accesses 28 # number of UpgradeReq accesses(hits+misses) +system.cpu.l2cache.UpgradeReq_accesses_0 28 # number of UpgradeReq accesses(hits+misses) +system.cpu.l2cache.UpgradeReq_avg_miss_latency_0 3392.857143 # average UpgradeReq miss latency +system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency_0 2392.857143 # average UpgradeReq mshr miss latency +system.cpu.l2cache.UpgradeReq_miss_latency 95000 # number of UpgradeReq miss cycles +system.cpu.l2cache.UpgradeReq_miss_latency_0 95000 # number of UpgradeReq miss cycles +system.cpu.l2cache.UpgradeReq_miss_rate_0 1 # miss rate for UpgradeReq accesses +system.cpu.l2cache.UpgradeReq_misses 28 # number of UpgradeReq misses +system.cpu.l2cache.UpgradeReq_misses_0 28 # number of UpgradeReq misses +system.cpu.l2cache.UpgradeReq_mshr_miss_latency 67000 # number of UpgradeReq MSHR miss cycles +system.cpu.l2cache.UpgradeReq_mshr_miss_latency_0 67000 # number of UpgradeReq MSHR miss cycles +system.cpu.l2cache.UpgradeReq_mshr_miss_rate_0 1 # mshr miss rate for UpgradeReq accesses +system.cpu.l2cache.UpgradeReq_mshr_misses 28 # number of UpgradeReq MSHR misses +system.cpu.l2cache.UpgradeReq_mshr_misses_0 28 # number of UpgradeReq MSHR misses system.cpu.l2cache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked system.cpu.l2cache.avg_blocked_cycles_no_targets # average number of cycles each access was blocked -system.cpu.l2cache.avg_refs 0.001041 # Average number of references to valid blocks. +system.cpu.l2cache.avg_refs 0.006353 # Average number of references to valid blocks. system.cpu.l2cache.blocked_no_mshrs 0 # number of cycles access was blocked system.cpu.l2cache.blocked_no_targets 0 # number of cycles access was blocked system.cpu.l2cache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked system.cpu.l2cache.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu.l2cache.cache_copies 0 # number of cache copies performed -system.cpu.l2cache.demand_accesses 962 # number of demand (read+write) accesses -system.cpu.l2cache.demand_accesses_0 962 # number of demand (read+write) accesses +system.cpu.l2cache.demand_accesses 966 # number of demand (read+write) accesses +system.cpu.l2cache.demand_accesses_0 966 # number of demand (read+write) accesses system.cpu.l2cache.demand_accesses_1 0 # number of demand (read+write) accesses system.cpu.l2cache.demand_avg_miss_latency # average overall miss latency -system.cpu.l2cache.demand_avg_miss_latency_0 5208.636837 # average overall miss latency +system.cpu.l2cache.demand_avg_miss_latency_0 3882.414152 # average overall miss latency system.cpu.l2cache.demand_avg_miss_latency_1 # average overall miss latency system.cpu.l2cache.demand_avg_mshr_miss_latency # average overall mshr miss latency -system.cpu.l2cache.demand_avg_mshr_miss_latency_0 2724.765869 # average overall mshr miss latency +system.cpu.l2cache.demand_avg_mshr_miss_latency_0 2882.414152 # average overall mshr miss latency system.cpu.l2cache.demand_avg_mshr_miss_latency_1 # average overall mshr miss latency -system.cpu.l2cache.demand_hits 1 # number of demand (read+write) hits -system.cpu.l2cache.demand_hits_0 1 # number of demand (read+write) hits +system.cpu.l2cache.demand_hits 5 # number of demand (read+write) hits +system.cpu.l2cache.demand_hits_0 5 # number of demand (read+write) hits system.cpu.l2cache.demand_hits_1 0 # number of demand (read+write) hits -system.cpu.l2cache.demand_miss_latency 5005500 # number of demand (read+write) miss cycles -system.cpu.l2cache.demand_miss_latency_0 5005500 # number of demand (read+write) miss cycles +system.cpu.l2cache.demand_miss_latency 3731000 # number of demand (read+write) miss cycles +system.cpu.l2cache.demand_miss_latency_0 3731000 # number of demand (read+write) miss cycles system.cpu.l2cache.demand_miss_latency_1 0 # number of demand (read+write) miss cycles system.cpu.l2cache.demand_miss_rate # miss rate for demand accesses -system.cpu.l2cache.demand_miss_rate_0 0.998960 # miss rate for demand accesses +system.cpu.l2cache.demand_miss_rate_0 0.994824 # miss rate for demand accesses system.cpu.l2cache.demand_miss_rate_1 # miss rate for demand accesses system.cpu.l2cache.demand_misses 961 # number of demand (read+write) misses system.cpu.l2cache.demand_misses_0 961 # number of demand (read+write) misses @@ -607,11 +635,11 @@ system.cpu.l2cache.demand_misses_1 0 # nu system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits system.cpu.l2cache.demand_mshr_hits_0 0 # number of demand (read+write) MSHR hits system.cpu.l2cache.demand_mshr_hits_1 0 # number of demand (read+write) MSHR hits -system.cpu.l2cache.demand_mshr_miss_latency 2618500 # number of demand (read+write) MSHR miss cycles -system.cpu.l2cache.demand_mshr_miss_latency_0 2618500 # number of demand (read+write) MSHR miss cycles +system.cpu.l2cache.demand_mshr_miss_latency 2770000 # number of demand (read+write) MSHR miss cycles +system.cpu.l2cache.demand_mshr_miss_latency_0 2770000 # number of demand (read+write) MSHR miss cycles system.cpu.l2cache.demand_mshr_miss_latency_1 0 # number of demand (read+write) MSHR miss cycles system.cpu.l2cache.demand_mshr_miss_rate # mshr miss rate for demand accesses -system.cpu.l2cache.demand_mshr_miss_rate_0 0.998960 # mshr miss rate for demand accesses +system.cpu.l2cache.demand_mshr_miss_rate_0 0.994824 # mshr miss rate for demand accesses system.cpu.l2cache.demand_mshr_miss_rate_1 # mshr miss rate for demand accesses system.cpu.l2cache.demand_mshr_misses 961 # number of demand (read+write) MSHR misses system.cpu.l2cache.demand_mshr_misses_0 961 # number of demand (read+write) MSHR misses @@ -621,26 +649,26 @@ system.cpu.l2cache.mshr_cap_events 0 # nu system.cpu.l2cache.mshr_cap_events_0 0 # number of times MSHR cap was activated system.cpu.l2cache.mshr_cap_events_1 0 # number of times MSHR cap was activated system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.l2cache.overall_accesses 962 # number of overall (read+write) accesses -system.cpu.l2cache.overall_accesses_0 962 # number of overall (read+write) accesses +system.cpu.l2cache.overall_accesses 966 # number of overall (read+write) accesses +system.cpu.l2cache.overall_accesses_0 966 # number of overall (read+write) accesses system.cpu.l2cache.overall_accesses_1 0 # number of overall (read+write) accesses system.cpu.l2cache.overall_avg_miss_latency # average overall miss latency -system.cpu.l2cache.overall_avg_miss_latency_0 5208.636837 # average overall miss latency +system.cpu.l2cache.overall_avg_miss_latency_0 3882.414152 # average overall miss latency system.cpu.l2cache.overall_avg_miss_latency_1 # average overall miss latency system.cpu.l2cache.overall_avg_mshr_miss_latency # average overall mshr miss latency -system.cpu.l2cache.overall_avg_mshr_miss_latency_0 2724.765869 # average overall mshr miss latency +system.cpu.l2cache.overall_avg_mshr_miss_latency_0 2882.414152 # average overall mshr miss latency system.cpu.l2cache.overall_avg_mshr_miss_latency_1 # average overall mshr miss latency system.cpu.l2cache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency system.cpu.l2cache.overall_avg_mshr_uncacheable_latency_0 # average overall mshr uncacheable latency system.cpu.l2cache.overall_avg_mshr_uncacheable_latency_1 # average overall mshr uncacheable latency -system.cpu.l2cache.overall_hits 1 # number of overall hits -system.cpu.l2cache.overall_hits_0 1 # number of overall hits +system.cpu.l2cache.overall_hits 5 # number of overall hits +system.cpu.l2cache.overall_hits_0 5 # number of overall hits system.cpu.l2cache.overall_hits_1 0 # number of overall hits -system.cpu.l2cache.overall_miss_latency 5005500 # number of overall miss cycles -system.cpu.l2cache.overall_miss_latency_0 5005500 # number of overall miss cycles +system.cpu.l2cache.overall_miss_latency 3731000 # number of overall miss cycles +system.cpu.l2cache.overall_miss_latency_0 3731000 # number of overall miss cycles system.cpu.l2cache.overall_miss_latency_1 0 # number of overall miss cycles system.cpu.l2cache.overall_miss_rate # miss rate for overall accesses -system.cpu.l2cache.overall_miss_rate_0 0.998960 # miss rate for overall accesses +system.cpu.l2cache.overall_miss_rate_0 0.994824 # miss rate for overall accesses system.cpu.l2cache.overall_miss_rate_1 # miss rate for overall accesses system.cpu.l2cache.overall_misses 961 # number of overall misses system.cpu.l2cache.overall_misses_0 961 # number of overall misses @@ -648,11 +676,11 @@ system.cpu.l2cache.overall_misses_1 0 # nu system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits system.cpu.l2cache.overall_mshr_hits_0 0 # number of overall MSHR hits system.cpu.l2cache.overall_mshr_hits_1 0 # number of overall MSHR hits -system.cpu.l2cache.overall_mshr_miss_latency 2618500 # number of overall MSHR miss cycles -system.cpu.l2cache.overall_mshr_miss_latency_0 2618500 # number of overall MSHR miss cycles +system.cpu.l2cache.overall_mshr_miss_latency 2770000 # number of overall MSHR miss cycles +system.cpu.l2cache.overall_mshr_miss_latency_0 2770000 # number of overall MSHR miss cycles system.cpu.l2cache.overall_mshr_miss_latency_1 0 # number of overall MSHR miss cycles system.cpu.l2cache.overall_mshr_miss_rate # mshr miss rate for overall accesses -system.cpu.l2cache.overall_mshr_miss_rate_0 0.998960 # mshr miss rate for overall accesses +system.cpu.l2cache.overall_mshr_miss_rate_0 0.994824 # mshr miss rate for overall accesses system.cpu.l2cache.overall_mshr_miss_rate_1 # mshr miss rate for overall accesses system.cpu.l2cache.overall_mshr_misses 961 # number of overall MSHR misses system.cpu.l2cache.overall_mshr_misses_0 961 # number of overall MSHR misses @@ -675,31 +703,31 @@ system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss 0 system.cpu.l2cache.replacements 0 # number of replacements system.cpu.l2cache.replacements_0 0 # number of replacements system.cpu.l2cache.replacements_1 0 # number of replacements -system.cpu.l2cache.sampled_refs 961 # Sample count of references to valid blocks. +system.cpu.l2cache.sampled_refs 787 # Sample count of references to valid blocks. system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions system.cpu.l2cache.soft_prefetch_mshr_full_0 0 # number of mshr full events for SW prefetching instrutions system.cpu.l2cache.soft_prefetch_mshr_full_1 0 # number of mshr full events for SW prefetching instrutions -system.cpu.l2cache.tagsinuse 545.318204 # Cycle average of tags in use -system.cpu.l2cache.total_refs 1 # Total number of references to valid blocks. +system.cpu.l2cache.tagsinuse 430.884580 # Cycle average of tags in use +system.cpu.l2cache.total_refs 5 # Total number of references to valid blocks. system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit. system.cpu.l2cache.writebacks 0 # number of writebacks system.cpu.l2cache.writebacks_0 0 # number of writebacks system.cpu.l2cache.writebacks_1 0 # number of writebacks -system.cpu.numCycles 10981 # number of cpu cycles simulated -system.cpu.rename.RENAME:BlockCycles 612 # Number of cycles rename is blocking +system.cpu.numCycles 10250 # number of cpu cycles simulated +system.cpu.rename.RENAME:BlockCycles 565 # Number of cycles rename is blocking system.cpu.rename.RENAME:CommittedMaps 8102 # Number of HB maps that are committed -system.cpu.rename.RENAME:IdleCycles 14828 # Number of cycles rename is idle -system.cpu.rename.RENAME:LSQFullEvents 692 # Number of times rename has blocked due to LSQ full -system.cpu.rename.RENAME:RenameLookups 26356 # Number of register rename lookups that rename has made -system.cpu.rename.RENAME:RenamedInsts 20731 # Number of instructions processed by rename -system.cpu.rename.RENAME:RenamedOperands 15606 # Number of destination operands rename has renamed -system.cpu.rename.RENAME:RunCycles 3494 # Number of cycles rename is running -system.cpu.rename.RENAME:SquashCycles 1511 # Number of cycles rename is squashing -system.cpu.rename.RENAME:UnblockCycles 761 # Number of cycles rename is unblocking -system.cpu.rename.RENAME:UndoneMaps 7504 # Number of HB maps that are undone due to squashing -system.cpu.rename.RENAME:serializeStallCycles 521 # count of cycles rename stalled for serializing inst +system.cpu.rename.RENAME:IdleCycles 13468 # Number of cycles rename is idle +system.cpu.rename.RENAME:LSQFullEvents 717 # Number of times rename has blocked due to LSQ full +system.cpu.rename.RENAME:RenameLookups 26379 # Number of register rename lookups that rename has made +system.cpu.rename.RENAME:RenamedInsts 20752 # Number of instructions processed by rename +system.cpu.rename.RENAME:RenamedOperands 15596 # Number of destination operands rename has renamed +system.cpu.rename.RENAME:RunCycles 3515 # Number of cycles rename is running +system.cpu.rename.RENAME:SquashCycles 1444 # Number of cycles rename is squashing +system.cpu.rename.RENAME:UnblockCycles 772 # Number of cycles rename is unblocking +system.cpu.rename.RENAME:UndoneMaps 7494 # Number of HB maps that are undone due to squashing +system.cpu.rename.RENAME:serializeStallCycles 497 # count of cycles rename stalled for serializing inst system.cpu.rename.RENAME:serializingInsts 48 # count of serializing insts renamed -system.cpu.rename.RENAME:skidInsts 2159 # count of insts added to the skid buffer +system.cpu.rename.RENAME:skidInsts 2091 # count of insts added to the skid buffer system.cpu.rename.RENAME:tempSerializingInsts 37 # count of temporary serializing insts renamed system.cpu.timesIdled 3 # Number of times that the entire CPU went into an idle state and unscheduled itself system.cpu.workload0.PROG:num_syscalls 17 # Number of system calls diff --git a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stdout b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stdout index 76288ac1d..2e4042a43 100644 --- a/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stdout +++ b/tests/quick/01.hello-2T-smt/ref/alpha/linux/o3-timing/stdout @@ -7,9 +7,9 @@ The Regents of The University of Michigan All Rights Reserved -M5 compiled Jun 21 2007 21:25:27 -M5 started Fri Jun 22 00:04:51 2007 +M5 compiled Aug 3 2007 03:56:47 +M5 started Fri Aug 3 04:17:15 2007 M5 executing on zizzer.eecs.umich.edu command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/quick/01.hello-2T-smt/alpha/linux/o3-timing tests/run.py quick/01.hello-2T-smt/alpha/linux/o3-timing Global frequency set at 1000000000000 ticks per second -Exiting @ tick 5491500 because target called exit() +Exiting @ tick 5126000 because target called exit() diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/config.ini b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/config.ini index 2d3b1a754..d3a9862e8 100644 --- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/config.ini +++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/config.ini @@ -35,7 +35,7 @@ side_b=system.membus.port[0] [system.cpu0] type=AtomicSimpleCPU -children=dcache dtb icache itb +children=dcache dtb icache itb tracer clock=500 cpu_id=0 defer_registration=false @@ -55,18 +55,16 @@ profile=0 progress_interval=0 simulate_stalls=false system=system +tracer=system.cpu0.tracer width=1 dcache_port=system.cpu0.dcache.cpu_side icache_port=system.cpu0.icache.cpu_side [system.cpu0.dcache] type=BaseCache -children=protocol -adaptive_compression=false +addr_range=0:18446744073709551615 assoc=4 block_size=64 -compressed_bus=false -compression_latency=0 hash_delay=1 latency=1000 lifo=false @@ -84,12 +82,10 @@ prefetch_serial_squash=false prefetch_use_cpu_id=true prefetcher_size=100 prioritizeRequests=false -protocol=system.cpu0.dcache.protocol repl=Null size=32768 split=false split_size=0 -store_compressed=false subblock_size=0 tgts_per_mshr=8 trace_addr=0 @@ -98,23 +94,15 @@ write_buffers=8 cpu_side=system.cpu0.dcache_port mem_side=system.toL2Bus.port[2] -[system.cpu0.dcache.protocol] -type=CoherenceProtocol -do_upgrades=true -protocol=moesi - [system.cpu0.dtb] type=AlphaDTB size=64 [system.cpu0.icache] type=BaseCache -children=protocol -adaptive_compression=false +addr_range=0:18446744073709551615 assoc=1 block_size=64 -compressed_bus=false -compression_latency=0 hash_delay=1 latency=1000 lifo=false @@ -132,12 +120,10 @@ prefetch_serial_squash=false prefetch_use_cpu_id=true prefetcher_size=100 prioritizeRequests=false -protocol=system.cpu0.icache.protocol repl=Null size=32768 split=false split_size=0 -store_compressed=false subblock_size=0 tgts_per_mshr=8 trace_addr=0 @@ -146,18 +132,16 @@ write_buffers=8 cpu_side=system.cpu0.icache_port mem_side=system.toL2Bus.port[1] -[system.cpu0.icache.protocol] -type=CoherenceProtocol -do_upgrades=true -protocol=moesi - [system.cpu0.itb] type=AlphaITB size=48 +[system.cpu0.tracer] +type=ExeTracer + [system.cpu1] type=AtomicSimpleCPU -children=dcache dtb icache itb +children=dcache dtb icache itb tracer clock=500 cpu_id=1 defer_registration=false @@ -177,18 +161,16 @@ profile=0 progress_interval=0 simulate_stalls=false system=system +tracer=system.cpu1.tracer width=1 dcache_port=system.cpu1.dcache.cpu_side icache_port=system.cpu1.icache.cpu_side [system.cpu1.dcache] type=BaseCache -children=protocol -adaptive_compression=false +addr_range=0:18446744073709551615 assoc=4 block_size=64 -compressed_bus=false -compression_latency=0 hash_delay=1 latency=1000 lifo=false @@ -206,12 +188,10 @@ prefetch_serial_squash=false prefetch_use_cpu_id=true prefetcher_size=100 prioritizeRequests=false -protocol=system.cpu1.dcache.protocol repl=Null size=32768 split=false split_size=0 -store_compressed=false subblock_size=0 tgts_per_mshr=8 trace_addr=0 @@ -220,23 +200,15 @@ write_buffers=8 cpu_side=system.cpu1.dcache_port mem_side=system.toL2Bus.port[4] -[system.cpu1.dcache.protocol] -type=CoherenceProtocol -do_upgrades=true -protocol=moesi - [system.cpu1.dtb] type=AlphaDTB size=64 [system.cpu1.icache] type=BaseCache -children=protocol -adaptive_compression=false +addr_range=0:18446744073709551615 assoc=1 block_size=64 -compressed_bus=false -compression_latency=0 hash_delay=1 latency=1000 lifo=false @@ -254,12 +226,10 @@ prefetch_serial_squash=false prefetch_use_cpu_id=true prefetcher_size=100 prioritizeRequests=false -protocol=system.cpu1.icache.protocol repl=Null size=32768 split=false split_size=0 -store_compressed=false subblock_size=0 tgts_per_mshr=8 trace_addr=0 @@ -268,15 +238,13 @@ write_buffers=8 cpu_side=system.cpu1.icache_port mem_side=system.toL2Bus.port[3] -[system.cpu1.icache.protocol] -type=CoherenceProtocol -do_upgrades=true -protocol=moesi - [system.cpu1.itb] type=AlphaITB size=48 +[system.cpu1.tracer] +type=ExeTracer + [system.disk0] type=IdeDisk children=image @@ -331,11 +299,9 @@ port=system.bridge.side_a system.tsunami.cchip.pio system.tsunami.pchip.pio syst [system.l2c] type=BaseCache -adaptive_compression=false +addr_range=0:18446744073709551615 assoc=8 block_size=64 -compressed_bus=false -compression_latency=0 hash_delay=1 latency=10000 lifo=false @@ -353,12 +319,10 @@ prefetch_serial_squash=false prefetch_use_cpu_id=true prefetcher_size=100 prioritizeRequests=false -protocol=Null repl=Null size=4194304 split=false split_size=0 -store_compressed=false subblock_size=0 tgts_per_mshr=16 trace_addr=0 @@ -922,7 +886,7 @@ pio_addr=8804615847936 pio_latency=1000 platform=system.tsunami system=system -time=2009 1 1 0 0 0 3 1 +time=Thu Jan 1 00:00:00 2009 tsunami=system.tsunami year_is_bcd=false pio=system.iobus.port[23] diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/m5stats.txt b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/m5stats.txt index 3458060ce..df780ee45 100644 --- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/m5stats.txt +++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/m5stats.txt @@ -1,20 +1,29 @@ ---------- Begin Simulation Statistics ---------- -host_inst_rate 1418499 # Simulator instruction rate (inst/s) -host_seconds 44.50 # Real time elapsed on the host -host_tick_rate 42028043491 # Simulator tick rate (ticks/s) +host_inst_rate 1258571 # Simulator instruction rate (inst/s) +host_mem_usage 256444 # Number of bytes of host memory used +host_seconds 50.16 # Real time elapsed on the host +host_tick_rate 37289409683 # Simulator tick rate (ticks/s) sim_freq 1000000000000 # Frequency of simulated ticks sim_insts 63125943 # Number of instructions simulated sim_seconds 1.870335 # Number of seconds simulated sim_ticks 1870335101500 # Number of ticks simulated -system.cpu0.dcache.ReadReq_accesses 9163941 # number of ReadReq accesses(hits+misses) -system.cpu0.dcache.ReadReq_hits 7464198 # number of ReadReq hits -system.cpu0.dcache.ReadReq_miss_rate 0.185482 # miss rate for ReadReq accesses -system.cpu0.dcache.ReadReq_misses 1699743 # number of ReadReq misses -system.cpu0.dcache.WriteReq_accesses 5933396 # number of WriteReq accesses(hits+misses) -system.cpu0.dcache.WriteReq_hits 5646722 # number of WriteReq hits -system.cpu0.dcache.WriteReq_miss_rate 0.048315 # miss rate for WriteReq accesses -system.cpu0.dcache.WriteReq_misses 286674 # number of WriteReq misses +system.cpu0.dcache.LoadLockedReq_accesses 188283 # number of LoadLockedReq accesses(hits+misses) +system.cpu0.dcache.LoadLockedReq_hits 172122 # number of LoadLockedReq hits +system.cpu0.dcache.LoadLockedReq_miss_rate 0.085834 # miss rate for LoadLockedReq accesses +system.cpu0.dcache.LoadLockedReq_misses 16161 # number of LoadLockedReq misses +system.cpu0.dcache.ReadReq_accesses 8975658 # number of ReadReq accesses(hits+misses) +system.cpu0.dcache.ReadReq_hits 7292076 # number of ReadReq hits +system.cpu0.dcache.ReadReq_miss_rate 0.187572 # miss rate for ReadReq accesses +system.cpu0.dcache.ReadReq_misses 1683582 # number of ReadReq misses +system.cpu0.dcache.StoreCondReq_accesses 187323 # number of StoreCondReq accesses(hits+misses) +system.cpu0.dcache.StoreCondReq_hits 159819 # number of StoreCondReq hits +system.cpu0.dcache.StoreCondReq_miss_rate 0.146827 # miss rate for StoreCondReq accesses +system.cpu0.dcache.StoreCondReq_misses 27504 # number of StoreCondReq misses +system.cpu0.dcache.WriteReq_accesses 5746073 # number of WriteReq accesses(hits+misses) +system.cpu0.dcache.WriteReq_hits 5372266 # number of WriteReq hits +system.cpu0.dcache.WriteReq_miss_rate 0.065054 # miss rate for WriteReq accesses +system.cpu0.dcache.WriteReq_misses 373807 # number of WriteReq misses system.cpu0.dcache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked system.cpu0.dcache.avg_blocked_cycles_no_targets # average number of cycles each access was blocked system.cpu0.dcache.avg_refs 6.625567 # Average number of references to valid blocks. @@ -23,13 +32,13 @@ system.cpu0.dcache.blocked_no_targets 0 # nu system.cpu0.dcache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked system.cpu0.dcache.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu0.dcache.cache_copies 0 # number of cache copies performed -system.cpu0.dcache.demand_accesses 15097337 # number of demand (read+write) accesses +system.cpu0.dcache.demand_accesses 14721731 # number of demand (read+write) accesses system.cpu0.dcache.demand_avg_miss_latency 0 # average overall miss latency system.cpu0.dcache.demand_avg_mshr_miss_latency # average overall mshr miss latency -system.cpu0.dcache.demand_hits 13110920 # number of demand (read+write) hits +system.cpu0.dcache.demand_hits 12664342 # number of demand (read+write) hits system.cpu0.dcache.demand_miss_latency 0 # number of demand (read+write) miss cycles -system.cpu0.dcache.demand_miss_rate 0.131574 # miss rate for demand accesses -system.cpu0.dcache.demand_misses 1986417 # number of demand (read+write) misses +system.cpu0.dcache.demand_miss_rate 0.139752 # miss rate for demand accesses +system.cpu0.dcache.demand_misses 2057389 # number of demand (read+write) misses system.cpu0.dcache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits system.cpu0.dcache.demand_mshr_miss_latency 0 # number of demand (read+write) MSHR miss cycles system.cpu0.dcache.demand_mshr_miss_rate 0 # mshr miss rate for demand accesses @@ -37,14 +46,14 @@ system.cpu0.dcache.demand_mshr_misses 0 # nu system.cpu0.dcache.fast_writes 0 # number of fast writes performed system.cpu0.dcache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu0.dcache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu0.dcache.overall_accesses 15097337 # number of overall (read+write) accesses +system.cpu0.dcache.overall_accesses 14721731 # number of overall (read+write) accesses system.cpu0.dcache.overall_avg_miss_latency 0 # average overall miss latency system.cpu0.dcache.overall_avg_mshr_miss_latency # average overall mshr miss latency system.cpu0.dcache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency -system.cpu0.dcache.overall_hits 13110920 # number of overall hits +system.cpu0.dcache.overall_hits 12664342 # number of overall hits system.cpu0.dcache.overall_miss_latency 0 # number of overall miss cycles -system.cpu0.dcache.overall_miss_rate 0.131574 # miss rate for overall accesses -system.cpu0.dcache.overall_misses 1986417 # number of overall misses +system.cpu0.dcache.overall_miss_rate 0.139752 # miss rate for overall accesses +system.cpu0.dcache.overall_misses 2057389 # number of overall misses system.cpu0.dcache.overall_mshr_hits 0 # number of overall MSHR hits system.cpu0.dcache.overall_mshr_miss_latency 0 # number of overall MSHR miss cycles system.cpu0.dcache.overall_mshr_miss_rate 0 # mshr miss rate for overall accesses @@ -60,39 +69,13 @@ system.cpu0.dcache.prefetcher.num_hwpf_issued 0 system.cpu0.dcache.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated system.cpu0.dcache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page system.cpu0.dcache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time -system.cpu0.dcache.protocol.hwpf_invalid 0 # hard prefetch misses to invalid blocks -system.cpu0.dcache.protocol.read_invalid 1699743 # read misses to invalid blocks -system.cpu0.dcache.protocol.snoop_inv_exclusive 0 # Invalidate snoops on exclusive blocks -system.cpu0.dcache.protocol.snoop_inv_invalid 0 # Invalidate snoops on invalid blocks -system.cpu0.dcache.protocol.snoop_inv_modified 2 # Invalidate snoops on modified blocks -system.cpu0.dcache.protocol.snoop_inv_owned 0 # Invalidate snoops on owned blocks -system.cpu0.dcache.protocol.snoop_inv_shared 0 # Invalidate snoops on shared blocks -system.cpu0.dcache.protocol.snoop_read_exclusive 689 # read snoops on exclusive blocks -system.cpu0.dcache.protocol.snoop_read_modified 4128 # read snoops on modified blocks -system.cpu0.dcache.protocol.snoop_read_owned 121 # read snoops on owned blocks -system.cpu0.dcache.protocol.snoop_read_shared 2691 # read snoops on shared blocks -system.cpu0.dcache.protocol.snoop_readex_exclusive 241 # readEx snoops on exclusive blocks -system.cpu0.dcache.protocol.snoop_readex_modified 227 # readEx snoops on modified blocks -system.cpu0.dcache.protocol.snoop_readex_owned 21 # readEx snoops on owned blocks -system.cpu0.dcache.protocol.snoop_readex_shared 14 # readEx snoops on shared blocks -system.cpu0.dcache.protocol.snoop_upgrade_owned 1359 # upgrade snoops on owned blocks -system.cpu0.dcache.protocol.snoop_upgrade_shared 725 # upgradee snoops on shared blocks -system.cpu0.dcache.protocol.snoop_writeinv_exclusive 0 # WriteInvalidate snoops on exclusive blocks -system.cpu0.dcache.protocol.snoop_writeinv_invalid 0 # WriteInvalidate snoops on invalid blocks -system.cpu0.dcache.protocol.snoop_writeinv_modified 0 # WriteInvalidate snoops on modified blocks -system.cpu0.dcache.protocol.snoop_writeinv_owned 0 # WriteInvalidate snoops on owned blocks -system.cpu0.dcache.protocol.snoop_writeinv_shared 0 # WriteInvalidate snoops on shared blocks -system.cpu0.dcache.protocol.swpf_invalid 0 # soft prefetch misses to invalid blocks -system.cpu0.dcache.protocol.write_invalid 282338 # write misses to invalid blocks -system.cpu0.dcache.protocol.write_owned 2517 # write misses to owned blocks -system.cpu0.dcache.protocol.write_shared 1819 # write misses to shared blocks system.cpu0.dcache.replacements 1978980 # number of replacements system.cpu0.dcache.sampled_refs 1979492 # Sample count of references to valid blocks. system.cpu0.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions system.cpu0.dcache.tagsinuse 504.827576 # Cycle average of tags in use system.cpu0.dcache.total_refs 13115256 # Total number of references to valid blocks. system.cpu0.dcache.warmup_cycle 10840000 # Cycle when the warmup percentage was hit. -system.cpu0.dcache.writebacks 0 # number of writebacks +system.cpu0.dcache.writebacks 396796 # number of writebacks system.cpu0.dtb.accesses 698037 # DTB accesses system.cpu0.dtb.acv 251 # DTB access violations system.cpu0.dtb.hits 15082969 # DTB hits @@ -154,32 +137,6 @@ system.cpu0.icache.prefetcher.num_hwpf_issued 0 system.cpu0.icache.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated system.cpu0.icache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page system.cpu0.icache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time -system.cpu0.icache.protocol.hwpf_invalid 0 # hard prefetch misses to invalid blocks -system.cpu0.icache.protocol.read_invalid 884872 # read misses to invalid blocks -system.cpu0.icache.protocol.snoop_inv_exclusive 0 # Invalidate snoops on exclusive blocks -system.cpu0.icache.protocol.snoop_inv_invalid 0 # Invalidate snoops on invalid blocks -system.cpu0.icache.protocol.snoop_inv_modified 0 # Invalidate snoops on modified blocks -system.cpu0.icache.protocol.snoop_inv_owned 0 # Invalidate snoops on owned blocks -system.cpu0.icache.protocol.snoop_inv_shared 0 # Invalidate snoops on shared blocks -system.cpu0.icache.protocol.snoop_read_exclusive 25832 # read snoops on exclusive blocks -system.cpu0.icache.protocol.snoop_read_modified 0 # read snoops on modified blocks -system.cpu0.icache.protocol.snoop_read_owned 0 # read snoops on owned blocks -system.cpu0.icache.protocol.snoop_read_shared 13268 # read snoops on shared blocks -system.cpu0.icache.protocol.snoop_readex_exclusive 78 # readEx snoops on exclusive blocks -system.cpu0.icache.protocol.snoop_readex_modified 0 # readEx snoops on modified blocks -system.cpu0.icache.protocol.snoop_readex_owned 0 # readEx snoops on owned blocks -system.cpu0.icache.protocol.snoop_readex_shared 0 # readEx snoops on shared blocks -system.cpu0.icache.protocol.snoop_upgrade_owned 0 # upgrade snoops on owned blocks -system.cpu0.icache.protocol.snoop_upgrade_shared 6 # upgradee snoops on shared blocks -system.cpu0.icache.protocol.snoop_writeinv_exclusive 0 # WriteInvalidate snoops on exclusive blocks -system.cpu0.icache.protocol.snoop_writeinv_invalid 0 # WriteInvalidate snoops on invalid blocks -system.cpu0.icache.protocol.snoop_writeinv_modified 0 # WriteInvalidate snoops on modified blocks -system.cpu0.icache.protocol.snoop_writeinv_owned 0 # WriteInvalidate snoops on owned blocks -system.cpu0.icache.protocol.snoop_writeinv_shared 0 # WriteInvalidate snoops on shared blocks -system.cpu0.icache.protocol.swpf_invalid 0 # soft prefetch misses to invalid blocks -system.cpu0.icache.protocol.write_invalid 0 # write misses to invalid blocks -system.cpu0.icache.protocol.write_owned 0 # write misses to owned blocks -system.cpu0.icache.protocol.write_shared 0 # write misses to shared blocks system.cpu0.icache.replacements 884276 # number of replacements system.cpu0.icache.sampled_refs 884788 # Sample count of references to valid blocks. system.cpu0.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions @@ -285,14 +242,22 @@ system.cpu0.not_idle_fraction 0.015290 # Pe system.cpu0.numCycles 57193784 # number of cpu cycles simulated system.cpu0.num_insts 57190172 # Number of instructions executed system.cpu0.num_refs 15322419 # Number of memory references -system.cpu1.dcache.ReadReq_accesses 1167383 # number of ReadReq accesses(hits+misses) -system.cpu1.dcache.ReadReq_hits 1124444 # number of ReadReq hits -system.cpu1.dcache.ReadReq_miss_rate 0.036782 # miss rate for ReadReq accesses -system.cpu1.dcache.ReadReq_misses 42939 # number of ReadReq misses -system.cpu1.dcache.WriteReq_accesses 749650 # number of WriteReq accesses(hits+misses) -system.cpu1.dcache.WriteReq_hits 723062 # number of WriteReq hits -system.cpu1.dcache.WriteReq_miss_rate 0.035467 # miss rate for WriteReq accesses -system.cpu1.dcache.WriteReq_misses 26588 # number of WriteReq misses +system.cpu1.dcache.LoadLockedReq_accesses 16418 # number of LoadLockedReq accesses(hits+misses) +system.cpu1.dcache.LoadLockedReq_hits 15129 # number of LoadLockedReq hits +system.cpu1.dcache.LoadLockedReq_miss_rate 0.078511 # miss rate for LoadLockedReq accesses +system.cpu1.dcache.LoadLockedReq_misses 1289 # number of LoadLockedReq misses +system.cpu1.dcache.ReadReq_accesses 1150965 # number of ReadReq accesses(hits+misses) +system.cpu1.dcache.ReadReq_hits 1109315 # number of ReadReq hits +system.cpu1.dcache.ReadReq_miss_rate 0.036187 # miss rate for ReadReq accesses +system.cpu1.dcache.ReadReq_misses 41650 # number of ReadReq misses +system.cpu1.dcache.StoreCondReq_accesses 16345 # number of StoreCondReq accesses(hits+misses) +system.cpu1.dcache.StoreCondReq_hits 13438 # number of StoreCondReq hits +system.cpu1.dcache.StoreCondReq_miss_rate 0.177853 # miss rate for StoreCondReq accesses +system.cpu1.dcache.StoreCondReq_misses 2907 # number of StoreCondReq misses +system.cpu1.dcache.WriteReq_accesses 733305 # number of WriteReq accesses(hits+misses) +system.cpu1.dcache.WriteReq_hits 702800 # number of WriteReq hits +system.cpu1.dcache.WriteReq_miss_rate 0.041599 # miss rate for WriteReq accesses +system.cpu1.dcache.WriteReq_misses 30505 # number of WriteReq misses system.cpu1.dcache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked system.cpu1.dcache.avg_blocked_cycles_no_targets # average number of cycles each access was blocked system.cpu1.dcache.avg_refs 29.277705 # Average number of references to valid blocks. @@ -301,13 +266,13 @@ system.cpu1.dcache.blocked_no_targets 0 # nu system.cpu1.dcache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked system.cpu1.dcache.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu1.dcache.cache_copies 0 # number of cache copies performed -system.cpu1.dcache.demand_accesses 1917033 # number of demand (read+write) accesses +system.cpu1.dcache.demand_accesses 1884270 # number of demand (read+write) accesses system.cpu1.dcache.demand_avg_miss_latency 0 # average overall miss latency system.cpu1.dcache.demand_avg_mshr_miss_latency # average overall mshr miss latency -system.cpu1.dcache.demand_hits 1847506 # number of demand (read+write) hits +system.cpu1.dcache.demand_hits 1812115 # number of demand (read+write) hits system.cpu1.dcache.demand_miss_latency 0 # number of demand (read+write) miss cycles -system.cpu1.dcache.demand_miss_rate 0.036268 # miss rate for demand accesses -system.cpu1.dcache.demand_misses 69527 # number of demand (read+write) misses +system.cpu1.dcache.demand_miss_rate 0.038293 # miss rate for demand accesses +system.cpu1.dcache.demand_misses 72155 # number of demand (read+write) misses system.cpu1.dcache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits system.cpu1.dcache.demand_mshr_miss_latency 0 # number of demand (read+write) MSHR miss cycles system.cpu1.dcache.demand_mshr_miss_rate 0 # mshr miss rate for demand accesses @@ -315,14 +280,14 @@ system.cpu1.dcache.demand_mshr_misses 0 # nu system.cpu1.dcache.fast_writes 0 # number of fast writes performed system.cpu1.dcache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu1.dcache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu1.dcache.overall_accesses 1917033 # number of overall (read+write) accesses +system.cpu1.dcache.overall_accesses 1884270 # number of overall (read+write) accesses system.cpu1.dcache.overall_avg_miss_latency 0 # average overall miss latency system.cpu1.dcache.overall_avg_mshr_miss_latency # average overall mshr miss latency system.cpu1.dcache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency -system.cpu1.dcache.overall_hits 1847506 # number of overall hits +system.cpu1.dcache.overall_hits 1812115 # number of overall hits system.cpu1.dcache.overall_miss_latency 0 # number of overall miss cycles -system.cpu1.dcache.overall_miss_rate 0.036268 # miss rate for overall accesses -system.cpu1.dcache.overall_misses 69527 # number of overall misses +system.cpu1.dcache.overall_miss_rate 0.038293 # miss rate for overall accesses +system.cpu1.dcache.overall_misses 72155 # number of overall misses system.cpu1.dcache.overall_mshr_hits 0 # number of overall MSHR hits system.cpu1.dcache.overall_mshr_miss_latency 0 # number of overall MSHR miss cycles system.cpu1.dcache.overall_mshr_miss_rate 0 # mshr miss rate for overall accesses @@ -338,39 +303,13 @@ system.cpu1.dcache.prefetcher.num_hwpf_issued 0 system.cpu1.dcache.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated system.cpu1.dcache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page system.cpu1.dcache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time -system.cpu1.dcache.protocol.hwpf_invalid 0 # hard prefetch misses to invalid blocks -system.cpu1.dcache.protocol.read_invalid 42939 # read misses to invalid blocks -system.cpu1.dcache.protocol.snoop_inv_exclusive 0 # Invalidate snoops on exclusive blocks -system.cpu1.dcache.protocol.snoop_inv_invalid 0 # Invalidate snoops on invalid blocks -system.cpu1.dcache.protocol.snoop_inv_modified 0 # Invalidate snoops on modified blocks -system.cpu1.dcache.protocol.snoop_inv_owned 0 # Invalidate snoops on owned blocks -system.cpu1.dcache.protocol.snoop_inv_shared 0 # Invalidate snoops on shared blocks -system.cpu1.dcache.protocol.snoop_read_exclusive 939 # read snoops on exclusive blocks -system.cpu1.dcache.protocol.snoop_read_modified 2438 # read snoops on modified blocks -system.cpu1.dcache.protocol.snoop_read_owned 337 # read snoops on owned blocks -system.cpu1.dcache.protocol.snoop_read_shared 61772 # read snoops on shared blocks -system.cpu1.dcache.protocol.snoop_readex_exclusive 103 # readEx snoops on exclusive blocks -system.cpu1.dcache.protocol.snoop_readex_modified 275 # readEx snoops on modified blocks -system.cpu1.dcache.protocol.snoop_readex_owned 44 # readEx snoops on owned blocks -system.cpu1.dcache.protocol.snoop_readex_shared 39 # readEx snoops on shared blocks -system.cpu1.dcache.protocol.snoop_upgrade_owned 1538 # upgrade snoops on owned blocks -system.cpu1.dcache.protocol.snoop_upgrade_shared 2755 # upgradee snoops on shared blocks -system.cpu1.dcache.protocol.snoop_writeinv_exclusive 0 # WriteInvalidate snoops on exclusive blocks -system.cpu1.dcache.protocol.snoop_writeinv_invalid 0 # WriteInvalidate snoops on invalid blocks -system.cpu1.dcache.protocol.snoop_writeinv_modified 0 # WriteInvalidate snoops on modified blocks -system.cpu1.dcache.protocol.snoop_writeinv_owned 0 # WriteInvalidate snoops on owned blocks -system.cpu1.dcache.protocol.snoop_writeinv_shared 0 # WriteInvalidate snoops on shared blocks -system.cpu1.dcache.protocol.swpf_invalid 0 # soft prefetch misses to invalid blocks -system.cpu1.dcache.protocol.write_invalid 24475 # write misses to invalid blocks -system.cpu1.dcache.protocol.write_owned 641 # write misses to owned blocks -system.cpu1.dcache.protocol.write_shared 1472 # write misses to shared blocks system.cpu1.dcache.replacements 62341 # number of replacements system.cpu1.dcache.sampled_refs 62660 # Sample count of references to valid blocks. system.cpu1.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions system.cpu1.dcache.tagsinuse 391.945837 # Cycle average of tags in use system.cpu1.dcache.total_refs 1834541 # Total number of references to valid blocks. system.cpu1.dcache.warmup_cycle 1851266680500 # Cycle when the warmup percentage was hit. -system.cpu1.dcache.writebacks 0 # number of writebacks +system.cpu1.dcache.writebacks 30850 # number of writebacks system.cpu1.dtb.accesses 323622 # DTB accesses system.cpu1.dtb.acv 116 # DTB access violations system.cpu1.dtb.hits 1914885 # DTB hits @@ -432,32 +371,6 @@ system.cpu1.icache.prefetcher.num_hwpf_issued 0 system.cpu1.icache.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated system.cpu1.icache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page system.cpu1.icache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time -system.cpu1.icache.protocol.hwpf_invalid 0 # hard prefetch misses to invalid blocks -system.cpu1.icache.protocol.read_invalid 103636 # read misses to invalid blocks -system.cpu1.icache.protocol.snoop_inv_exclusive 0 # Invalidate snoops on exclusive blocks -system.cpu1.icache.protocol.snoop_inv_invalid 0 # Invalidate snoops on invalid blocks -system.cpu1.icache.protocol.snoop_inv_modified 0 # Invalidate snoops on modified blocks -system.cpu1.icache.protocol.snoop_inv_owned 0 # Invalidate snoops on owned blocks -system.cpu1.icache.protocol.snoop_inv_shared 0 # Invalidate snoops on shared blocks -system.cpu1.icache.protocol.snoop_read_exclusive 17317 # read snoops on exclusive blocks -system.cpu1.icache.protocol.snoop_read_modified 0 # read snoops on modified blocks -system.cpu1.icache.protocol.snoop_read_owned 0 # read snoops on owned blocks -system.cpu1.icache.protocol.snoop_read_shared 199395 # read snoops on shared blocks -system.cpu1.icache.protocol.snoop_readex_exclusive 25 # readEx snoops on exclusive blocks -system.cpu1.icache.protocol.snoop_readex_modified 0 # readEx snoops on modified blocks -system.cpu1.icache.protocol.snoop_readex_owned 0 # readEx snoops on owned blocks -system.cpu1.icache.protocol.snoop_readex_shared 0 # readEx snoops on shared blocks -system.cpu1.icache.protocol.snoop_upgrade_owned 0 # upgrade snoops on owned blocks -system.cpu1.icache.protocol.snoop_upgrade_shared 2 # upgradee snoops on shared blocks -system.cpu1.icache.protocol.snoop_writeinv_exclusive 0 # WriteInvalidate snoops on exclusive blocks -system.cpu1.icache.protocol.snoop_writeinv_invalid 0 # WriteInvalidate snoops on invalid blocks -system.cpu1.icache.protocol.snoop_writeinv_modified 0 # WriteInvalidate snoops on modified blocks -system.cpu1.icache.protocol.snoop_writeinv_owned 0 # WriteInvalidate snoops on owned blocks -system.cpu1.icache.protocol.snoop_writeinv_shared 0 # WriteInvalidate snoops on shared blocks -system.cpu1.icache.protocol.swpf_invalid 0 # soft prefetch misses to invalid blocks -system.cpu1.icache.protocol.write_invalid 0 # write misses to invalid blocks -system.cpu1.icache.protocol.write_owned 0 # write misses to owned blocks -system.cpu1.icache.protocol.write_shared 0 # write misses to shared blocks system.cpu1.icache.replacements 103097 # number of replacements system.cpu1.icache.sampled_refs 103609 # Sample count of references to valid blocks. system.cpu1.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions @@ -559,30 +472,33 @@ system.disk2.dma_write_bytes 8192 # Nu system.disk2.dma_write_full_pages 1 # Number of full page size DMA writes. system.disk2.dma_write_txs 1 # Number of DMA write transactions. system.l2c.ReadExReq_accesses 306246 # number of ReadExReq accesses(hits+misses) -system.l2c.ReadExReq_hits 181108 # number of ReadExReq hits -system.l2c.ReadExReq_miss_rate 0.408619 # miss rate for ReadExReq accesses -system.l2c.ReadExReq_misses 125138 # number of ReadExReq misses +system.l2c.ReadExReq_miss_rate 1 # miss rate for ReadExReq accesses +system.l2c.ReadExReq_misses 306246 # number of ReadExReq misses system.l2c.ReadReq_accesses 2724166 # number of ReadReq accesses(hits+misses) -system.l2c.ReadReq_hits 1782863 # number of ReadReq hits -system.l2c.ReadReq_miss_rate 0.345538 # miss rate for ReadReq accesses -system.l2c.ReadReq_misses 941303 # number of ReadReq misses -system.l2c.Writeback_accesses 427634 # number of Writeback accesses(hits+misses) -system.l2c.Writeback_hits 427634 # number of Writeback hits +system.l2c.ReadReq_hits 1625506 # number of ReadReq hits +system.l2c.ReadReq_miss_rate 0.403301 # miss rate for ReadReq accesses +system.l2c.ReadReq_misses 1098660 # number of ReadReq misses +system.l2c.UpgradeReq_accesses 125013 # number of UpgradeReq accesses(hits+misses) +system.l2c.UpgradeReq_miss_rate 1 # miss rate for UpgradeReq accesses +system.l2c.UpgradeReq_misses 125013 # number of UpgradeReq misses +system.l2c.Writeback_accesses 427646 # number of Writeback accesses(hits+misses) +system.l2c.Writeback_miss_rate 1 # miss rate for Writeback accesses +system.l2c.Writeback_misses 427646 # number of Writeback misses system.l2c.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked system.l2c.avg_blocked_cycles_no_targets # average number of cycles each access was blocked -system.l2c.avg_refs 2.242879 # Average number of references to valid blocks. +system.l2c.avg_refs 1.720013 # Average number of references to valid blocks. system.l2c.blocked_no_mshrs 0 # number of cycles access was blocked system.l2c.blocked_no_targets 0 # number of cycles access was blocked system.l2c.blocked_cycles_no_mshrs 0 # number of cycles access was blocked system.l2c.blocked_cycles_no_targets 0 # number of cycles access was blocked system.l2c.cache_copies 0 # number of cache copies performed -system.l2c.demand_accesses 2724166 # number of demand (read+write) accesses +system.l2c.demand_accesses 3030412 # number of demand (read+write) accesses system.l2c.demand_avg_miss_latency 0 # average overall miss latency system.l2c.demand_avg_mshr_miss_latency # average overall mshr miss latency -system.l2c.demand_hits 1782863 # number of demand (read+write) hits +system.l2c.demand_hits 1625506 # number of demand (read+write) hits system.l2c.demand_miss_latency 0 # number of demand (read+write) miss cycles -system.l2c.demand_miss_rate 0.345538 # miss rate for demand accesses -system.l2c.demand_misses 941303 # number of demand (read+write) misses +system.l2c.demand_miss_rate 0.463602 # miss rate for demand accesses +system.l2c.demand_misses 1404906 # number of demand (read+write) misses system.l2c.demand_mshr_hits 0 # number of demand (read+write) MSHR hits system.l2c.demand_mshr_miss_latency 0 # number of demand (read+write) MSHR miss cycles system.l2c.demand_mshr_miss_rate 0 # mshr miss rate for demand accesses @@ -590,14 +506,14 @@ system.l2c.demand_mshr_misses 0 # nu system.l2c.fast_writes 0 # number of fast writes performed system.l2c.mshr_cap_events 0 # number of times MSHR cap was activated system.l2c.no_allocate_misses 0 # Number of misses that were no-allocate -system.l2c.overall_accesses 3151800 # number of overall (read+write) accesses +system.l2c.overall_accesses 3030412 # number of overall (read+write) accesses system.l2c.overall_avg_miss_latency 0 # average overall miss latency system.l2c.overall_avg_mshr_miss_latency # average overall mshr miss latency system.l2c.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency -system.l2c.overall_hits 2210497 # number of overall hits +system.l2c.overall_hits 1625506 # number of overall hits system.l2c.overall_miss_latency 0 # number of overall miss cycles -system.l2c.overall_miss_rate 0.298656 # miss rate for overall accesses -system.l2c.overall_misses 941303 # number of overall misses +system.l2c.overall_miss_rate 0.463602 # miss rate for overall accesses +system.l2c.overall_misses 1404906 # number of overall misses system.l2c.overall_mshr_hits 0 # number of overall MSHR hits system.l2c.overall_mshr_miss_latency 0 # number of overall MSHR miss cycles system.l2c.overall_mshr_miss_rate 0 # mshr miss rate for overall accesses @@ -613,12 +529,12 @@ system.l2c.prefetcher.num_hwpf_issued 0 # nu system.l2c.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated system.l2c.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page system.l2c.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time -system.l2c.replacements 1000779 # number of replacements -system.l2c.sampled_refs 1066159 # Sample count of references to valid blocks. +system.l2c.replacements 947869 # number of replacements +system.l2c.sampled_refs 966791 # Sample count of references to valid blocks. system.l2c.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.l2c.tagsinuse 65517.575356 # Cycle average of tags in use -system.l2c.total_refs 2391266 # Total number of references to valid blocks. -system.l2c.warmup_cycle 618103500 # Cycle when the warmup percentage was hit. +system.l2c.tagsinuse 15587.342424 # Cycle average of tags in use +system.l2c.total_refs 1662893 # Total number of references to valid blocks. +system.l2c.warmup_cycle 990121000 # Cycle when the warmup percentage was hit. system.l2c.writebacks 0 # number of writebacks system.tsunami.ethernet.coalescedRxDesc # average number of RxDesc's coalesced into each post system.tsunami.ethernet.coalescedRxIdle # average number of RxIdle's coalesced into each post diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/stderr b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/stderr index 3e1cbc554..563ca3160 100644 --- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/stderr +++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/stderr @@ -1,5 +1,5 @@ -Listening for system connection on port 3456 -0: system.remote_gdb.listener: listening for remote gdb #0 on port 7000 -0: system.remote_gdb.listener: listening for remote gdb #1 on port 7001 +Listening for system connection on port 3457 +0: system.remote_gdb.listener: listening for remote gdb on port 7001 +0: system.remote_gdb.listener: listening for remote gdb on port 7002 warn: Entering event queue @ 0. Starting simulation... warn: 97861500: Trying to launch CPU number 1! diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/stdout b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/stdout index e4b69d1d0..1298154d9 100644 --- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/stdout +++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic-dual/stdout @@ -5,10 +5,9 @@ The Regents of The University of Michigan All Rights Reserved -M5 compiled Jun 10 2007 14:10:03 -M5 started Mon Jun 11 01:04:58 2007 -M5 executing on iceaxe -command line: /Users/nate/build/outgoing/build/ALPHA_FS/m5.debug -d /Users/nate/build/outgoing/build/ALPHA_FS/tests/debug/quick/10.linux-boot/alpha/linux/tsunami-simple-atomic-dual tests/run.py quick/10.linux-boot/alpha/linux/tsunami-simple-atomic-dual +M5 compiled Aug 3 2007 04:02:11 +M5 started Fri Aug 3 04:22:43 2007 +M5 executing on zizzer.eecs.umich.edu +command line: build/ALPHA_FS/m5.fast -d build/ALPHA_FS/tests/fast/quick/10.linux-boot/alpha/linux/tsunami-simple-atomic-dual tests/run.py quick/10.linux-boot/alpha/linux/tsunami-simple-atomic-dual Global frequency set at 1000000000000 ticks per second - 0: system.tsunami.io.rtc: Real-time clock set to Thu Jan 1 00:00:00 2009 -Exiting @ tick 1870335097000 because m5_exit instruction encountered +Exiting @ tick 1870335101500 because m5_exit instruction encountered diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/config.ini b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/config.ini index 0347fbde9..3457f5f8f 100644 --- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/config.ini +++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/config.ini @@ -35,7 +35,7 @@ side_b=system.membus.port[0] [system.cpu] type=AtomicSimpleCPU -children=dcache dtb icache itb +children=dcache dtb icache itb tracer clock=500 cpu_id=0 defer_registration=false @@ -55,18 +55,16 @@ profile=0 progress_interval=0 simulate_stalls=false system=system +tracer=system.cpu.tracer width=1 dcache_port=system.cpu.dcache.cpu_side icache_port=system.cpu.icache.cpu_side [system.cpu.dcache] type=BaseCache -children=protocol -adaptive_compression=false +addr_range=0:18446744073709551615 assoc=4 block_size=64 -compressed_bus=false -compression_latency=0 hash_delay=1 latency=1000 lifo=false @@ -84,12 +82,10 @@ prefetch_serial_squash=false prefetch_use_cpu_id=true prefetcher_size=100 prioritizeRequests=false -protocol=system.cpu.dcache.protocol repl=Null size=32768 split=false split_size=0 -store_compressed=false subblock_size=0 tgts_per_mshr=8 trace_addr=0 @@ -98,23 +94,15 @@ write_buffers=8 cpu_side=system.cpu.dcache_port mem_side=system.toL2Bus.port[2] -[system.cpu.dcache.protocol] -type=CoherenceProtocol -do_upgrades=true -protocol=moesi - [system.cpu.dtb] type=AlphaDTB size=64 [system.cpu.icache] type=BaseCache -children=protocol -adaptive_compression=false +addr_range=0:18446744073709551615 assoc=1 block_size=64 -compressed_bus=false -compression_latency=0 hash_delay=1 latency=1000 lifo=false @@ -132,12 +120,10 @@ prefetch_serial_squash=false prefetch_use_cpu_id=true prefetcher_size=100 prioritizeRequests=false -protocol=system.cpu.icache.protocol repl=Null size=32768 split=false split_size=0 -store_compressed=false subblock_size=0 tgts_per_mshr=8 trace_addr=0 @@ -146,15 +132,13 @@ write_buffers=8 cpu_side=system.cpu.icache_port mem_side=system.toL2Bus.port[1] -[system.cpu.icache.protocol] -type=CoherenceProtocol -do_upgrades=true -protocol=moesi - [system.cpu.itb] type=AlphaITB size=48 +[system.cpu.tracer] +type=ExeTracer + [system.disk0] type=IdeDisk children=image @@ -209,11 +193,9 @@ port=system.bridge.side_a system.tsunami.cchip.pio system.tsunami.pchip.pio syst [system.l2c] type=BaseCache -adaptive_compression=false +addr_range=0:18446744073709551615 assoc=8 block_size=64 -compressed_bus=false -compression_latency=0 hash_delay=1 latency=10000 lifo=false @@ -231,12 +213,10 @@ prefetch_serial_squash=false prefetch_use_cpu_id=true prefetcher_size=100 prioritizeRequests=false -protocol=Null repl=Null size=4194304 split=false split_size=0 -store_compressed=false subblock_size=0 tgts_per_mshr=16 trace_addr=0 @@ -800,7 +780,7 @@ pio_addr=8804615847936 pio_latency=1000 platform=system.tsunami system=system -time=2009 1 1 0 0 0 3 1 +time=Thu Jan 1 00:00:00 2009 tsunami=system.tsunami year_is_bcd=false pio=system.iobus.port[23] diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/m5stats.txt b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/m5stats.txt index 722437701..cc91e4c90 100644 --- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/m5stats.txt +++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/m5stats.txt @@ -1,35 +1,44 @@ ---------- Begin Simulation Statistics ---------- -host_inst_rate 1403977 # Simulator instruction rate (inst/s) -host_seconds 42.74 # Real time elapsed on the host -host_tick_rate 42777462102 # Simulator tick rate (ticks/s) +host_inst_rate 1294756 # Simulator instruction rate (inst/s) +host_mem_usage 255900 # Number of bytes of host memory used +host_seconds 46.35 # Real time elapsed on the host +host_tick_rate 39449403667 # Simulator tick rate (ticks/s) sim_freq 1000000000000 # Frequency of simulated ticks sim_insts 60007317 # Number of instructions simulated sim_seconds 1.828355 # Number of seconds simulated -sim_ticks 1828355486000 # Number of ticks simulated -system.cpu.dcache.ReadReq_accesses 9723333 # number of ReadReq accesses(hits+misses) -system.cpu.dcache.ReadReq_hits 7984498 # number of ReadReq hits -system.cpu.dcache.ReadReq_miss_rate 0.178831 # miss rate for ReadReq accesses -system.cpu.dcache.ReadReq_misses 1738835 # number of ReadReq misses -system.cpu.dcache.WriteReq_accesses 6349447 # number of WriteReq accesses(hits+misses) -system.cpu.dcache.WriteReq_hits 6045093 # number of WriteReq hits -system.cpu.dcache.WriteReq_miss_rate 0.047934 # miss rate for WriteReq accesses -system.cpu.dcache.WriteReq_misses 304354 # number of WriteReq misses +sim_ticks 1828355476000 # Number of ticks simulated +system.cpu.dcache.LoadLockedReq_accesses 200279 # number of LoadLockedReq accesses(hits+misses) +system.cpu.dcache.LoadLockedReq_hits 183119 # number of LoadLockedReq hits +system.cpu.dcache.LoadLockedReq_miss_rate 0.085680 # miss rate for LoadLockedReq accesses +system.cpu.dcache.LoadLockedReq_misses 17160 # number of LoadLockedReq misses +system.cpu.dcache.ReadReq_accesses 9523054 # number of ReadReq accesses(hits+misses) +system.cpu.dcache.ReadReq_hits 7801377 # number of ReadReq hits +system.cpu.dcache.ReadReq_miss_rate 0.180790 # miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_misses 1721677 # number of ReadReq misses +system.cpu.dcache.StoreCondReq_accesses 199258 # number of StoreCondReq accesses(hits+misses) +system.cpu.dcache.StoreCondReq_hits 169392 # number of StoreCondReq hits +system.cpu.dcache.StoreCondReq_miss_rate 0.149886 # miss rate for StoreCondReq accesses +system.cpu.dcache.StoreCondReq_misses 29866 # number of StoreCondReq misses +system.cpu.dcache.WriteReq_accesses 6150189 # number of WriteReq accesses(hits+misses) +system.cpu.dcache.WriteReq_hits 5750772 # number of WriteReq hits +system.cpu.dcache.WriteReq_miss_rate 0.064944 # miss rate for WriteReq accesses +system.cpu.dcache.WriteReq_misses 399417 # number of WriteReq misses system.cpu.dcache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked system.cpu.dcache.avg_blocked_cycles_no_targets # average number of cycles each access was blocked -system.cpu.dcache.avg_refs 6.866566 # Average number of references to valid blocks. +system.cpu.dcache.avg_refs 6.866558 # Average number of references to valid blocks. system.cpu.dcache.blocked_no_mshrs 0 # number of cycles access was blocked system.cpu.dcache.blocked_no_targets 0 # number of cycles access was blocked system.cpu.dcache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked system.cpu.dcache.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu.dcache.cache_copies 0 # number of cache copies performed -system.cpu.dcache.demand_accesses 16072780 # number of demand (read+write) accesses +system.cpu.dcache.demand_accesses 15673243 # number of demand (read+write) accesses system.cpu.dcache.demand_avg_miss_latency 0 # average overall miss latency system.cpu.dcache.demand_avg_mshr_miss_latency # average overall mshr miss latency -system.cpu.dcache.demand_hits 14029591 # number of demand (read+write) hits +system.cpu.dcache.demand_hits 13552149 # number of demand (read+write) hits system.cpu.dcache.demand_miss_latency 0 # number of demand (read+write) miss cycles -system.cpu.dcache.demand_miss_rate 0.127121 # miss rate for demand accesses -system.cpu.dcache.demand_misses 2043189 # number of demand (read+write) misses +system.cpu.dcache.demand_miss_rate 0.135332 # miss rate for demand accesses +system.cpu.dcache.demand_misses 2121094 # number of demand (read+write) misses system.cpu.dcache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits system.cpu.dcache.demand_mshr_miss_latency 0 # number of demand (read+write) MSHR miss cycles system.cpu.dcache.demand_mshr_miss_rate 0 # mshr miss rate for demand accesses @@ -37,14 +46,14 @@ system.cpu.dcache.demand_mshr_misses 0 # nu system.cpu.dcache.fast_writes 0 # number of fast writes performed system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.dcache.overall_accesses 16072780 # number of overall (read+write) accesses +system.cpu.dcache.overall_accesses 15673243 # number of overall (read+write) accesses system.cpu.dcache.overall_avg_miss_latency 0 # average overall miss latency system.cpu.dcache.overall_avg_mshr_miss_latency # average overall mshr miss latency system.cpu.dcache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency -system.cpu.dcache.overall_hits 14029591 # number of overall hits +system.cpu.dcache.overall_hits 13552149 # number of overall hits system.cpu.dcache.overall_miss_latency 0 # number of overall miss cycles -system.cpu.dcache.overall_miss_rate 0.127121 # miss rate for overall accesses -system.cpu.dcache.overall_misses 2043189 # number of overall misses +system.cpu.dcache.overall_miss_rate 0.135332 # miss rate for overall accesses +system.cpu.dcache.overall_misses 2121094 # number of overall misses system.cpu.dcache.overall_mshr_hits 0 # number of overall MSHR hits system.cpu.dcache.overall_mshr_miss_latency 0 # number of overall MSHR miss cycles system.cpu.dcache.overall_mshr_miss_rate 0 # mshr miss rate for overall accesses @@ -60,39 +69,13 @@ system.cpu.dcache.prefetcher.num_hwpf_issued 0 system.cpu.dcache.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated system.cpu.dcache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time -system.cpu.dcache.protocol.hwpf_invalid 0 # hard prefetch misses to invalid blocks -system.cpu.dcache.protocol.read_invalid 1738835 # read misses to invalid blocks -system.cpu.dcache.protocol.snoop_inv_exclusive 0 # Invalidate snoops on exclusive blocks -system.cpu.dcache.protocol.snoop_inv_invalid 0 # Invalidate snoops on invalid blocks -system.cpu.dcache.protocol.snoop_inv_modified 1 # Invalidate snoops on modified blocks -system.cpu.dcache.protocol.snoop_inv_owned 0 # Invalidate snoops on owned blocks -system.cpu.dcache.protocol.snoop_inv_shared 0 # Invalidate snoops on shared blocks -system.cpu.dcache.protocol.snoop_read_exclusive 10 # read snoops on exclusive blocks -system.cpu.dcache.protocol.snoop_read_modified 15 # read snoops on modified blocks -system.cpu.dcache.protocol.snoop_read_owned 2 # read snoops on owned blocks -system.cpu.dcache.protocol.snoop_read_shared 124 # read snoops on shared blocks -system.cpu.dcache.protocol.snoop_readex_exclusive 0 # readEx snoops on exclusive blocks -system.cpu.dcache.protocol.snoop_readex_modified 0 # readEx snoops on modified blocks -system.cpu.dcache.protocol.snoop_readex_owned 0 # readEx snoops on owned blocks -system.cpu.dcache.protocol.snoop_readex_shared 0 # readEx snoops on shared blocks -system.cpu.dcache.protocol.snoop_upgrade_owned 0 # upgrade snoops on owned blocks -system.cpu.dcache.protocol.snoop_upgrade_shared 0 # upgradee snoops on shared blocks -system.cpu.dcache.protocol.snoop_writeinv_exclusive 0 # WriteInvalidate snoops on exclusive blocks -system.cpu.dcache.protocol.snoop_writeinv_invalid 0 # WriteInvalidate snoops on invalid blocks -system.cpu.dcache.protocol.snoop_writeinv_modified 0 # WriteInvalidate snoops on modified blocks -system.cpu.dcache.protocol.snoop_writeinv_owned 0 # WriteInvalidate snoops on owned blocks -system.cpu.dcache.protocol.snoop_writeinv_shared 0 # WriteInvalidate snoops on shared blocks -system.cpu.dcache.protocol.swpf_invalid 0 # soft prefetch misses to invalid blocks -system.cpu.dcache.protocol.write_invalid 304342 # write misses to invalid blocks -system.cpu.dcache.protocol.write_owned 8 # write misses to owned blocks -system.cpu.dcache.protocol.write_shared 4 # write misses to shared blocks -system.cpu.dcache.replacements 2042664 # number of replacements -system.cpu.dcache.sampled_refs 2043176 # Sample count of references to valid blocks. +system.cpu.dcache.replacements 2042666 # number of replacements +system.cpu.dcache.sampled_refs 2043178 # Sample count of references to valid blocks. system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions system.cpu.dcache.tagsinuse 511.997801 # Cycle average of tags in use -system.cpu.dcache.total_refs 14029603 # Total number of references to valid blocks. +system.cpu.dcache.total_refs 14029601 # Total number of references to valid blocks. system.cpu.dcache.warmup_cycle 10840000 # Cycle when the warmup percentage was hit. -system.cpu.dcache.writebacks 0 # number of writebacks +system.cpu.dcache.writebacks 428885 # number of writebacks system.cpu.dtb.accesses 1020787 # DTB accesses system.cpu.dtb.acv 367 # DTB access violations system.cpu.dtb.hits 16053818 # DTB hits @@ -106,12 +89,12 @@ system.cpu.dtb.write_acv 157 # DT system.cpu.dtb.write_hits 6349968 # DTB write hits system.cpu.dtb.write_misses 1142 # DTB write misses system.cpu.icache.ReadReq_accesses 60007317 # number of ReadReq accesses(hits+misses) -system.cpu.icache.ReadReq_hits 59087263 # number of ReadReq hits +system.cpu.icache.ReadReq_hits 59087260 # number of ReadReq hits system.cpu.icache.ReadReq_miss_rate 0.015332 # miss rate for ReadReq accesses -system.cpu.icache.ReadReq_misses 920054 # number of ReadReq misses +system.cpu.icache.ReadReq_misses 920057 # number of ReadReq misses system.cpu.icache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked system.cpu.icache.avg_blocked_cycles_no_targets # average number of cycles each access was blocked -system.cpu.icache.avg_refs 64.229545 # Average number of references to valid blocks. +system.cpu.icache.avg_refs 64.229332 # Average number of references to valid blocks. system.cpu.icache.blocked_no_mshrs 0 # number of cycles access was blocked system.cpu.icache.blocked_no_targets 0 # number of cycles access was blocked system.cpu.icache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked @@ -120,10 +103,10 @@ system.cpu.icache.cache_copies 0 # nu system.cpu.icache.demand_accesses 60007317 # number of demand (read+write) accesses system.cpu.icache.demand_avg_miss_latency 0 # average overall miss latency system.cpu.icache.demand_avg_mshr_miss_latency # average overall mshr miss latency -system.cpu.icache.demand_hits 59087263 # number of demand (read+write) hits +system.cpu.icache.demand_hits 59087260 # number of demand (read+write) hits system.cpu.icache.demand_miss_latency 0 # number of demand (read+write) miss cycles system.cpu.icache.demand_miss_rate 0.015332 # miss rate for demand accesses -system.cpu.icache.demand_misses 920054 # number of demand (read+write) misses +system.cpu.icache.demand_misses 920057 # number of demand (read+write) misses system.cpu.icache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits system.cpu.icache.demand_mshr_miss_latency 0 # number of demand (read+write) MSHR miss cycles system.cpu.icache.demand_mshr_miss_rate 0 # mshr miss rate for demand accesses @@ -135,10 +118,10 @@ system.cpu.icache.overall_accesses 60007317 # nu system.cpu.icache.overall_avg_miss_latency 0 # average overall miss latency system.cpu.icache.overall_avg_mshr_miss_latency # average overall mshr miss latency system.cpu.icache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency -system.cpu.icache.overall_hits 59087263 # number of overall hits +system.cpu.icache.overall_hits 59087260 # number of overall hits system.cpu.icache.overall_miss_latency 0 # number of overall miss cycles system.cpu.icache.overall_miss_rate 0.015332 # miss rate for overall accesses -system.cpu.icache.overall_misses 920054 # number of overall misses +system.cpu.icache.overall_misses 920057 # number of overall misses system.cpu.icache.overall_mshr_hits 0 # number of overall MSHR hits system.cpu.icache.overall_mshr_miss_latency 0 # number of overall MSHR miss cycles system.cpu.icache.overall_mshr_miss_rate 0 # mshr miss rate for overall accesses @@ -154,37 +137,11 @@ system.cpu.icache.prefetcher.num_hwpf_issued 0 system.cpu.icache.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated system.cpu.icache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time -system.cpu.icache.protocol.hwpf_invalid 0 # hard prefetch misses to invalid blocks -system.cpu.icache.protocol.read_invalid 920054 # read misses to invalid blocks -system.cpu.icache.protocol.snoop_inv_exclusive 0 # Invalidate snoops on exclusive blocks -system.cpu.icache.protocol.snoop_inv_invalid 0 # Invalidate snoops on invalid blocks -system.cpu.icache.protocol.snoop_inv_modified 0 # Invalidate snoops on modified blocks -system.cpu.icache.protocol.snoop_inv_owned 0 # Invalidate snoops on owned blocks -system.cpu.icache.protocol.snoop_inv_shared 0 # Invalidate snoops on shared blocks -system.cpu.icache.protocol.snoop_read_exclusive 643 # read snoops on exclusive blocks -system.cpu.icache.protocol.snoop_read_modified 0 # read snoops on modified blocks -system.cpu.icache.protocol.snoop_read_owned 0 # read snoops on owned blocks -system.cpu.icache.protocol.snoop_read_shared 1039 # read snoops on shared blocks -system.cpu.icache.protocol.snoop_readex_exclusive 105 # readEx snoops on exclusive blocks -system.cpu.icache.protocol.snoop_readex_modified 0 # readEx snoops on modified blocks -system.cpu.icache.protocol.snoop_readex_owned 0 # readEx snoops on owned blocks -system.cpu.icache.protocol.snoop_readex_shared 1 # readEx snoops on shared blocks -system.cpu.icache.protocol.snoop_upgrade_owned 0 # upgrade snoops on owned blocks -system.cpu.icache.protocol.snoop_upgrade_shared 9 # upgradee snoops on shared blocks -system.cpu.icache.protocol.snoop_writeinv_exclusive 0 # WriteInvalidate snoops on exclusive blocks -system.cpu.icache.protocol.snoop_writeinv_invalid 0 # WriteInvalidate snoops on invalid blocks -system.cpu.icache.protocol.snoop_writeinv_modified 0 # WriteInvalidate snoops on modified blocks -system.cpu.icache.protocol.snoop_writeinv_owned 0 # WriteInvalidate snoops on owned blocks -system.cpu.icache.protocol.snoop_writeinv_shared 0 # WriteInvalidate snoops on shared blocks -system.cpu.icache.protocol.swpf_invalid 0 # soft prefetch misses to invalid blocks -system.cpu.icache.protocol.write_invalid 0 # write misses to invalid blocks -system.cpu.icache.protocol.write_owned 0 # write misses to owned blocks -system.cpu.icache.protocol.write_shared 0 # write misses to shared blocks -system.cpu.icache.replacements 919427 # number of replacements -system.cpu.icache.sampled_refs 919939 # Sample count of references to valid blocks. +system.cpu.icache.replacements 919430 # number of replacements +system.cpu.icache.sampled_refs 919942 # Sample count of references to valid blocks. system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions system.cpu.icache.tagsinuse 511.214820 # Cycle average of tags in use -system.cpu.icache.total_refs 59087263 # Total number of references to valid blocks. +system.cpu.icache.total_refs 59087260 # Total number of references to valid blocks. system.cpu.icache.warmup_cycle 9686972500 # Cycle when the warmup percentage was hit. system.cpu.icache.writebacks 0 # number of writebacks system.cpu.idle_fraction 0.983588 # Percentage of idle cycles @@ -222,8 +179,8 @@ system.cpu.kern.ipl_good_0 73448 49.29% 49.29% # nu system.cpu.kern.ipl_good_21 243 0.16% 49.46% # number of times we switched to this ipl from a different ipl system.cpu.kern.ipl_good_22 1865 1.25% 50.71% # number of times we switched to this ipl from a different ipl system.cpu.kern.ipl_good_31 73448 49.29% 100.00% # number of times we switched to this ipl from a different ipl -system.cpu.kern.ipl_ticks 1828355278500 # number of cycles we spent at this ipl -system.cpu.kern.ipl_ticks_0 1811087547500 99.06% 99.06% # number of cycles we spent at this ipl +system.cpu.kern.ipl_ticks 1828355268500 # number of cycles we spent at this ipl +system.cpu.kern.ipl_ticks_0 1811087537500 99.06% 99.06% # number of cycles we spent at this ipl system.cpu.kern.ipl_ticks_21 20110000 0.00% 99.06% # number of cycles we spent at this ipl system.cpu.kern.ipl_ticks_22 80195000 0.00% 99.06% # number of cycles we spent at this ipl system.cpu.kern.ipl_ticks_31 17167426000 0.94% 100.00% # number of cycles we spent at this ipl @@ -243,7 +200,7 @@ system.cpu.kern.mode_switch_good_user 1 # fr system.cpu.kern.mode_switch_good_idle 0.081545 # fraction of useful protection mode switches system.cpu.kern.mode_ticks_kernel 26834026500 1.47% 1.47% # number of ticks spent at the given mode system.cpu.kern.mode_ticks_user 1465069000 0.08% 1.55% # number of ticks spent at the given mode -system.cpu.kern.mode_ticks_idle 1800056182000 98.45% 100.00% # number of ticks spent at the given mode +system.cpu.kern.mode_ticks_idle 1800056172000 98.45% 100.00% # number of ticks spent at the given mode system.cpu.kern.swap_context 4178 # number of times the context was actually changed system.cpu.kern.syscall 326 # number of syscalls executed system.cpu.kern.syscall_2 8 2.45% 2.45% # number of syscalls executed @@ -293,30 +250,33 @@ system.disk2.dma_write_bytes 8192 # Nu system.disk2.dma_write_full_pages 1 # Number of full page size DMA writes. system.disk2.dma_write_txs 1 # Number of DMA write transactions. system.l2c.ReadExReq_accesses 304342 # number of ReadExReq accesses(hits+misses) -system.l2c.ReadExReq_hits 187346 # number of ReadExReq hits -system.l2c.ReadExReq_miss_rate 0.384423 # miss rate for ReadExReq accesses -system.l2c.ReadExReq_misses 116996 # number of ReadExReq misses -system.l2c.ReadReq_accesses 2658872 # number of ReadReq accesses(hits+misses) -system.l2c.ReadReq_hits 1717828 # number of ReadReq hits -system.l2c.ReadReq_miss_rate 0.353926 # miss rate for ReadReq accesses -system.l2c.ReadReq_misses 941044 # number of ReadReq misses +system.l2c.ReadExReq_miss_rate 1 # miss rate for ReadExReq accesses +system.l2c.ReadExReq_misses 304342 # number of ReadExReq misses +system.l2c.ReadReq_accesses 2658877 # number of ReadReq accesses(hits+misses) +system.l2c.ReadReq_hits 1558398 # number of ReadReq hits +system.l2c.ReadReq_miss_rate 0.413889 # miss rate for ReadReq accesses +system.l2c.ReadReq_misses 1100479 # number of ReadReq misses +system.l2c.UpgradeReq_accesses 124941 # number of UpgradeReq accesses(hits+misses) +system.l2c.UpgradeReq_miss_rate 1 # miss rate for UpgradeReq accesses +system.l2c.UpgradeReq_misses 124941 # number of UpgradeReq misses system.l2c.Writeback_accesses 428885 # number of Writeback accesses(hits+misses) -system.l2c.Writeback_hits 428885 # number of Writeback hits +system.l2c.Writeback_miss_rate 1 # miss rate for Writeback accesses +system.l2c.Writeback_misses 428885 # number of Writeback misses system.l2c.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked system.l2c.avg_blocked_cycles_no_targets # average number of cycles each access was blocked -system.l2c.avg_refs 2.205901 # Average number of references to valid blocks. +system.l2c.avg_refs 1.644070 # Average number of references to valid blocks. system.l2c.blocked_no_mshrs 0 # number of cycles access was blocked system.l2c.blocked_no_targets 0 # number of cycles access was blocked system.l2c.blocked_cycles_no_mshrs 0 # number of cycles access was blocked system.l2c.blocked_cycles_no_targets 0 # number of cycles access was blocked system.l2c.cache_copies 0 # number of cache copies performed -system.l2c.demand_accesses 2658872 # number of demand (read+write) accesses +system.l2c.demand_accesses 2963219 # number of demand (read+write) accesses system.l2c.demand_avg_miss_latency 0 # average overall miss latency system.l2c.demand_avg_mshr_miss_latency # average overall mshr miss latency -system.l2c.demand_hits 1717828 # number of demand (read+write) hits +system.l2c.demand_hits 1558398 # number of demand (read+write) hits system.l2c.demand_miss_latency 0 # number of demand (read+write) miss cycles -system.l2c.demand_miss_rate 0.353926 # miss rate for demand accesses -system.l2c.demand_misses 941044 # number of demand (read+write) misses +system.l2c.demand_miss_rate 0.474086 # miss rate for demand accesses +system.l2c.demand_misses 1404821 # number of demand (read+write) misses system.l2c.demand_mshr_hits 0 # number of demand (read+write) MSHR hits system.l2c.demand_mshr_miss_latency 0 # number of demand (read+write) MSHR miss cycles system.l2c.demand_mshr_miss_rate 0 # mshr miss rate for demand accesses @@ -324,14 +284,14 @@ system.l2c.demand_mshr_misses 0 # nu system.l2c.fast_writes 0 # number of fast writes performed system.l2c.mshr_cap_events 0 # number of times MSHR cap was activated system.l2c.no_allocate_misses 0 # Number of misses that were no-allocate -system.l2c.overall_accesses 3087757 # number of overall (read+write) accesses +system.l2c.overall_accesses 2963219 # number of overall (read+write) accesses system.l2c.overall_avg_miss_latency 0 # average overall miss latency system.l2c.overall_avg_mshr_miss_latency # average overall mshr miss latency system.l2c.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency -system.l2c.overall_hits 2146713 # number of overall hits +system.l2c.overall_hits 1558398 # number of overall hits system.l2c.overall_miss_latency 0 # number of overall miss cycles -system.l2c.overall_miss_rate 0.304766 # miss rate for overall accesses -system.l2c.overall_misses 941044 # number of overall misses +system.l2c.overall_miss_rate 0.474086 # miss rate for overall accesses +system.l2c.overall_misses 1404821 # number of overall misses system.l2c.overall_mshr_hits 0 # number of overall MSHR hits system.l2c.overall_mshr_miss_latency 0 # number of overall MSHR miss cycles system.l2c.overall_mshr_miss_rate 0 # mshr miss rate for overall accesses @@ -347,12 +307,12 @@ system.l2c.prefetcher.num_hwpf_issued 0 # nu system.l2c.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated system.l2c.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page system.l2c.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time -system.l2c.replacements 992432 # number of replacements -system.l2c.sampled_refs 1057820 # Sample count of references to valid blocks. +system.l2c.replacements 947436 # number of replacements +system.l2c.sampled_refs 965232 # Sample count of references to valid blocks. system.l2c.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.l2c.tagsinuse 65517.661064 # Cycle average of tags in use -system.l2c.total_refs 2333446 # Total number of references to valid blocks. -system.l2c.warmup_cycle 614754000 # Cycle when the warmup percentage was hit. +system.l2c.tagsinuse 15309.548937 # Cycle average of tags in use +system.l2c.total_refs 1586909 # Total number of references to valid blocks. +system.l2c.warmup_cycle 789998500 # Cycle when the warmup percentage was hit. system.l2c.writebacks 0 # number of writebacks system.tsunami.ethernet.coalescedRxDesc # average number of RxDesc's coalesced into each post system.tsunami.ethernet.coalescedRxIdle # average number of RxIdle's coalesced into each post diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/stderr b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/stderr index f34493a86..32120d9d6 100644 --- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/stderr +++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/stderr @@ -1,3 +1,3 @@ -Listening for system connection on port 3456 -0: system.remote_gdb.listener: listening for remote gdb #0 on port 7000 +Listening for system connection on port 3457 +0: system.remote_gdb.listener: listening for remote gdb on port 7001 warn: Entering event queue @ 0. Starting simulation... diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/stdout b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/stdout index 6a6b8d735..1f648aea1 100644 --- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/stdout +++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-atomic/stdout @@ -5,10 +5,9 @@ The Regents of The University of Michigan All Rights Reserved -M5 compiled Jun 10 2007 14:10:03 -M5 started Mon Jun 11 00:55:45 2007 -M5 executing on iceaxe -command line: /Users/nate/build/outgoing/build/ALPHA_FS/m5.debug -d /Users/nate/build/outgoing/build/ALPHA_FS/tests/debug/quick/10.linux-boot/alpha/linux/tsunami-simple-atomic tests/run.py quick/10.linux-boot/alpha/linux/tsunami-simple-atomic +M5 compiled Aug 3 2007 04:02:11 +M5 started Fri Aug 3 04:21:55 2007 +M5 executing on zizzer.eecs.umich.edu +command line: build/ALPHA_FS/m5.fast -d build/ALPHA_FS/tests/fast/quick/10.linux-boot/alpha/linux/tsunami-simple-atomic tests/run.py quick/10.linux-boot/alpha/linux/tsunami-simple-atomic Global frequency set at 1000000000000 ticks per second - 0: system.tsunami.io.rtc: Real-time clock set to Thu Jan 1 00:00:00 2009 -Exiting @ tick 1828355481500 because m5_exit instruction encountered +Exiting @ tick 1828355476000 because m5_exit instruction encountered diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/config.ini b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/config.ini index 552344dcb..bbfd059cd 100644 --- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/config.ini +++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/config.ini @@ -35,7 +35,7 @@ side_b=system.membus.port[0] [system.cpu0] type=TimingSimpleCPU -children=dcache dtb icache itb +children=dcache dtb icache itb tracer clock=500 cpu_id=0 defer_registration=false @@ -54,17 +54,15 @@ phase=0 profile=0 progress_interval=0 system=system +tracer=system.cpu0.tracer dcache_port=system.cpu0.dcache.cpu_side icache_port=system.cpu0.icache.cpu_side [system.cpu0.dcache] type=BaseCache -children=protocol -adaptive_compression=false +addr_range=0:18446744073709551615 assoc=4 block_size=64 -compressed_bus=false -compression_latency=0 hash_delay=1 latency=1000 lifo=false @@ -82,12 +80,10 @@ prefetch_serial_squash=false prefetch_use_cpu_id=true prefetcher_size=100 prioritizeRequests=false -protocol=system.cpu0.dcache.protocol repl=Null size=32768 split=false split_size=0 -store_compressed=false subblock_size=0 tgts_per_mshr=8 trace_addr=0 @@ -96,23 +92,15 @@ write_buffers=8 cpu_side=system.cpu0.dcache_port mem_side=system.toL2Bus.port[2] -[system.cpu0.dcache.protocol] -type=CoherenceProtocol -do_upgrades=true -protocol=moesi - [system.cpu0.dtb] type=AlphaDTB size=64 [system.cpu0.icache] type=BaseCache -children=protocol -adaptive_compression=false +addr_range=0:18446744073709551615 assoc=1 block_size=64 -compressed_bus=false -compression_latency=0 hash_delay=1 latency=1000 lifo=false @@ -130,12 +118,10 @@ prefetch_serial_squash=false prefetch_use_cpu_id=true prefetcher_size=100 prioritizeRequests=false -protocol=system.cpu0.icache.protocol repl=Null size=32768 split=false split_size=0 -store_compressed=false subblock_size=0 tgts_per_mshr=8 trace_addr=0 @@ -144,18 +130,16 @@ write_buffers=8 cpu_side=system.cpu0.icache_port mem_side=system.toL2Bus.port[1] -[system.cpu0.icache.protocol] -type=CoherenceProtocol -do_upgrades=true -protocol=moesi - [system.cpu0.itb] type=AlphaITB size=48 +[system.cpu0.tracer] +type=ExeTracer + [system.cpu1] type=TimingSimpleCPU -children=dcache dtb icache itb +children=dcache dtb icache itb tracer clock=500 cpu_id=1 defer_registration=false @@ -174,17 +158,15 @@ phase=0 profile=0 progress_interval=0 system=system +tracer=system.cpu1.tracer dcache_port=system.cpu1.dcache.cpu_side icache_port=system.cpu1.icache.cpu_side [system.cpu1.dcache] type=BaseCache -children=protocol -adaptive_compression=false +addr_range=0:18446744073709551615 assoc=4 block_size=64 -compressed_bus=false -compression_latency=0 hash_delay=1 latency=1000 lifo=false @@ -202,12 +184,10 @@ prefetch_serial_squash=false prefetch_use_cpu_id=true prefetcher_size=100 prioritizeRequests=false -protocol=system.cpu1.dcache.protocol repl=Null size=32768 split=false split_size=0 -store_compressed=false subblock_size=0 tgts_per_mshr=8 trace_addr=0 @@ -216,23 +196,15 @@ write_buffers=8 cpu_side=system.cpu1.dcache_port mem_side=system.toL2Bus.port[4] -[system.cpu1.dcache.protocol] -type=CoherenceProtocol -do_upgrades=true -protocol=moesi - [system.cpu1.dtb] type=AlphaDTB size=64 [system.cpu1.icache] type=BaseCache -children=protocol -adaptive_compression=false +addr_range=0:18446744073709551615 assoc=1 block_size=64 -compressed_bus=false -compression_latency=0 hash_delay=1 latency=1000 lifo=false @@ -250,12 +222,10 @@ prefetch_serial_squash=false prefetch_use_cpu_id=true prefetcher_size=100 prioritizeRequests=false -protocol=system.cpu1.icache.protocol repl=Null size=32768 split=false split_size=0 -store_compressed=false subblock_size=0 tgts_per_mshr=8 trace_addr=0 @@ -264,15 +234,13 @@ write_buffers=8 cpu_side=system.cpu1.icache_port mem_side=system.toL2Bus.port[3] -[system.cpu1.icache.protocol] -type=CoherenceProtocol -do_upgrades=true -protocol=moesi - [system.cpu1.itb] type=AlphaITB size=48 +[system.cpu1.tracer] +type=ExeTracer + [system.disk0] type=IdeDisk children=image @@ -327,11 +295,9 @@ port=system.bridge.side_a system.tsunami.cchip.pio system.tsunami.pchip.pio syst [system.l2c] type=BaseCache -adaptive_compression=false +addr_range=0:18446744073709551615 assoc=8 block_size=64 -compressed_bus=false -compression_latency=0 hash_delay=1 latency=10000 lifo=false @@ -349,12 +315,10 @@ prefetch_serial_squash=false prefetch_use_cpu_id=true prefetcher_size=100 prioritizeRequests=false -protocol=Null repl=Null size=4194304 split=false split_size=0 -store_compressed=false subblock_size=0 tgts_per_mshr=16 trace_addr=0 @@ -918,7 +882,7 @@ pio_addr=8804615847936 pio_latency=1000 platform=system.tsunami system=system -time=2009 1 1 0 0 0 3 1 +time=Thu Jan 1 00:00:00 2009 tsunami=system.tsunami year_is_bcd=false pio=system.iobus.port[23] diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/m5stats.txt b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/m5stats.txt index 0e86983a6..b7e78eb06 100644 --- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/m5stats.txt +++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/m5stats.txt @@ -1,74 +1,93 @@ ---------- Begin Simulation Statistics ---------- -host_inst_rate 62524 # Simulator instruction rate (inst/s) -host_seconds 1011.60 # Real time elapsed on the host -host_tick_rate 1928760125 # Simulator tick rate (ticks/s) +host_inst_rate 608366 # Simulator instruction rate (inst/s) +host_mem_usage 227884 # Number of bytes of host memory used +host_seconds 106.58 # Real time elapsed on the host +host_tick_rate 18308931831 # Simulator tick rate (ticks/s) sim_freq 1000000000000 # Frequency of simulated ticks -sim_insts 63248814 # Number of instructions simulated -sim_seconds 1.951129 # Number of seconds simulated -sim_ticks 1951129131000 # Number of ticks simulated -system.cpu0.dcache.ReadReq_accesses 9299202 # number of ReadReq accesses(hits+misses) -system.cpu0.dcache.ReadReq_avg_miss_latency 13073.177688 # average ReadReq miss latency -system.cpu0.dcache.ReadReq_avg_mshr_miss_latency 12073.152824 # average ReadReq mshr miss latency -system.cpu0.dcache.ReadReq_hits 7589849 # number of ReadReq hits -system.cpu0.dcache.ReadReq_miss_latency 22346675500 # number of ReadReq miss cycles -system.cpu0.dcache.ReadReq_miss_rate 0.183817 # miss rate for ReadReq accesses -system.cpu0.dcache.ReadReq_misses 1709353 # number of ReadReq misses -system.cpu0.dcache.ReadReq_mshr_miss_latency 20637280000 # number of ReadReq MSHR miss cycles -system.cpu0.dcache.ReadReq_mshr_miss_rate 0.183817 # mshr miss rate for ReadReq accesses -system.cpu0.dcache.ReadReq_mshr_misses 1709353 # number of ReadReq MSHR misses -system.cpu0.dcache.ReadReq_mshr_uncacheable 6873 # number of ReadReq MSHR uncacheable -system.cpu0.dcache.ReadResp_avg_mshr_uncacheable_latency inf # average ReadResp mshr uncacheable latency -system.cpu0.dcache.ReadResp_mshr_uncacheable_latency 841915000 # number of ReadResp MSHR uncacheable cycles -system.cpu0.dcache.WriteReq_accesses 6016348 # number of WriteReq accesses(hits+misses) -system.cpu0.dcache.WriteReq_avg_miss_latency 12644.438594 # average WriteReq miss latency -system.cpu0.dcache.WriteReq_avg_mshr_miss_latency 11630.972878 # average WriteReq mshr miss latency -system.cpu0.dcache.WriteReq_hits 5727689 # number of WriteReq hits -system.cpu0.dcache.WriteReq_miss_latency 3649931000 # number of WriteReq miss cycles -system.cpu0.dcache.WriteReq_miss_rate 0.047979 # miss rate for WriteReq accesses -system.cpu0.dcache.WriteReq_misses 288659 # number of WriteReq misses -system.cpu0.dcache.WriteReq_mshr_miss_latency 3357385000 # number of WriteReq MSHR miss cycles -system.cpu0.dcache.WriteReq_mshr_miss_rate 0.047979 # mshr miss rate for WriteReq accesses -system.cpu0.dcache.WriteReq_mshr_misses 288659 # number of WriteReq MSHR misses -system.cpu0.dcache.WriteReq_mshr_uncacheable 9698 # number of WriteReq MSHR uncacheable -system.cpu0.dcache.WriteResp_avg_mshr_uncacheable_latency inf # average WriteResp mshr uncacheable latency -system.cpu0.dcache.WriteResp_mshr_uncacheable_latency 1186164500 # number of WriteResp MSHR uncacheable cycles +sim_insts 64839479 # Number of instructions simulated +sim_seconds 1.951367 # Number of seconds simulated +sim_ticks 1951367346000 # Number of ticks simulated +system.cpu0.dcache.LoadLockedReq_accesses 150248 # number of LoadLockedReq accesses(hits+misses) +system.cpu0.dcache.LoadLockedReq_avg_miss_latency 10860.561606 # average LoadLockedReq miss latency +system.cpu0.dcache.LoadLockedReq_avg_mshr_miss_latency 9860.561606 # average LoadLockedReq mshr miss latency +system.cpu0.dcache.LoadLockedReq_hits 136751 # number of LoadLockedReq hits +system.cpu0.dcache.LoadLockedReq_miss_latency 146585000 # number of LoadLockedReq miss cycles +system.cpu0.dcache.LoadLockedReq_miss_rate 0.089831 # miss rate for LoadLockedReq accesses +system.cpu0.dcache.LoadLockedReq_misses 13497 # number of LoadLockedReq misses +system.cpu0.dcache.LoadLockedReq_mshr_miss_latency 133088000 # number of LoadLockedReq MSHR miss cycles +system.cpu0.dcache.LoadLockedReq_mshr_miss_rate 0.089831 # mshr miss rate for LoadLockedReq accesses +system.cpu0.dcache.LoadLockedReq_mshr_misses 13497 # number of LoadLockedReq MSHR misses +system.cpu0.dcache.ReadReq_accesses 7920707 # number of ReadReq accesses(hits+misses) +system.cpu0.dcache.ReadReq_avg_miss_latency 13239.029006 # average ReadReq miss latency +system.cpu0.dcache.ReadReq_avg_mshr_miss_latency 12239.003253 # average ReadReq mshr miss latency +system.cpu0.dcache.ReadReq_avg_mshr_uncacheable_latency inf # average ReadReq mshr uncacheable latency +system.cpu0.dcache.ReadReq_hits 6328668 # number of ReadReq hits +system.cpu0.dcache.ReadReq_miss_latency 21077050500 # number of ReadReq miss cycles +system.cpu0.dcache.ReadReq_miss_rate 0.200997 # miss rate for ReadReq accesses +system.cpu0.dcache.ReadReq_misses 1592039 # number of ReadReq misses +system.cpu0.dcache.ReadReq_mshr_miss_latency 19484970500 # number of ReadReq MSHR miss cycles +system.cpu0.dcache.ReadReq_mshr_miss_rate 0.200997 # mshr miss rate for ReadReq accesses +system.cpu0.dcache.ReadReq_mshr_misses 1592039 # number of ReadReq MSHR misses +system.cpu0.dcache.ReadReq_mshr_uncacheable_latency 846944000 # number of ReadReq MSHR uncacheable cycles +system.cpu0.dcache.StoreCondReq_accesses 149727 # number of StoreCondReq accesses(hits+misses) +system.cpu0.dcache.StoreCondReq_avg_miss_latency 12266.165876 # average StoreCondReq miss latency +system.cpu0.dcache.StoreCondReq_avg_mshr_miss_latency 11266.165876 # average StoreCondReq mshr miss latency +system.cpu0.dcache.StoreCondReq_hits 126963 # number of StoreCondReq hits +system.cpu0.dcache.StoreCondReq_miss_latency 279227000 # number of StoreCondReq miss cycles +system.cpu0.dcache.StoreCondReq_miss_rate 0.152037 # miss rate for StoreCondReq accesses +system.cpu0.dcache.StoreCondReq_misses 22764 # number of StoreCondReq misses +system.cpu0.dcache.StoreCondReq_mshr_miss_latency 256463000 # number of StoreCondReq MSHR miss cycles +system.cpu0.dcache.StoreCondReq_mshr_miss_rate 0.152037 # mshr miss rate for StoreCondReq accesses +system.cpu0.dcache.StoreCondReq_mshr_misses 22764 # number of StoreCondReq MSHR misses +system.cpu0.dcache.WriteReq_accesses 4824283 # number of WriteReq accesses(hits+misses) +system.cpu0.dcache.WriteReq_avg_miss_latency 13877.297001 # average WriteReq miss latency +system.cpu0.dcache.WriteReq_avg_mshr_miss_latency 12877.297001 # average WriteReq mshr miss latency +system.cpu0.dcache.WriteReq_avg_mshr_uncacheable_latency inf # average WriteReq mshr uncacheable latency +system.cpu0.dcache.WriteReq_hits 4508382 # number of WriteReq hits +system.cpu0.dcache.WriteReq_miss_latency 4383852000 # number of WriteReq miss cycles +system.cpu0.dcache.WriteReq_miss_rate 0.065481 # miss rate for WriteReq accesses +system.cpu0.dcache.WriteReq_misses 315901 # number of WriteReq misses +system.cpu0.dcache.WriteReq_mshr_miss_latency 4067951000 # number of WriteReq MSHR miss cycles +system.cpu0.dcache.WriteReq_mshr_miss_rate 0.065481 # mshr miss rate for WriteReq accesses +system.cpu0.dcache.WriteReq_mshr_misses 315901 # number of WriteReq MSHR misses +system.cpu0.dcache.WriteReq_mshr_uncacheable_latency 1297859000 # number of WriteReq MSHR uncacheable cycles system.cpu0.dcache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked system.cpu0.dcache.avg_blocked_cycles_no_targets # average number of cycles each access was blocked -system.cpu0.dcache.avg_refs 6.687909 # Average number of references to valid blocks. +system.cpu0.dcache.avg_refs 6.121232 # Average number of references to valid blocks. system.cpu0.dcache.blocked_no_mshrs 0 # number of cycles access was blocked system.cpu0.dcache.blocked_no_targets 0 # number of cycles access was blocked system.cpu0.dcache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked system.cpu0.dcache.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu0.dcache.cache_copies 0 # number of cache copies performed -system.cpu0.dcache.demand_accesses 15315550 # number of demand (read+write) accesses -system.cpu0.dcache.demand_avg_miss_latency 13011.236419 # average overall miss latency -system.cpu0.dcache.demand_avg_mshr_miss_latency 12009.269714 # average overall mshr miss latency -system.cpu0.dcache.demand_hits 13317538 # number of demand (read+write) hits -system.cpu0.dcache.demand_miss_latency 25996606500 # number of demand (read+write) miss cycles -system.cpu0.dcache.demand_miss_rate 0.130456 # miss rate for demand accesses -system.cpu0.dcache.demand_misses 1998012 # number of demand (read+write) misses +system.cpu0.dcache.demand_accesses 12744990 # number of demand (read+write) accesses +system.cpu0.dcache.demand_avg_miss_latency 13344.708167 # average overall miss latency +system.cpu0.dcache.demand_avg_mshr_miss_latency 12344.686678 # average overall mshr miss latency +system.cpu0.dcache.demand_hits 10837050 # number of demand (read+write) hits +system.cpu0.dcache.demand_miss_latency 25460902500 # number of demand (read+write) miss cycles +system.cpu0.dcache.demand_miss_rate 0.149701 # miss rate for demand accesses +system.cpu0.dcache.demand_misses 1907940 # number of demand (read+write) misses system.cpu0.dcache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu0.dcache.demand_mshr_miss_latency 23994665000 # number of demand (read+write) MSHR miss cycles -system.cpu0.dcache.demand_mshr_miss_rate 0.130456 # mshr miss rate for demand accesses -system.cpu0.dcache.demand_mshr_misses 1998012 # number of demand (read+write) MSHR misses +system.cpu0.dcache.demand_mshr_miss_latency 23552921500 # number of demand (read+write) MSHR miss cycles +system.cpu0.dcache.demand_mshr_miss_rate 0.149701 # mshr miss rate for demand accesses +system.cpu0.dcache.demand_mshr_misses 1907940 # number of demand (read+write) MSHR misses system.cpu0.dcache.fast_writes 0 # number of fast writes performed system.cpu0.dcache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu0.dcache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu0.dcache.overall_accesses 15315550 # number of overall (read+write) accesses -system.cpu0.dcache.overall_avg_miss_latency 13011.236419 # average overall miss latency -system.cpu0.dcache.overall_avg_mshr_miss_latency 12009.269714 # average overall mshr miss latency -system.cpu0.dcache.overall_avg_mshr_uncacheable_latency 0 # average overall mshr uncacheable latency -system.cpu0.dcache.overall_hits 13317538 # number of overall hits -system.cpu0.dcache.overall_miss_latency 25996606500 # number of overall miss cycles -system.cpu0.dcache.overall_miss_rate 0.130456 # miss rate for overall accesses -system.cpu0.dcache.overall_misses 1998012 # number of overall misses +system.cpu0.dcache.overall_accesses 12744990 # number of overall (read+write) accesses +system.cpu0.dcache.overall_avg_miss_latency 13344.708167 # average overall miss latency +system.cpu0.dcache.overall_avg_mshr_miss_latency 12344.686678 # average overall mshr miss latency +system.cpu0.dcache.overall_avg_mshr_uncacheable_latency inf # average overall mshr uncacheable latency +system.cpu0.dcache.overall_hits 10837050 # number of overall hits +system.cpu0.dcache.overall_miss_latency 25460902500 # number of overall miss cycles +system.cpu0.dcache.overall_miss_rate 0.149701 # miss rate for overall accesses +system.cpu0.dcache.overall_misses 1907940 # number of overall misses system.cpu0.dcache.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu0.dcache.overall_mshr_miss_latency 23994665000 # number of overall MSHR miss cycles -system.cpu0.dcache.overall_mshr_miss_rate 0.130456 # mshr miss rate for overall accesses -system.cpu0.dcache.overall_mshr_misses 1998012 # number of overall MSHR misses -system.cpu0.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles -system.cpu0.dcache.overall_mshr_uncacheable_misses 16571 # number of overall MSHR uncacheable misses +system.cpu0.dcache.overall_mshr_miss_latency 23552921500 # number of overall MSHR miss cycles +system.cpu0.dcache.overall_mshr_miss_rate 0.149701 # mshr miss rate for overall accesses +system.cpu0.dcache.overall_mshr_misses 1907940 # number of overall MSHR misses +system.cpu0.dcache.overall_mshr_uncacheable_latency 2144803000 # number of overall MSHR uncacheable cycles +system.cpu0.dcache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.cpu0.dcache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache system.cpu0.dcache.prefetcher.num_hwpf_already_in_mshr 0 # number of hwpf that were already in mshr system.cpu0.dcache.prefetcher.num_hwpf_already_in_prefetcher 0 # number of hwpf that were already in the prefetch queue @@ -78,95 +97,69 @@ system.cpu0.dcache.prefetcher.num_hwpf_issued 0 system.cpu0.dcache.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated system.cpu0.dcache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page system.cpu0.dcache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time -system.cpu0.dcache.protocol.hwpf_invalid 0 # hard prefetch misses to invalid blocks -system.cpu0.dcache.protocol.read_invalid 1709421 # read misses to invalid blocks -system.cpu0.dcache.protocol.snoop_inv_exclusive 0 # Invalidate snoops on exclusive blocks -system.cpu0.dcache.protocol.snoop_inv_invalid 0 # Invalidate snoops on invalid blocks -system.cpu0.dcache.protocol.snoop_inv_modified 0 # Invalidate snoops on modified blocks -system.cpu0.dcache.protocol.snoop_inv_owned 0 # Invalidate snoops on owned blocks -system.cpu0.dcache.protocol.snoop_inv_shared 0 # Invalidate snoops on shared blocks -system.cpu0.dcache.protocol.snoop_read_exclusive 908 # read snoops on exclusive blocks -system.cpu0.dcache.protocol.snoop_read_modified 3762 # read snoops on modified blocks -system.cpu0.dcache.protocol.snoop_read_owned 72 # read snoops on owned blocks -system.cpu0.dcache.protocol.snoop_read_shared 2297 # read snoops on shared blocks -system.cpu0.dcache.protocol.snoop_readex_exclusive 235 # readEx snoops on exclusive blocks -system.cpu0.dcache.protocol.snoop_readex_modified 207 # readEx snoops on modified blocks -system.cpu0.dcache.protocol.snoop_readex_owned 15 # readEx snoops on owned blocks -system.cpu0.dcache.protocol.snoop_readex_shared 7 # readEx snoops on shared blocks -system.cpu0.dcache.protocol.snoop_upgrade_owned 1074 # upgrade snoops on owned blocks -system.cpu0.dcache.protocol.snoop_upgrade_shared 726 # upgradee snoops on shared blocks -system.cpu0.dcache.protocol.snoop_writeinv_exclusive 0 # WriteInvalidate snoops on exclusive blocks -system.cpu0.dcache.protocol.snoop_writeinv_invalid 0 # WriteInvalidate snoops on invalid blocks -system.cpu0.dcache.protocol.snoop_writeinv_modified 0 # WriteInvalidate snoops on modified blocks -system.cpu0.dcache.protocol.snoop_writeinv_owned 0 # WriteInvalidate snoops on owned blocks -system.cpu0.dcache.protocol.snoop_writeinv_shared 0 # WriteInvalidate snoops on shared blocks -system.cpu0.dcache.protocol.swpf_invalid 0 # soft prefetch misses to invalid blocks -system.cpu0.dcache.protocol.write_invalid 284810 # write misses to invalid blocks -system.cpu0.dcache.protocol.write_owned 2533 # write misses to owned blocks -system.cpu0.dcache.protocol.write_shared 1354 # write misses to shared blocks -system.cpu0.dcache.replacements 1991354 # number of replacements -system.cpu0.dcache.sampled_refs 1991866 # Sample count of references to valid blocks. +system.cpu0.dcache.replacements 1829212 # number of replacements +system.cpu0.dcache.sampled_refs 1829724 # Sample count of references to valid blocks. system.cpu0.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu0.dcache.tagsinuse 503.775443 # Cycle average of tags in use -system.cpu0.dcache.total_refs 13321418 # Total number of references to valid blocks. -system.cpu0.dcache.warmup_cycle 57953000 # Cycle when the warmup percentage was hit. -system.cpu0.dcache.writebacks 401606 # number of writebacks -system.cpu0.dtb.accesses 719860 # DTB accesses -system.cpu0.dtb.acv 289 # DTB access violations -system.cpu0.dtb.hits 15299767 # DTB hits -system.cpu0.dtb.misses 8485 # DTB misses -system.cpu0.dtb.read_accesses 524201 # DTB read accesses -system.cpu0.dtb.read_acv 174 # DTB read access violations -system.cpu0.dtb.read_hits 9282693 # DTB read hits -system.cpu0.dtb.read_misses 7687 # DTB read misses -system.cpu0.dtb.write_accesses 195659 # DTB write accesses -system.cpu0.dtb.write_acv 115 # DTB write access violations -system.cpu0.dtb.write_hits 6017074 # DTB write hits -system.cpu0.dtb.write_misses 798 # DTB write misses -system.cpu0.icache.ReadReq_accesses 57872551 # number of ReadReq accesses(hits+misses) -system.cpu0.icache.ReadReq_avg_miss_latency 12029.752588 # average ReadReq miss latency -system.cpu0.icache.ReadReq_avg_mshr_miss_latency 11029.000057 # average ReadReq mshr miss latency -system.cpu0.icache.ReadReq_hits 56957639 # number of ReadReq hits -system.cpu0.icache.ReadReq_miss_latency 11006165000 # number of ReadReq miss cycles -system.cpu0.icache.ReadReq_miss_rate 0.015809 # miss rate for ReadReq accesses -system.cpu0.icache.ReadReq_misses 914912 # number of ReadReq misses -system.cpu0.icache.ReadReq_mshr_miss_latency 10090564500 # number of ReadReq MSHR miss cycles -system.cpu0.icache.ReadReq_mshr_miss_rate 0.015809 # mshr miss rate for ReadReq accesses -system.cpu0.icache.ReadReq_mshr_misses 914912 # number of ReadReq MSHR misses +system.cpu0.dcache.tagsinuse 497.900810 # Cycle average of tags in use +system.cpu0.dcache.total_refs 11200165 # Total number of references to valid blocks. +system.cpu0.dcache.warmup_cycle 58293000 # Cycle when the warmup percentage was hit. +system.cpu0.dcache.writebacks 322933 # number of writebacks +system.cpu0.dtb.accesses 725071 # DTB accesses +system.cpu0.dtb.acv 305 # DTB access violations +system.cpu0.dtb.hits 13035385 # DTB hits +system.cpu0.dtb.misses 8682 # DTB misses +system.cpu0.dtb.read_accesses 527638 # DTB read accesses +system.cpu0.dtb.read_acv 184 # DTB read access violations +system.cpu0.dtb.read_hits 8058540 # DTB read hits +system.cpu0.dtb.read_misses 7858 # DTB read misses +system.cpu0.dtb.write_accesses 197433 # DTB write accesses +system.cpu0.dtb.write_acv 121 # DTB write access violations +system.cpu0.dtb.write_hits 4976845 # DTB write hits +system.cpu0.dtb.write_misses 824 # DTB write misses +system.cpu0.icache.ReadReq_accesses 51081135 # number of ReadReq accesses(hits+misses) +system.cpu0.icache.ReadReq_avg_miss_latency 12048.344860 # average ReadReq miss latency +system.cpu0.icache.ReadReq_avg_mshr_miss_latency 11047.036239 # average ReadReq mshr miss latency +system.cpu0.icache.ReadReq_hits 50399501 # number of ReadReq hits +system.cpu0.icache.ReadReq_miss_latency 8212561500 # number of ReadReq miss cycles +system.cpu0.icache.ReadReq_miss_rate 0.013344 # miss rate for ReadReq accesses +system.cpu0.icache.ReadReq_misses 681634 # number of ReadReq misses +system.cpu0.icache.ReadReq_mshr_miss_latency 7530035500 # number of ReadReq MSHR miss cycles +system.cpu0.icache.ReadReq_mshr_miss_rate 0.013344 # mshr miss rate for ReadReq accesses +system.cpu0.icache.ReadReq_mshr_misses 681634 # number of ReadReq MSHR misses system.cpu0.icache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked system.cpu0.icache.avg_blocked_cycles_no_targets # average number of cycles each access was blocked -system.cpu0.icache.avg_refs 62.632934 # Average number of references to valid blocks. +system.cpu0.icache.avg_refs 73.953888 # Average number of references to valid blocks. system.cpu0.icache.blocked_no_mshrs 0 # number of cycles access was blocked system.cpu0.icache.blocked_no_targets 0 # number of cycles access was blocked system.cpu0.icache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked system.cpu0.icache.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu0.icache.cache_copies 0 # number of cache copies performed -system.cpu0.icache.demand_accesses 57872551 # number of demand (read+write) accesses -system.cpu0.icache.demand_avg_miss_latency 12029.752588 # average overall miss latency -system.cpu0.icache.demand_avg_mshr_miss_latency 11029.000057 # average overall mshr miss latency -system.cpu0.icache.demand_hits 56957639 # number of demand (read+write) hits -system.cpu0.icache.demand_miss_latency 11006165000 # number of demand (read+write) miss cycles -system.cpu0.icache.demand_miss_rate 0.015809 # miss rate for demand accesses -system.cpu0.icache.demand_misses 914912 # number of demand (read+write) misses +system.cpu0.icache.demand_accesses 51081135 # number of demand (read+write) accesses +system.cpu0.icache.demand_avg_miss_latency 12048.344860 # average overall miss latency +system.cpu0.icache.demand_avg_mshr_miss_latency 11047.036239 # average overall mshr miss latency +system.cpu0.icache.demand_hits 50399501 # number of demand (read+write) hits +system.cpu0.icache.demand_miss_latency 8212561500 # number of demand (read+write) miss cycles +system.cpu0.icache.demand_miss_rate 0.013344 # miss rate for demand accesses +system.cpu0.icache.demand_misses 681634 # number of demand (read+write) misses system.cpu0.icache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu0.icache.demand_mshr_miss_latency 10090564500 # number of demand (read+write) MSHR miss cycles -system.cpu0.icache.demand_mshr_miss_rate 0.015809 # mshr miss rate for demand accesses -system.cpu0.icache.demand_mshr_misses 914912 # number of demand (read+write) MSHR misses +system.cpu0.icache.demand_mshr_miss_latency 7530035500 # number of demand (read+write) MSHR miss cycles +system.cpu0.icache.demand_mshr_miss_rate 0.013344 # mshr miss rate for demand accesses +system.cpu0.icache.demand_mshr_misses 681634 # number of demand (read+write) MSHR misses system.cpu0.icache.fast_writes 0 # number of fast writes performed system.cpu0.icache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu0.icache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu0.icache.overall_accesses 57872551 # number of overall (read+write) accesses -system.cpu0.icache.overall_avg_miss_latency 12029.752588 # average overall miss latency -system.cpu0.icache.overall_avg_mshr_miss_latency 11029.000057 # average overall mshr miss latency +system.cpu0.icache.overall_accesses 51081135 # number of overall (read+write) accesses +system.cpu0.icache.overall_avg_miss_latency 12048.344860 # average overall miss latency +system.cpu0.icache.overall_avg_mshr_miss_latency 11047.036239 # average overall mshr miss latency system.cpu0.icache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency -system.cpu0.icache.overall_hits 56957639 # number of overall hits -system.cpu0.icache.overall_miss_latency 11006165000 # number of overall miss cycles -system.cpu0.icache.overall_miss_rate 0.015809 # miss rate for overall accesses -system.cpu0.icache.overall_misses 914912 # number of overall misses +system.cpu0.icache.overall_hits 50399501 # number of overall hits +system.cpu0.icache.overall_miss_latency 8212561500 # number of overall miss cycles +system.cpu0.icache.overall_miss_rate 0.013344 # miss rate for overall accesses +system.cpu0.icache.overall_misses 681634 # number of overall misses system.cpu0.icache.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu0.icache.overall_mshr_miss_latency 10090564500 # number of overall MSHR miss cycles -system.cpu0.icache.overall_mshr_miss_rate 0.015809 # mshr miss rate for overall accesses -system.cpu0.icache.overall_mshr_misses 914912 # number of overall MSHR misses +system.cpu0.icache.overall_mshr_miss_latency 7530035500 # number of overall MSHR miss cycles +system.cpu0.icache.overall_mshr_miss_rate 0.013344 # mshr miss rate for overall accesses +system.cpu0.icache.overall_mshr_misses 681634 # number of overall MSHR misses system.cpu0.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu0.icache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.cpu0.icache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache @@ -178,199 +171,191 @@ system.cpu0.icache.prefetcher.num_hwpf_issued 0 system.cpu0.icache.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated system.cpu0.icache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page system.cpu0.icache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time -system.cpu0.icache.protocol.hwpf_invalid 0 # hard prefetch misses to invalid blocks -system.cpu0.icache.protocol.read_invalid 915158 # read misses to invalid blocks -system.cpu0.icache.protocol.snoop_inv_exclusive 0 # Invalidate snoops on exclusive blocks -system.cpu0.icache.protocol.snoop_inv_invalid 0 # Invalidate snoops on invalid blocks -system.cpu0.icache.protocol.snoop_inv_modified 0 # Invalidate snoops on modified blocks -system.cpu0.icache.protocol.snoop_inv_owned 0 # Invalidate snoops on owned blocks -system.cpu0.icache.protocol.snoop_inv_shared 0 # Invalidate snoops on shared blocks -system.cpu0.icache.protocol.snoop_read_exclusive 4652 # read snoops on exclusive blocks -system.cpu0.icache.protocol.snoop_read_modified 0 # read snoops on modified blocks -system.cpu0.icache.protocol.snoop_read_owned 0 # read snoops on owned blocks -system.cpu0.icache.protocol.snoop_read_shared 8768 # read snoops on shared blocks -system.cpu0.icache.protocol.snoop_readex_exclusive 121 # readEx snoops on exclusive blocks -system.cpu0.icache.protocol.snoop_readex_modified 0 # readEx snoops on modified blocks -system.cpu0.icache.protocol.snoop_readex_owned 0 # readEx snoops on owned blocks -system.cpu0.icache.protocol.snoop_readex_shared 1 # readEx snoops on shared blocks -system.cpu0.icache.protocol.snoop_upgrade_owned 0 # upgrade snoops on owned blocks -system.cpu0.icache.protocol.snoop_upgrade_shared 12 # upgradee snoops on shared blocks -system.cpu0.icache.protocol.snoop_writeinv_exclusive 0 # WriteInvalidate snoops on exclusive blocks -system.cpu0.icache.protocol.snoop_writeinv_invalid 0 # WriteInvalidate snoops on invalid blocks -system.cpu0.icache.protocol.snoop_writeinv_modified 0 # WriteInvalidate snoops on modified blocks -system.cpu0.icache.protocol.snoop_writeinv_owned 0 # WriteInvalidate snoops on owned blocks -system.cpu0.icache.protocol.snoop_writeinv_shared 0 # WriteInvalidate snoops on shared blocks -system.cpu0.icache.protocol.swpf_invalid 0 # soft prefetch misses to invalid blocks -system.cpu0.icache.protocol.write_invalid 0 # write misses to invalid blocks -system.cpu0.icache.protocol.write_owned 0 # write misses to owned blocks -system.cpu0.icache.protocol.write_shared 0 # write misses to shared blocks -system.cpu0.icache.replacements 908876 # number of replacements -system.cpu0.icache.sampled_refs 909388 # Sample count of references to valid blocks. +system.cpu0.icache.replacements 680987 # number of replacements +system.cpu0.icache.sampled_refs 681499 # Sample count of references to valid blocks. system.cpu0.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu0.icache.tagsinuse 508.806183 # Cycle average of tags in use -system.cpu0.icache.total_refs 56957639 # Total number of references to valid blocks. -system.cpu0.icache.warmup_cycle 34906249000 # Cycle when the warmup percentage was hit. +system.cpu0.icache.tagsinuse 508.821605 # Cycle average of tags in use +system.cpu0.icache.total_refs 50399501 # Total number of references to valid blocks. +system.cpu0.icache.warmup_cycle 35300494000 # Cycle when the warmup percentage was hit. system.cpu0.icache.writebacks 0 # number of writebacks -system.cpu0.idle_fraction 0.943968 # Percentage of idle cycles -system.cpu0.itb.accesses 3944641 # ITB accesses -system.cpu0.itb.acv 143 # ITB acv -system.cpu0.itb.hits 3940800 # ITB hits -system.cpu0.itb.misses 3841 # ITB misses -system.cpu0.kern.callpal 187118 # number of callpals executed +system.cpu0.idle_fraction 0.949890 # Percentage of idle cycles +system.cpu0.itb.accesses 3593148 # ITB accesses +system.cpu0.itb.acv 161 # ITB acv +system.cpu0.itb.hits 3589202 # ITB hits +system.cpu0.itb.misses 3946 # ITB misses +system.cpu0.kern.callpal 145952 # number of callpals executed system.cpu0.kern.callpal_cserve 1 0.00% 0.00% # number of callpals executed -system.cpu0.kern.callpal_wripir 96 0.05% 0.05% # number of callpals executed -system.cpu0.kern.callpal_wrmces 1 0.00% 0.05% # number of callpals executed -system.cpu0.kern.callpal_wrfen 1 0.00% 0.05% # number of callpals executed -system.cpu0.kern.callpal_wrvptptr 1 0.00% 0.05% # number of callpals executed -system.cpu0.kern.callpal_swpctx 3865 2.07% 2.12% # number of callpals executed -system.cpu0.kern.callpal_tbi 44 0.02% 2.14% # number of callpals executed -system.cpu0.kern.callpal_wrent 7 0.00% 2.15% # number of callpals executed -system.cpu0.kern.callpal_swpipl 171254 91.52% 93.67% # number of callpals executed -system.cpu0.kern.callpal_rdps 6635 3.55% 97.21% # number of callpals executed -system.cpu0.kern.callpal_wrkgp 1 0.00% 97.21% # number of callpals executed -system.cpu0.kern.callpal_wrusp 4 0.00% 97.22% # number of callpals executed -system.cpu0.kern.callpal_rdusp 7 0.00% 97.22% # number of callpals executed -system.cpu0.kern.callpal_whami 2 0.00% 97.22% # number of callpals executed -system.cpu0.kern.callpal_rti 4694 2.51% 99.73% # number of callpals executed -system.cpu0.kern.callpal_callsys 356 0.19% 99.92% # number of callpals executed -system.cpu0.kern.callpal_imb 149 0.08% 100.00% # number of callpals executed +system.cpu0.kern.callpal_wripir 536 0.37% 0.37% # number of callpals executed +system.cpu0.kern.callpal_wrmces 1 0.00% 0.37% # number of callpals executed +system.cpu0.kern.callpal_wrfen 1 0.00% 0.37% # number of callpals executed +system.cpu0.kern.callpal_wrvptptr 1 0.00% 0.37% # number of callpals executed +system.cpu0.kern.callpal_swpctx 3014 2.07% 2.44% # number of callpals executed +system.cpu0.kern.callpal_tbi 46 0.03% 2.47% # number of callpals executed +system.cpu0.kern.callpal_wrent 7 0.00% 2.47% # number of callpals executed +system.cpu0.kern.callpal_swpipl 131018 89.77% 92.24% # number of callpals executed +system.cpu0.kern.callpal_rdps 6493 4.45% 96.69% # number of callpals executed +system.cpu0.kern.callpal_wrkgp 1 0.00% 96.69% # number of callpals executed +system.cpu0.kern.callpal_wrusp 4 0.00% 96.69% # number of callpals executed +system.cpu0.kern.callpal_rdusp 8 0.01% 96.70% # number of callpals executed +system.cpu0.kern.callpal_whami 2 0.00% 96.70% # number of callpals executed +system.cpu0.kern.callpal_rti 4302 2.95% 99.65% # number of callpals executed +system.cpu0.kern.callpal_callsys 368 0.25% 99.90% # number of callpals executed +system.cpu0.kern.callpal_imb 149 0.10% 100.00% # number of callpals executed system.cpu0.kern.inst.arm 0 # number of arm instructions executed -system.cpu0.kern.inst.hwrei 201983 # number of hwrei instructions executed -system.cpu0.kern.inst.quiesce 6162 # number of quiesce instructions executed -system.cpu0.kern.ipl_count 178054 # number of times we switched to this ipl -system.cpu0.kern.ipl_count_0 72322 40.62% 40.62% # number of times we switched to this ipl -system.cpu0.kern.ipl_count_21 131 0.07% 40.69% # number of times we switched to this ipl -system.cpu0.kern.ipl_count_22 1968 1.11% 41.80% # number of times we switched to this ipl -system.cpu0.kern.ipl_count_30 6 0.00% 41.80% # number of times we switched to this ipl -system.cpu0.kern.ipl_count_31 103627 58.20% 100.00% # number of times we switched to this ipl -system.cpu0.kern.ipl_good 144005 # number of times we switched to this ipl from a different ipl -system.cpu0.kern.ipl_good_0 70953 49.27% 49.27% # number of times we switched to this ipl from a different ipl -system.cpu0.kern.ipl_good_21 131 0.09% 49.36% # number of times we switched to this ipl from a different ipl -system.cpu0.kern.ipl_good_22 1968 1.37% 50.73% # number of times we switched to this ipl from a different ipl -system.cpu0.kern.ipl_good_30 6 0.00% 50.73% # number of times we switched to this ipl from a different ipl -system.cpu0.kern.ipl_good_31 70947 49.27% 100.00% # number of times we switched to this ipl from a different ipl -system.cpu0.kern.ipl_ticks 1951128432000 # number of cycles we spent at this ipl -system.cpu0.kern.ipl_ticks_0 1894864204500 97.12% 97.12% # number of cycles we spent at this ipl -system.cpu0.kern.ipl_ticks_21 72482500 0.00% 97.12% # number of cycles we spent at this ipl -system.cpu0.kern.ipl_ticks_22 564462000 0.03% 97.15% # number of cycles we spent at this ipl -system.cpu0.kern.ipl_ticks_30 4114000 0.00% 97.15% # number of cycles we spent at this ipl -system.cpu0.kern.ipl_ticks_31 55623169000 2.85% 100.00% # number of cycles we spent at this ipl -system.cpu0.kern.ipl_used_0 0.981071 # fraction of swpipl calls that actually changed the ipl +system.cpu0.kern.inst.hwrei 161590 # number of hwrei instructions executed +system.cpu0.kern.inst.quiesce 6598 # number of quiesce instructions executed +system.cpu0.kern.ipl_count 137863 # number of times we switched to this ipl +system.cpu0.kern.ipl_count_0 55298 40.11% 40.11% # number of times we switched to this ipl +system.cpu0.kern.ipl_count_21 131 0.10% 40.21% # number of times we switched to this ipl +system.cpu0.kern.ipl_count_22 1969 1.43% 41.63% # number of times we switched to this ipl +system.cpu0.kern.ipl_count_30 442 0.32% 41.95% # number of times we switched to this ipl +system.cpu0.kern.ipl_count_31 80023 58.05% 100.00% # number of times we switched to this ipl +system.cpu0.kern.ipl_good 111708 # number of times we switched to this ipl from a different ipl +system.cpu0.kern.ipl_good_0 54804 49.06% 49.06% # number of times we switched to this ipl from a different ipl +system.cpu0.kern.ipl_good_21 131 0.12% 49.18% # number of times we switched to this ipl from a different ipl +system.cpu0.kern.ipl_good_22 1969 1.76% 50.94% # number of times we switched to this ipl from a different ipl +system.cpu0.kern.ipl_good_30 442 0.40% 51.34% # number of times we switched to this ipl from a different ipl +system.cpu0.kern.ipl_good_31 54362 48.66% 100.00% # number of times we switched to this ipl from a different ipl +system.cpu0.kern.ipl_ticks 1951366621000 # number of cycles we spent at this ipl +system.cpu0.kern.ipl_ticks_0 1898503749000 97.29% 97.29% # number of cycles we spent at this ipl +system.cpu0.kern.ipl_ticks_21 76310500 0.00% 97.29% # number of cycles we spent at this ipl +system.cpu0.kern.ipl_ticks_22 547835000 0.03% 97.32% # number of cycles we spent at this ipl +system.cpu0.kern.ipl_ticks_30 278789500 0.01% 97.34% # number of cycles we spent at this ipl +system.cpu0.kern.ipl_ticks_31 51959937000 2.66% 100.00% # number of cycles we spent at this ipl +system.cpu0.kern.ipl_used_0 0.991067 # fraction of swpipl calls that actually changed the ipl system.cpu0.kern.ipl_used_21 1 # fraction of swpipl calls that actually changed the ipl system.cpu0.kern.ipl_used_22 1 # fraction of swpipl calls that actually changed the ipl system.cpu0.kern.ipl_used_30 1 # fraction of swpipl calls that actually changed the ipl -system.cpu0.kern.ipl_used_31 0.684638 # fraction of swpipl calls that actually changed the ipl -system.cpu0.kern.mode_good_kernel 1230 -system.cpu0.kern.mode_good_user 1231 +system.cpu0.kern.ipl_used_31 0.679330 # fraction of swpipl calls that actually changed the ipl +system.cpu0.kern.mode_good_kernel 1275 +system.cpu0.kern.mode_good_user 1276 system.cpu0.kern.mode_good_idle 0 -system.cpu0.kern.mode_switch_kernel 7215 # number of protection mode switches -system.cpu0.kern.mode_switch_user 1231 # number of protection mode switches +system.cpu0.kern.mode_switch_kernel 6846 # number of protection mode switches +system.cpu0.kern.mode_switch_user 1276 # number of protection mode switches system.cpu0.kern.mode_switch_idle 0 # number of protection mode switches system.cpu0.kern.mode_switch_good # fraction of useful protection mode switches -system.cpu0.kern.mode_switch_good_kernel 0.170478 # fraction of useful protection mode switches +system.cpu0.kern.mode_switch_good_kernel 0.186240 # fraction of useful protection mode switches system.cpu0.kern.mode_switch_good_user 1 # fraction of useful protection mode switches system.cpu0.kern.mode_switch_good_idle # fraction of useful protection mode switches -system.cpu0.kern.mode_ticks_kernel 1947973402000 99.84% 99.84% # number of ticks spent at the given mode -system.cpu0.kern.mode_ticks_user 3155028000 0.16% 100.00% # number of ticks spent at the given mode +system.cpu0.kern.mode_ticks_kernel 1948118613000 99.83% 99.83% # number of ticks spent at the given mode +system.cpu0.kern.mode_ticks_user 3248006000 0.17% 100.00% # number of ticks spent at the given mode system.cpu0.kern.mode_ticks_idle 0 0.00% 100.00% # number of ticks spent at the given mode -system.cpu0.kern.swap_context 3866 # number of times the context was actually changed -system.cpu0.kern.syscall 224 # number of syscalls executed -system.cpu0.kern.syscall_2 6 2.68% 2.68% # number of syscalls executed -system.cpu0.kern.syscall_3 19 8.48% 11.16% # number of syscalls executed -system.cpu0.kern.syscall_4 3 1.34% 12.50% # number of syscalls executed -system.cpu0.kern.syscall_6 30 13.39% 25.89% # number of syscalls executed -system.cpu0.kern.syscall_12 1 0.45% 26.34% # number of syscalls executed -system.cpu0.kern.syscall_15 1 0.45% 26.79% # number of syscalls executed -system.cpu0.kern.syscall_17 10 4.46% 31.25% # number of syscalls executed -system.cpu0.kern.syscall_19 6 2.68% 33.93% # number of syscalls executed -system.cpu0.kern.syscall_20 4 1.79% 35.71% # number of syscalls executed -system.cpu0.kern.syscall_23 2 0.89% 36.61% # number of syscalls executed -system.cpu0.kern.syscall_24 4 1.79% 38.39% # number of syscalls executed -system.cpu0.kern.syscall_33 8 3.57% 41.96% # number of syscalls executed -system.cpu0.kern.syscall_41 2 0.89% 42.86% # number of syscalls executed -system.cpu0.kern.syscall_45 39 17.41% 60.27% # number of syscalls executed -system.cpu0.kern.syscall_47 4 1.79% 62.05% # number of syscalls executed -system.cpu0.kern.syscall_48 7 3.12% 65.18% # number of syscalls executed -system.cpu0.kern.syscall_54 9 4.02% 69.20% # number of syscalls executed -system.cpu0.kern.syscall_58 1 0.45% 69.64% # number of syscalls executed -system.cpu0.kern.syscall_59 5 2.23% 71.88% # number of syscalls executed -system.cpu0.kern.syscall_71 32 14.29% 86.16% # number of syscalls executed -system.cpu0.kern.syscall_73 3 1.34% 87.50% # number of syscalls executed -system.cpu0.kern.syscall_74 9 4.02% 91.52% # number of syscalls executed -system.cpu0.kern.syscall_87 1 0.45% 91.96% # number of syscalls executed -system.cpu0.kern.syscall_90 2 0.89% 92.86% # number of syscalls executed -system.cpu0.kern.syscall_92 7 3.12% 95.98% # number of syscalls executed -system.cpu0.kern.syscall_97 2 0.89% 96.87% # number of syscalls executed -system.cpu0.kern.syscall_98 2 0.89% 97.77% # number of syscalls executed -system.cpu0.kern.syscall_132 2 0.89% 98.66% # number of syscalls executed -system.cpu0.kern.syscall_144 1 0.45% 99.11% # number of syscalls executed -system.cpu0.kern.syscall_147 2 0.89% 100.00% # number of syscalls executed -system.cpu0.not_idle_fraction 0.056032 # Percentage of non-idle cycles -system.cpu0.numCycles 1951129131000 # number of cpu cycles simulated -system.cpu0.num_insts 57872550 # Number of instructions executed -system.cpu0.num_refs 15541096 # Number of memory references -system.cpu1.dcache.ReadReq_accesses 1052558 # number of ReadReq accesses(hits+misses) -system.cpu1.dcache.ReadReq_avg_miss_latency 11119.734481 # average ReadReq miss latency -system.cpu1.dcache.ReadReq_avg_mshr_miss_latency 10119.576119 # average ReadReq mshr miss latency -system.cpu1.dcache.ReadReq_hits 1014670 # number of ReadReq hits -system.cpu1.dcache.ReadReq_miss_latency 421304500 # number of ReadReq miss cycles -system.cpu1.dcache.ReadReq_miss_rate 0.035996 # miss rate for ReadReq accesses -system.cpu1.dcache.ReadReq_misses 37888 # number of ReadReq misses -system.cpu1.dcache.ReadReq_mshr_miss_latency 383410500 # number of ReadReq MSHR miss cycles -system.cpu1.dcache.ReadReq_mshr_miss_rate 0.035996 # mshr miss rate for ReadReq accesses -system.cpu1.dcache.ReadReq_mshr_misses 37888 # number of ReadReq MSHR misses -system.cpu1.dcache.ReadReq_mshr_uncacheable 120 # number of ReadReq MSHR uncacheable -system.cpu1.dcache.ReadResp_avg_mshr_uncacheable_latency inf # average ReadResp mshr uncacheable latency -system.cpu1.dcache.ReadResp_mshr_uncacheable_latency 14641500 # number of ReadResp MSHR uncacheable cycles -system.cpu1.dcache.WriteReq_accesses 677186 # number of WriteReq accesses(hits+misses) -system.cpu1.dcache.WriteReq_avg_miss_latency 11920.138166 # average WriteReq miss latency -system.cpu1.dcache.WriteReq_avg_mshr_miss_latency 10843.231096 # average WriteReq mshr miss latency -system.cpu1.dcache.WriteReq_hits 653157 # number of WriteReq hits -system.cpu1.dcache.WriteReq_miss_latency 286429000 # number of WriteReq miss cycles -system.cpu1.dcache.WriteReq_miss_rate 0.035484 # miss rate for WriteReq accesses -system.cpu1.dcache.WriteReq_misses 24029 # number of WriteReq misses -system.cpu1.dcache.WriteReq_mshr_miss_latency 260552000 # number of WriteReq MSHR miss cycles -system.cpu1.dcache.WriteReq_mshr_miss_rate 0.035484 # mshr miss rate for WriteReq accesses -system.cpu1.dcache.WriteReq_mshr_misses 24029 # number of WriteReq MSHR misses -system.cpu1.dcache.WriteReq_mshr_uncacheable 2496 # number of WriteReq MSHR uncacheable -system.cpu1.dcache.WriteResp_avg_mshr_uncacheable_latency inf # average WriteResp mshr uncacheable latency -system.cpu1.dcache.WriteResp_mshr_uncacheable_latency 304596500 # number of WriteResp MSHR uncacheable cycles +system.cpu0.kern.swap_context 3015 # number of times the context was actually changed +system.cpu0.kern.syscall 228 # number of syscalls executed +system.cpu0.kern.syscall_2 7 3.07% 3.07% # number of syscalls executed +system.cpu0.kern.syscall_3 19 8.33% 11.40% # number of syscalls executed +system.cpu0.kern.syscall_4 3 1.32% 12.72% # number of syscalls executed +system.cpu0.kern.syscall_6 31 13.60% 26.32% # number of syscalls executed +system.cpu0.kern.syscall_12 1 0.44% 26.75% # number of syscalls executed +system.cpu0.kern.syscall_15 1 0.44% 27.19% # number of syscalls executed +system.cpu0.kern.syscall_17 10 4.39% 31.58% # number of syscalls executed +system.cpu0.kern.syscall_19 6 2.63% 34.21% # number of syscalls executed +system.cpu0.kern.syscall_20 4 1.75% 35.96% # number of syscalls executed +system.cpu0.kern.syscall_23 2 0.88% 36.84% # number of syscalls executed +system.cpu0.kern.syscall_24 4 1.75% 38.60% # number of syscalls executed +system.cpu0.kern.syscall_33 8 3.51% 42.11% # number of syscalls executed +system.cpu0.kern.syscall_41 2 0.88% 42.98% # number of syscalls executed +system.cpu0.kern.syscall_45 39 17.11% 60.09% # number of syscalls executed +system.cpu0.kern.syscall_47 4 1.75% 61.84% # number of syscalls executed +system.cpu0.kern.syscall_48 8 3.51% 65.35% # number of syscalls executed +system.cpu0.kern.syscall_54 9 3.95% 69.30% # number of syscalls executed +system.cpu0.kern.syscall_58 1 0.44% 69.74% # number of syscalls executed +system.cpu0.kern.syscall_59 6 2.63% 72.37% # number of syscalls executed +system.cpu0.kern.syscall_71 32 14.04% 86.40% # number of syscalls executed +system.cpu0.kern.syscall_73 3 1.32% 87.72% # number of syscalls executed +system.cpu0.kern.syscall_74 9 3.95% 91.67% # number of syscalls executed +system.cpu0.kern.syscall_87 1 0.44% 92.11% # number of syscalls executed +system.cpu0.kern.syscall_90 2 0.88% 92.98% # number of syscalls executed +system.cpu0.kern.syscall_92 7 3.07% 96.05% # number of syscalls executed +system.cpu0.kern.syscall_97 2 0.88% 96.93% # number of syscalls executed +system.cpu0.kern.syscall_98 2 0.88% 97.81% # number of syscalls executed +system.cpu0.kern.syscall_132 2 0.88% 98.68% # number of syscalls executed +system.cpu0.kern.syscall_144 1 0.44% 99.12% # number of syscalls executed +system.cpu0.kern.syscall_147 2 0.88% 100.00% # number of syscalls executed +system.cpu0.not_idle_fraction 0.050110 # Percentage of non-idle cycles +system.cpu0.numCycles 1951367346000 # number of cpu cycles simulated +system.cpu0.num_insts 51081134 # Number of instructions executed +system.cpu0.num_refs 13268864 # Number of memory references +system.cpu1.dcache.LoadLockedReq_accesses 61056 # number of LoadLockedReq accesses(hits+misses) +system.cpu1.dcache.LoadLockedReq_avg_miss_latency 9095.192614 # average LoadLockedReq miss latency +system.cpu1.dcache.LoadLockedReq_avg_mshr_miss_latency 8095.192614 # average LoadLockedReq mshr miss latency +system.cpu1.dcache.LoadLockedReq_hits 51633 # number of LoadLockedReq hits +system.cpu1.dcache.LoadLockedReq_miss_latency 85704000 # number of LoadLockedReq miss cycles +system.cpu1.dcache.LoadLockedReq_miss_rate 0.154334 # miss rate for LoadLockedReq accesses +system.cpu1.dcache.LoadLockedReq_misses 9423 # number of LoadLockedReq misses +system.cpu1.dcache.LoadLockedReq_mshr_miss_latency 76281000 # number of LoadLockedReq MSHR miss cycles +system.cpu1.dcache.LoadLockedReq_mshr_miss_rate 0.154334 # mshr miss rate for LoadLockedReq accesses +system.cpu1.dcache.LoadLockedReq_mshr_misses 9423 # number of LoadLockedReq MSHR misses +system.cpu1.dcache.ReadReq_accesses 2457845 # number of ReadReq accesses(hits+misses) +system.cpu1.dcache.ReadReq_avg_miss_latency 11653.965886 # average ReadReq miss latency +system.cpu1.dcache.ReadReq_avg_mshr_miss_latency 10653.909138 # average ReadReq mshr miss latency +system.cpu1.dcache.ReadReq_avg_mshr_uncacheable_latency inf # average ReadReq mshr uncacheable latency +system.cpu1.dcache.ReadReq_hits 2334493 # number of ReadReq hits +system.cpu1.dcache.ReadReq_miss_latency 1437540000 # number of ReadReq miss cycles +system.cpu1.dcache.ReadReq_miss_rate 0.050187 # miss rate for ReadReq accesses +system.cpu1.dcache.ReadReq_misses 123352 # number of ReadReq misses +system.cpu1.dcache.ReadReq_mshr_miss_latency 1314181000 # number of ReadReq MSHR miss cycles +system.cpu1.dcache.ReadReq_mshr_miss_rate 0.050187 # mshr miss rate for ReadReq accesses +system.cpu1.dcache.ReadReq_mshr_misses 123352 # number of ReadReq MSHR misses +system.cpu1.dcache.ReadReq_mshr_uncacheable_latency 16729500 # number of ReadReq MSHR uncacheable cycles +system.cpu1.dcache.StoreCondReq_accesses 60551 # number of StoreCondReq accesses(hits+misses) +system.cpu1.dcache.StoreCondReq_avg_miss_latency 10960.125479 # average StoreCondReq miss latency +system.cpu1.dcache.StoreCondReq_avg_mshr_miss_latency 9960.125479 # average StoreCondReq mshr miss latency +system.cpu1.dcache.StoreCondReq_hits 46206 # number of StoreCondReq hits +system.cpu1.dcache.StoreCondReq_miss_latency 157223000 # number of StoreCondReq miss cycles +system.cpu1.dcache.StoreCondReq_miss_rate 0.236908 # miss rate for StoreCondReq accesses +system.cpu1.dcache.StoreCondReq_misses 14345 # number of StoreCondReq misses +system.cpu1.dcache.StoreCondReq_mshr_miss_latency 142878000 # number of StoreCondReq MSHR miss cycles +system.cpu1.dcache.StoreCondReq_mshr_miss_rate 0.236908 # mshr miss rate for StoreCondReq accesses +system.cpu1.dcache.StoreCondReq_mshr_misses 14345 # number of StoreCondReq MSHR misses +system.cpu1.dcache.WriteReq_accesses 1792743 # number of WriteReq accesses(hits+misses) +system.cpu1.dcache.WriteReq_avg_miss_latency 13398.121192 # average WriteReq miss latency +system.cpu1.dcache.WriteReq_avg_mshr_miss_latency 12398.121192 # average WriteReq mshr miss latency +system.cpu1.dcache.WriteReq_avg_mshr_uncacheable_latency inf # average WriteReq mshr uncacheable latency +system.cpu1.dcache.WriteReq_hits 1700344 # number of WriteReq hits +system.cpu1.dcache.WriteReq_miss_latency 1237973000 # number of WriteReq miss cycles +system.cpu1.dcache.WriteReq_miss_rate 0.051541 # miss rate for WriteReq accesses +system.cpu1.dcache.WriteReq_misses 92399 # number of WriteReq misses +system.cpu1.dcache.WriteReq_mshr_miss_latency 1145574000 # number of WriteReq MSHR miss cycles +system.cpu1.dcache.WriteReq_mshr_miss_rate 0.051541 # mshr miss rate for WriteReq accesses +system.cpu1.dcache.WriteReq_mshr_misses 92399 # number of WriteReq MSHR misses +system.cpu1.dcache.WriteReq_mshr_uncacheable_latency 421374000 # number of WriteReq MSHR uncacheable cycles system.cpu1.dcache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked system.cpu1.dcache.avg_blocked_cycles_no_targets # average number of cycles each access was blocked -system.cpu1.dcache.avg_refs 29.876823 # Average number of references to valid blocks. +system.cpu1.dcache.avg_refs 23.577992 # Average number of references to valid blocks. system.cpu1.dcache.blocked_no_mshrs 0 # number of cycles access was blocked system.cpu1.dcache.blocked_no_targets 0 # number of cycles access was blocked system.cpu1.dcache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked system.cpu1.dcache.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu1.dcache.cache_copies 0 # number of cache copies performed -system.cpu1.dcache.demand_accesses 1729744 # number of demand (read+write) accesses -system.cpu1.dcache.demand_avg_miss_latency 11430.358383 # average overall miss latency -system.cpu1.dcache.demand_avg_mshr_miss_latency 10400.415072 # average overall mshr miss latency -system.cpu1.dcache.demand_hits 1667827 # number of demand (read+write) hits -system.cpu1.dcache.demand_miss_latency 707733500 # number of demand (read+write) miss cycles -system.cpu1.dcache.demand_miss_rate 0.035795 # miss rate for demand accesses -system.cpu1.dcache.demand_misses 61917 # number of demand (read+write) misses +system.cpu1.dcache.demand_accesses 4250588 # number of demand (read+write) accesses +system.cpu1.dcache.demand_avg_miss_latency 12400.929776 # average overall miss latency +system.cpu1.dcache.demand_avg_mshr_miss_latency 11400.897331 # average overall mshr miss latency +system.cpu1.dcache.demand_hits 4034837 # number of demand (read+write) hits +system.cpu1.dcache.demand_miss_latency 2675513000 # number of demand (read+write) miss cycles +system.cpu1.dcache.demand_miss_rate 0.050758 # miss rate for demand accesses +system.cpu1.dcache.demand_misses 215751 # number of demand (read+write) misses system.cpu1.dcache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu1.dcache.demand_mshr_miss_latency 643962500 # number of demand (read+write) MSHR miss cycles -system.cpu1.dcache.demand_mshr_miss_rate 0.035795 # mshr miss rate for demand accesses -system.cpu1.dcache.demand_mshr_misses 61917 # number of demand (read+write) MSHR misses +system.cpu1.dcache.demand_mshr_miss_latency 2459755000 # number of demand (read+write) MSHR miss cycles +system.cpu1.dcache.demand_mshr_miss_rate 0.050758 # mshr miss rate for demand accesses +system.cpu1.dcache.demand_mshr_misses 215751 # number of demand (read+write) MSHR misses system.cpu1.dcache.fast_writes 0 # number of fast writes performed system.cpu1.dcache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu1.dcache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu1.dcache.overall_accesses 1729744 # number of overall (read+write) accesses -system.cpu1.dcache.overall_avg_miss_latency 11430.358383 # average overall miss latency -system.cpu1.dcache.overall_avg_mshr_miss_latency 10400.415072 # average overall mshr miss latency -system.cpu1.dcache.overall_avg_mshr_uncacheable_latency 0 # average overall mshr uncacheable latency -system.cpu1.dcache.overall_hits 1667827 # number of overall hits -system.cpu1.dcache.overall_miss_latency 707733500 # number of overall miss cycles -system.cpu1.dcache.overall_miss_rate 0.035795 # miss rate for overall accesses -system.cpu1.dcache.overall_misses 61917 # number of overall misses +system.cpu1.dcache.overall_accesses 4250588 # number of overall (read+write) accesses +system.cpu1.dcache.overall_avg_miss_latency 12400.929776 # average overall miss latency +system.cpu1.dcache.overall_avg_mshr_miss_latency 11400.897331 # average overall mshr miss latency +system.cpu1.dcache.overall_avg_mshr_uncacheable_latency inf # average overall mshr uncacheable latency +system.cpu1.dcache.overall_hits 4034837 # number of overall hits +system.cpu1.dcache.overall_miss_latency 2675513000 # number of overall miss cycles +system.cpu1.dcache.overall_miss_rate 0.050758 # miss rate for overall accesses +system.cpu1.dcache.overall_misses 215751 # number of overall misses system.cpu1.dcache.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu1.dcache.overall_mshr_miss_latency 643962500 # number of overall MSHR miss cycles -system.cpu1.dcache.overall_mshr_miss_rate 0.035795 # mshr miss rate for overall accesses -system.cpu1.dcache.overall_mshr_misses 61917 # number of overall MSHR misses -system.cpu1.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles -system.cpu1.dcache.overall_mshr_uncacheable_misses 2616 # number of overall MSHR uncacheable misses +system.cpu1.dcache.overall_mshr_miss_latency 2459755000 # number of overall MSHR miss cycles +system.cpu1.dcache.overall_mshr_miss_rate 0.050758 # mshr miss rate for overall accesses +system.cpu1.dcache.overall_mshr_misses 215751 # number of overall MSHR misses +system.cpu1.dcache.overall_mshr_uncacheable_latency 438103500 # number of overall MSHR uncacheable cycles +system.cpu1.dcache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.cpu1.dcache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache system.cpu1.dcache.prefetcher.num_hwpf_already_in_mshr 0 # number of hwpf that were already in mshr system.cpu1.dcache.prefetcher.num_hwpf_already_in_prefetcher 0 # number of hwpf that were already in the prefetch queue @@ -380,95 +365,69 @@ system.cpu1.dcache.prefetcher.num_hwpf_issued 0 system.cpu1.dcache.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated system.cpu1.dcache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page system.cpu1.dcache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time -system.cpu1.dcache.protocol.hwpf_invalid 0 # hard prefetch misses to invalid blocks -system.cpu1.dcache.protocol.read_invalid 37951 # read misses to invalid blocks -system.cpu1.dcache.protocol.snoop_inv_exclusive 0 # Invalidate snoops on exclusive blocks -system.cpu1.dcache.protocol.snoop_inv_invalid 0 # Invalidate snoops on invalid blocks -system.cpu1.dcache.protocol.snoop_inv_modified 0 # Invalidate snoops on modified blocks -system.cpu1.dcache.protocol.snoop_inv_owned 0 # Invalidate snoops on owned blocks -system.cpu1.dcache.protocol.snoop_inv_shared 0 # Invalidate snoops on shared blocks -system.cpu1.dcache.protocol.snoop_read_exclusive 906 # read snoops on exclusive blocks -system.cpu1.dcache.protocol.snoop_read_modified 1965 # read snoops on modified blocks -system.cpu1.dcache.protocol.snoop_read_owned 254 # read snoops on owned blocks -system.cpu1.dcache.protocol.snoop_read_shared 65869 # read snoops on shared blocks -system.cpu1.dcache.protocol.snoop_readex_exclusive 191 # readEx snoops on exclusive blocks -system.cpu1.dcache.protocol.snoop_readex_modified 198 # readEx snoops on modified blocks -system.cpu1.dcache.protocol.snoop_readex_owned 48 # readEx snoops on owned blocks -system.cpu1.dcache.protocol.snoop_readex_shared 42 # readEx snoops on shared blocks -system.cpu1.dcache.protocol.snoop_upgrade_owned 1132 # upgrade snoops on owned blocks -system.cpu1.dcache.protocol.snoop_upgrade_shared 2716 # upgradee snoops on shared blocks -system.cpu1.dcache.protocol.snoop_writeinv_exclusive 0 # WriteInvalidate snoops on exclusive blocks -system.cpu1.dcache.protocol.snoop_writeinv_invalid 0 # WriteInvalidate snoops on invalid blocks -system.cpu1.dcache.protocol.snoop_writeinv_modified 0 # WriteInvalidate snoops on modified blocks -system.cpu1.dcache.protocol.snoop_writeinv_owned 0 # WriteInvalidate snoops on owned blocks -system.cpu1.dcache.protocol.snoop_writeinv_shared 0 # WriteInvalidate snoops on shared blocks -system.cpu1.dcache.protocol.swpf_invalid 0 # soft prefetch misses to invalid blocks -system.cpu1.dcache.protocol.write_invalid 22206 # write misses to invalid blocks -system.cpu1.dcache.protocol.write_owned 601 # write misses to owned blocks -system.cpu1.dcache.protocol.write_shared 1247 # write misses to shared blocks -system.cpu1.dcache.replacements 55360 # number of replacements -system.cpu1.dcache.sampled_refs 55749 # Sample count of references to valid blocks. +system.cpu1.dcache.replacements 176474 # number of replacements +system.cpu1.dcache.sampled_refs 176909 # Sample count of references to valid blocks. system.cpu1.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu1.dcache.tagsinuse 388.749341 # Cycle average of tags in use -system.cpu1.dcache.total_refs 1665603 # Total number of references to valid blocks. -system.cpu1.dcache.warmup_cycle 1935095598000 # Cycle when the warmup percentage was hit. -system.cpu1.dcache.writebacks 27663 # number of writebacks -system.cpu1.dtb.accesses 302878 # DTB accesses -system.cpu1.dtb.acv 84 # DTB access violations -system.cpu1.dtb.hits 1728432 # DTB hits -system.cpu1.dtb.misses 3106 # DTB misses -system.cpu1.dtb.read_accesses 205838 # DTB read accesses -system.cpu1.dtb.read_acv 36 # DTB read access violations -system.cpu1.dtb.read_hits 1049360 # DTB read hits -system.cpu1.dtb.read_misses 2750 # DTB read misses -system.cpu1.dtb.write_accesses 97040 # DTB write accesses -system.cpu1.dtb.write_acv 48 # DTB write access violations -system.cpu1.dtb.write_hits 679072 # DTB write hits -system.cpu1.dtb.write_misses 356 # DTB write misses -system.cpu1.icache.ReadReq_accesses 5376264 # number of ReadReq accesses(hits+misses) -system.cpu1.icache.ReadReq_avg_miss_latency 12045.939531 # average ReadReq miss latency -system.cpu1.icache.ReadReq_avg_mshr_miss_latency 11045.466957 # average ReadReq mshr miss latency -system.cpu1.icache.ReadReq_hits 5281041 # number of ReadReq hits -system.cpu1.icache.ReadReq_miss_latency 1147050500 # number of ReadReq miss cycles -system.cpu1.icache.ReadReq_miss_rate 0.017712 # miss rate for ReadReq accesses -system.cpu1.icache.ReadReq_misses 95223 # number of ReadReq misses -system.cpu1.icache.ReadReq_mshr_miss_latency 1051782500 # number of ReadReq MSHR miss cycles -system.cpu1.icache.ReadReq_mshr_miss_rate 0.017712 # mshr miss rate for ReadReq accesses -system.cpu1.icache.ReadReq_mshr_misses 95223 # number of ReadReq MSHR misses +system.cpu1.dcache.tagsinuse 471.274557 # Cycle average of tags in use +system.cpu1.dcache.total_refs 4171159 # Total number of references to valid blocks. +system.cpu1.dcache.warmup_cycle 1917859097000 # Cycle when the warmup percentage was hit. +system.cpu1.dcache.writebacks 93260 # number of writebacks +system.cpu1.dtb.accesses 296718 # DTB accesses +system.cpu1.dtb.acv 62 # DTB access violations +system.cpu1.dtb.hits 4358656 # DTB hits +system.cpu1.dtb.misses 2867 # DTB misses +system.cpu1.dtb.read_accesses 201817 # DTB read accesses +system.cpu1.dtb.read_acv 26 # DTB read access violations +system.cpu1.dtb.read_hits 2507309 # DTB read hits +system.cpu1.dtb.read_misses 2546 # DTB read misses +system.cpu1.dtb.write_accesses 94901 # DTB write accesses +system.cpu1.dtb.write_acv 36 # DTB write access violations +system.cpu1.dtb.write_hits 1851347 # DTB write hits +system.cpu1.dtb.write_misses 321 # DTB write misses +system.cpu1.icache.ReadReq_accesses 13758345 # number of ReadReq accesses(hits+misses) +system.cpu1.icache.ReadReq_avg_miss_latency 12026.498126 # average ReadReq miss latency +system.cpu1.icache.ReadReq_avg_mshr_miss_latency 11026.342473 # average ReadReq mshr miss latency +system.cpu1.icache.ReadReq_hits 13421057 # number of ReadReq hits +system.cpu1.icache.ReadReq_miss_latency 4056393500 # number of ReadReq miss cycles +system.cpu1.icache.ReadReq_miss_rate 0.024515 # miss rate for ReadReq accesses +system.cpu1.icache.ReadReq_misses 337288 # number of ReadReq misses +system.cpu1.icache.ReadReq_mshr_miss_latency 3719053000 # number of ReadReq MSHR miss cycles +system.cpu1.icache.ReadReq_mshr_miss_rate 0.024515 # mshr miss rate for ReadReq accesses +system.cpu1.icache.ReadReq_mshr_misses 337288 # number of ReadReq MSHR misses system.cpu1.icache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked system.cpu1.icache.avg_blocked_cycles_no_targets # average number of cycles each access was blocked -system.cpu1.icache.avg_refs 57.662729 # Average number of references to valid blocks. +system.cpu1.icache.avg_refs 39.794511 # Average number of references to valid blocks. system.cpu1.icache.blocked_no_mshrs 0 # number of cycles access was blocked system.cpu1.icache.blocked_no_targets 0 # number of cycles access was blocked system.cpu1.icache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked system.cpu1.icache.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu1.icache.cache_copies 0 # number of cache copies performed -system.cpu1.icache.demand_accesses 5376264 # number of demand (read+write) accesses -system.cpu1.icache.demand_avg_miss_latency 12045.939531 # average overall miss latency -system.cpu1.icache.demand_avg_mshr_miss_latency 11045.466957 # average overall mshr miss latency -system.cpu1.icache.demand_hits 5281041 # number of demand (read+write) hits -system.cpu1.icache.demand_miss_latency 1147050500 # number of demand (read+write) miss cycles -system.cpu1.icache.demand_miss_rate 0.017712 # miss rate for demand accesses -system.cpu1.icache.demand_misses 95223 # number of demand (read+write) misses +system.cpu1.icache.demand_accesses 13758345 # number of demand (read+write) accesses +system.cpu1.icache.demand_avg_miss_latency 12026.498126 # average overall miss latency +system.cpu1.icache.demand_avg_mshr_miss_latency 11026.342473 # average overall mshr miss latency +system.cpu1.icache.demand_hits 13421057 # number of demand (read+write) hits +system.cpu1.icache.demand_miss_latency 4056393500 # number of demand (read+write) miss cycles +system.cpu1.icache.demand_miss_rate 0.024515 # miss rate for demand accesses +system.cpu1.icache.demand_misses 337288 # number of demand (read+write) misses system.cpu1.icache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu1.icache.demand_mshr_miss_latency 1051782500 # number of demand (read+write) MSHR miss cycles -system.cpu1.icache.demand_mshr_miss_rate 0.017712 # mshr miss rate for demand accesses -system.cpu1.icache.demand_mshr_misses 95223 # number of demand (read+write) MSHR misses +system.cpu1.icache.demand_mshr_miss_latency 3719053000 # number of demand (read+write) MSHR miss cycles +system.cpu1.icache.demand_mshr_miss_rate 0.024515 # mshr miss rate for demand accesses +system.cpu1.icache.demand_mshr_misses 337288 # number of demand (read+write) MSHR misses system.cpu1.icache.fast_writes 0 # number of fast writes performed system.cpu1.icache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu1.icache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu1.icache.overall_accesses 5376264 # number of overall (read+write) accesses -system.cpu1.icache.overall_avg_miss_latency 12045.939531 # average overall miss latency -system.cpu1.icache.overall_avg_mshr_miss_latency 11045.466957 # average overall mshr miss latency +system.cpu1.icache.overall_accesses 13758345 # number of overall (read+write) accesses +system.cpu1.icache.overall_avg_miss_latency 12026.498126 # average overall miss latency +system.cpu1.icache.overall_avg_mshr_miss_latency 11026.342473 # average overall mshr miss latency system.cpu1.icache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency -system.cpu1.icache.overall_hits 5281041 # number of overall hits -system.cpu1.icache.overall_miss_latency 1147050500 # number of overall miss cycles -system.cpu1.icache.overall_miss_rate 0.017712 # miss rate for overall accesses -system.cpu1.icache.overall_misses 95223 # number of overall misses +system.cpu1.icache.overall_hits 13421057 # number of overall hits +system.cpu1.icache.overall_miss_latency 4056393500 # number of overall miss cycles +system.cpu1.icache.overall_miss_rate 0.024515 # miss rate for overall accesses +system.cpu1.icache.overall_misses 337288 # number of overall misses system.cpu1.icache.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu1.icache.overall_mshr_miss_latency 1051782500 # number of overall MSHR miss cycles -system.cpu1.icache.overall_mshr_miss_rate 0.017712 # mshr miss rate for overall accesses -system.cpu1.icache.overall_mshr_misses 95223 # number of overall MSHR misses +system.cpu1.icache.overall_mshr_miss_latency 3719053000 # number of overall MSHR miss cycles +system.cpu1.icache.overall_mshr_miss_rate 0.024515 # mshr miss rate for overall accesses +system.cpu1.icache.overall_mshr_misses 337288 # number of overall MSHR misses system.cpu1.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu1.icache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.cpu1.icache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache @@ -480,124 +439,98 @@ system.cpu1.icache.prefetcher.num_hwpf_issued 0 system.cpu1.icache.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated system.cpu1.icache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page system.cpu1.icache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time -system.cpu1.icache.protocol.hwpf_invalid 0 # hard prefetch misses to invalid blocks -system.cpu1.icache.protocol.read_invalid 97341 # read misses to invalid blocks -system.cpu1.icache.protocol.snoop_inv_exclusive 0 # Invalidate snoops on exclusive blocks -system.cpu1.icache.protocol.snoop_inv_invalid 0 # Invalidate snoops on invalid blocks -system.cpu1.icache.protocol.snoop_inv_modified 0 # Invalidate snoops on modified blocks -system.cpu1.icache.protocol.snoop_inv_owned 0 # Invalidate snoops on owned blocks -system.cpu1.icache.protocol.snoop_inv_shared 0 # Invalidate snoops on shared blocks -system.cpu1.icache.protocol.snoop_read_exclusive 39627 # read snoops on exclusive blocks -system.cpu1.icache.protocol.snoop_read_modified 0 # read snoops on modified blocks -system.cpu1.icache.protocol.snoop_read_owned 0 # read snoops on owned blocks -system.cpu1.icache.protocol.snoop_read_shared 214588 # read snoops on shared blocks -system.cpu1.icache.protocol.snoop_readex_exclusive 26 # readEx snoops on exclusive blocks -system.cpu1.icache.protocol.snoop_readex_modified 0 # readEx snoops on modified blocks -system.cpu1.icache.protocol.snoop_readex_owned 0 # readEx snoops on owned blocks -system.cpu1.icache.protocol.snoop_readex_shared 0 # readEx snoops on shared blocks -system.cpu1.icache.protocol.snoop_upgrade_owned 0 # upgrade snoops on owned blocks -system.cpu1.icache.protocol.snoop_upgrade_shared 2 # upgradee snoops on shared blocks -system.cpu1.icache.protocol.snoop_writeinv_exclusive 0 # WriteInvalidate snoops on exclusive blocks -system.cpu1.icache.protocol.snoop_writeinv_invalid 0 # WriteInvalidate snoops on invalid blocks -system.cpu1.icache.protocol.snoop_writeinv_modified 0 # WriteInvalidate snoops on modified blocks -system.cpu1.icache.protocol.snoop_writeinv_owned 0 # WriteInvalidate snoops on owned blocks -system.cpu1.icache.protocol.snoop_writeinv_shared 0 # WriteInvalidate snoops on shared blocks -system.cpu1.icache.protocol.swpf_invalid 0 # soft prefetch misses to invalid blocks -system.cpu1.icache.protocol.write_invalid 0 # write misses to invalid blocks -system.cpu1.icache.protocol.write_owned 0 # write misses to owned blocks -system.cpu1.icache.protocol.write_shared 0 # write misses to shared blocks -system.cpu1.icache.replacements 91073 # number of replacements -system.cpu1.icache.sampled_refs 91585 # Sample count of references to valid blocks. +system.cpu1.icache.replacements 336747 # number of replacements +system.cpu1.icache.sampled_refs 337259 # Sample count of references to valid blocks. system.cpu1.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu1.icache.tagsinuse 420.500398 # Cycle average of tags in use -system.cpu1.icache.total_refs 5281041 # Total number of references to valid blocks. -system.cpu1.icache.warmup_cycle 1947911714000 # Cycle when the warmup percentage was hit. +system.cpu1.icache.tagsinuse 445.859240 # Cycle average of tags in use +system.cpu1.icache.total_refs 13421057 # Total number of references to valid blocks. +system.cpu1.icache.warmup_cycle 1946103109000 # Cycle when the warmup percentage was hit. system.cpu1.icache.writebacks 0 # number of writebacks -system.cpu1.idle_fraction 0.995322 # Percentage of idle cycles -system.cpu1.itb.accesses 1399877 # ITB accesses -system.cpu1.itb.acv 41 # ITB acv -system.cpu1.itb.hits 1398631 # ITB hits -system.cpu1.itb.misses 1246 # ITB misses -system.cpu1.kern.callpal 29847 # number of callpals executed +system.cpu1.idle_fraction 0.987201 # Percentage of idle cycles +system.cpu1.itb.accesses 1878768 # ITB accesses +system.cpu1.itb.acv 23 # ITB acv +system.cpu1.itb.hits 1877648 # ITB hits +system.cpu1.itb.misses 1120 # ITB misses +system.cpu1.kern.callpal 75334 # number of callpals executed system.cpu1.kern.callpal_cserve 1 0.00% 0.00% # number of callpals executed -system.cpu1.kern.callpal_wripir 6 0.02% 0.02% # number of callpals executed -system.cpu1.kern.callpal_wrmces 1 0.00% 0.03% # number of callpals executed -system.cpu1.kern.callpal_wrfen 1 0.00% 0.03% # number of callpals executed -system.cpu1.kern.callpal_swpctx 375 1.26% 1.29% # number of callpals executed -system.cpu1.kern.callpal_tbi 10 0.03% 1.32% # number of callpals executed -system.cpu1.kern.callpal_wrent 7 0.02% 1.34% # number of callpals executed -system.cpu1.kern.callpal_swpipl 24461 81.95% 83.30% # number of callpals executed -system.cpu1.kern.callpal_rdps 2201 7.37% 90.67% # number of callpals executed -system.cpu1.kern.callpal_wrkgp 1 0.00% 90.68% # number of callpals executed -system.cpu1.kern.callpal_wrusp 3 0.01% 90.69% # number of callpals executed -system.cpu1.kern.callpal_rdusp 2 0.01% 90.69% # number of callpals executed -system.cpu1.kern.callpal_whami 3 0.01% 90.70% # number of callpals executed -system.cpu1.kern.callpal_rti 2582 8.65% 99.35% # number of callpals executed -system.cpu1.kern.callpal_callsys 161 0.54% 99.89% # number of callpals executed -system.cpu1.kern.callpal_imb 31 0.10% 100.00% # number of callpals executed +system.cpu1.kern.callpal_wripir 442 0.59% 0.59% # number of callpals executed +system.cpu1.kern.callpal_wrmces 1 0.00% 0.59% # number of callpals executed +system.cpu1.kern.callpal_wrfen 1 0.00% 0.59% # number of callpals executed +system.cpu1.kern.callpal_swpctx 2091 2.78% 3.37% # number of callpals executed +system.cpu1.kern.callpal_tbi 7 0.01% 3.38% # number of callpals executed +system.cpu1.kern.callpal_wrent 7 0.01% 3.38% # number of callpals executed +system.cpu1.kern.callpal_swpipl 66409 88.15% 91.54% # number of callpals executed +system.cpu1.kern.callpal_rdps 2344 3.11% 94.65% # number of callpals executed +system.cpu1.kern.callpal_wrkgp 1 0.00% 94.65% # number of callpals executed +system.cpu1.kern.callpal_wrusp 3 0.00% 94.65% # number of callpals executed +system.cpu1.kern.callpal_rdusp 1 0.00% 94.66% # number of callpals executed +system.cpu1.kern.callpal_whami 3 0.00% 94.66% # number of callpals executed +system.cpu1.kern.callpal_rti 3844 5.10% 99.76% # number of callpals executed +system.cpu1.kern.callpal_callsys 147 0.20% 99.96% # number of callpals executed +system.cpu1.kern.callpal_imb 31 0.04% 100.00% # number of callpals executed system.cpu1.kern.callpal_rdunique 1 0.00% 100.00% # number of callpals executed system.cpu1.kern.inst.arm 0 # number of arm instructions executed -system.cpu1.kern.inst.hwrei 36385 # number of hwrei instructions executed -system.cpu1.kern.inst.quiesce 2332 # number of quiesce instructions executed -system.cpu1.kern.ipl_count 29103 # number of times we switched to this ipl -system.cpu1.kern.ipl_count_0 9344 32.11% 32.11% # number of times we switched to this ipl -system.cpu1.kern.ipl_count_22 1963 6.75% 38.85% # number of times we switched to this ipl -system.cpu1.kern.ipl_count_30 96 0.33% 39.18% # number of times we switched to this ipl -system.cpu1.kern.ipl_count_31 17700 60.82% 100.00% # number of times we switched to this ipl -system.cpu1.kern.ipl_good 20635 # number of times we switched to this ipl from a different ipl -system.cpu1.kern.ipl_good_0 9336 45.24% 45.24% # number of times we switched to this ipl from a different ipl -system.cpu1.kern.ipl_good_22 1963 9.51% 54.76% # number of times we switched to this ipl from a different ipl -system.cpu1.kern.ipl_good_30 96 0.47% 55.22% # number of times we switched to this ipl from a different ipl -system.cpu1.kern.ipl_good_31 9240 44.78% 100.00% # number of times we switched to this ipl from a different ipl -system.cpu1.kern.ipl_ticks 1950372731000 # number of cycles we spent at this ipl -system.cpu1.kern.ipl_ticks_0 1911409272000 98.00% 98.00% # number of cycles we spent at this ipl -system.cpu1.kern.ipl_ticks_22 494740000 0.03% 98.03% # number of cycles we spent at this ipl -system.cpu1.kern.ipl_ticks_30 52316000 0.00% 98.03% # number of cycles we spent at this ipl -system.cpu1.kern.ipl_ticks_31 38416403000 1.97% 100.00% # number of cycles we spent at this ipl -system.cpu1.kern.ipl_used_0 0.999144 # fraction of swpipl calls that actually changed the ipl +system.cpu1.kern.inst.hwrei 81908 # number of hwrei instructions executed +system.cpu1.kern.inst.quiesce 2770 # number of quiesce instructions executed +system.cpu1.kern.ipl_count 72754 # number of times we switched to this ipl +system.cpu1.kern.ipl_count_0 28089 38.61% 38.61% # number of times we switched to this ipl +system.cpu1.kern.ipl_count_22 1964 2.70% 41.31% # number of times we switched to this ipl +system.cpu1.kern.ipl_count_30 536 0.74% 42.04% # number of times we switched to this ipl +system.cpu1.kern.ipl_count_31 42165 57.96% 100.00% # number of times we switched to this ipl +system.cpu1.kern.ipl_good 56376 # number of times we switched to this ipl from a different ipl +system.cpu1.kern.ipl_good_0 27206 48.26% 48.26% # number of times we switched to this ipl from a different ipl +system.cpu1.kern.ipl_good_22 1964 3.48% 51.74% # number of times we switched to this ipl from a different ipl +system.cpu1.kern.ipl_good_30 536 0.95% 52.69% # number of times we switched to this ipl from a different ipl +system.cpu1.kern.ipl_good_31 26670 47.31% 100.00% # number of times we switched to this ipl from a different ipl +system.cpu1.kern.ipl_ticks 1951174446000 # number of cycles we spent at this ipl +system.cpu1.kern.ipl_ticks_0 1904796411500 97.62% 97.62% # number of cycles we spent at this ipl +system.cpu1.kern.ipl_ticks_22 499877500 0.03% 97.65% # number of cycles we spent at this ipl +system.cpu1.kern.ipl_ticks_30 327859000 0.02% 97.67% # number of cycles we spent at this ipl +system.cpu1.kern.ipl_ticks_31 45550298000 2.33% 100.00% # number of cycles we spent at this ipl +system.cpu1.kern.ipl_used_0 0.968564 # fraction of swpipl calls that actually changed the ipl system.cpu1.kern.ipl_used_22 1 # fraction of swpipl calls that actually changed the ipl system.cpu1.kern.ipl_used_30 1 # fraction of swpipl calls that actually changed the ipl -system.cpu1.kern.ipl_used_31 0.522034 # fraction of swpipl calls that actually changed the ipl -system.cpu1.kern.mode_good_kernel 538 -system.cpu1.kern.mode_good_user 517 -system.cpu1.kern.mode_good_idle 21 -system.cpu1.kern.mode_switch_kernel 884 # number of protection mode switches -system.cpu1.kern.mode_switch_user 517 # number of protection mode switches -system.cpu1.kern.mode_switch_idle 2075 # number of protection mode switches -system.cpu1.kern.mode_switch_good 1.618718 # fraction of useful protection mode switches -system.cpu1.kern.mode_switch_good_kernel 0.608597 # fraction of useful protection mode switches +system.cpu1.kern.ipl_used_31 0.632515 # fraction of swpipl calls that actually changed the ipl +system.cpu1.kern.mode_good_kernel 924 +system.cpu1.kern.mode_good_user 463 +system.cpu1.kern.mode_good_idle 461 +system.cpu1.kern.mode_switch_kernel 2120 # number of protection mode switches +system.cpu1.kern.mode_switch_user 463 # number of protection mode switches +system.cpu1.kern.mode_switch_idle 2943 # number of protection mode switches +system.cpu1.kern.mode_switch_good 1.592492 # fraction of useful protection mode switches +system.cpu1.kern.mode_switch_good_kernel 0.435849 # fraction of useful protection mode switches system.cpu1.kern.mode_switch_good_user 1 # fraction of useful protection mode switches -system.cpu1.kern.mode_switch_good_idle 0.010120 # fraction of useful protection mode switches -system.cpu1.kern.mode_ticks_kernel 3563216000 0.18% 0.18% # number of ticks spent at the given mode -system.cpu1.kern.mode_ticks_user 1513259000 0.08% 0.26% # number of ticks spent at the given mode -system.cpu1.kern.mode_ticks_idle 1945257297000 99.74% 100.00% # number of ticks spent at the given mode -system.cpu1.kern.swap_context 376 # number of times the context was actually changed -system.cpu1.kern.syscall 102 # number of syscalls executed -system.cpu1.kern.syscall_2 2 1.96% 1.96% # number of syscalls executed -system.cpu1.kern.syscall_3 11 10.78% 12.75% # number of syscalls executed -system.cpu1.kern.syscall_4 1 0.98% 13.73% # number of syscalls executed -system.cpu1.kern.syscall_6 12 11.76% 25.49% # number of syscalls executed -system.cpu1.kern.syscall_17 5 4.90% 30.39% # number of syscalls executed -system.cpu1.kern.syscall_19 4 3.92% 34.31% # number of syscalls executed -system.cpu1.kern.syscall_20 2 1.96% 36.27% # number of syscalls executed -system.cpu1.kern.syscall_23 2 1.96% 38.24% # number of syscalls executed -system.cpu1.kern.syscall_24 2 1.96% 40.20% # number of syscalls executed -system.cpu1.kern.syscall_33 3 2.94% 43.14% # number of syscalls executed -system.cpu1.kern.syscall_45 15 14.71% 57.84% # number of syscalls executed -system.cpu1.kern.syscall_47 2 1.96% 59.80% # number of syscalls executed -system.cpu1.kern.syscall_48 3 2.94% 62.75% # number of syscalls executed -system.cpu1.kern.syscall_54 1 0.98% 63.73% # number of syscalls executed -system.cpu1.kern.syscall_59 2 1.96% 65.69% # number of syscalls executed -system.cpu1.kern.syscall_71 22 21.57% 87.25% # number of syscalls executed -system.cpu1.kern.syscall_74 7 6.86% 94.12% # number of syscalls executed -system.cpu1.kern.syscall_90 1 0.98% 95.10% # number of syscalls executed -system.cpu1.kern.syscall_92 2 1.96% 97.06% # number of syscalls executed -system.cpu1.kern.syscall_132 2 1.96% 99.02% # number of syscalls executed -system.cpu1.kern.syscall_144 1 0.98% 100.00% # number of syscalls executed -system.cpu1.not_idle_fraction 0.004678 # Percentage of non-idle cycles -system.cpu1.numCycles 1950372761000 # number of cpu cycles simulated -system.cpu1.num_insts 5376264 # Number of instructions executed -system.cpu1.num_refs 1738417 # Number of memory references +system.cpu1.kern.mode_switch_good_idle 0.156643 # fraction of useful protection mode switches +system.cpu1.kern.mode_ticks_kernel 18594859000 0.95% 0.95% # number of ticks spent at the given mode +system.cpu1.kern.mode_ticks_user 1499702000 0.08% 1.03% # number of ticks spent at the given mode +system.cpu1.kern.mode_ticks_idle 1930131145000 98.97% 100.00% # number of ticks spent at the given mode +system.cpu1.kern.swap_context 2092 # number of times the context was actually changed +system.cpu1.kern.syscall 98 # number of syscalls executed +system.cpu1.kern.syscall_2 1 1.02% 1.02% # number of syscalls executed +system.cpu1.kern.syscall_3 11 11.22% 12.24% # number of syscalls executed +system.cpu1.kern.syscall_4 1 1.02% 13.27% # number of syscalls executed +system.cpu1.kern.syscall_6 11 11.22% 24.49% # number of syscalls executed +system.cpu1.kern.syscall_17 5 5.10% 29.59% # number of syscalls executed +system.cpu1.kern.syscall_19 4 4.08% 33.67% # number of syscalls executed +system.cpu1.kern.syscall_20 2 2.04% 35.71% # number of syscalls executed +system.cpu1.kern.syscall_23 2 2.04% 37.76% # number of syscalls executed +system.cpu1.kern.syscall_24 2 2.04% 39.80% # number of syscalls executed +system.cpu1.kern.syscall_33 3 3.06% 42.86% # number of syscalls executed +system.cpu1.kern.syscall_45 15 15.31% 58.16% # number of syscalls executed +system.cpu1.kern.syscall_47 2 2.04% 60.20% # number of syscalls executed +system.cpu1.kern.syscall_48 2 2.04% 62.24% # number of syscalls executed +system.cpu1.kern.syscall_54 1 1.02% 63.27% # number of syscalls executed +system.cpu1.kern.syscall_59 1 1.02% 64.29% # number of syscalls executed +system.cpu1.kern.syscall_71 22 22.45% 86.73% # number of syscalls executed +system.cpu1.kern.syscall_74 7 7.14% 93.88% # number of syscalls executed +system.cpu1.kern.syscall_90 1 1.02% 94.90% # number of syscalls executed +system.cpu1.kern.syscall_92 2 2.04% 96.94% # number of syscalls executed +system.cpu1.kern.syscall_132 2 2.04% 98.98% # number of syscalls executed +system.cpu1.kern.syscall_144 1 1.02% 100.00% # number of syscalls executed +system.cpu1.not_idle_fraction 0.012799 # Percentage of non-idle cycles +system.cpu1.numCycles 1951174476000 # number of cpu cycles simulated +system.cpu1.num_insts 13758345 # Number of instructions executed +system.cpu1.num_refs 4385954 # Number of memory references system.disk0.dma_read_bytes 1024 # Number of bytes transfered via DMA reads (not PRD). system.disk0.dma_read_full_pages 0 # Number of full page size DMA reads (not PRD). system.disk0.dma_read_txs 1 # Number of DMA read transactions (not PRD). @@ -610,75 +543,80 @@ system.disk2.dma_read_txs 0 # Nu system.disk2.dma_write_bytes 8192 # Number of bytes transfered via DMA writes. system.disk2.dma_write_full_pages 1 # Number of full page size DMA writes. system.disk2.dma_write_txs 1 # Number of DMA write transactions. -system.l2c.ReadExReq_accesses 306499 # number of ReadExReq accesses(hits+misses) -system.l2c.ReadExReq_avg_miss_latency 12998.029396 # average ReadExReq miss latency -system.l2c.ReadExReq_avg_mshr_miss_latency 10997.988681 # average ReadExReq mshr miss latency -system.l2c.ReadExReq_hits 183694 # number of ReadExReq hits -system.l2c.ReadExReq_miss_latency 1596223000 # number of ReadExReq miss cycles -system.l2c.ReadExReq_miss_rate 0.400670 # miss rate for ReadExReq accesses -system.l2c.ReadExReq_misses 122805 # number of ReadExReq misses -system.l2c.ReadExReq_mshr_miss_latency 1350608000 # number of ReadExReq MSHR miss cycles -system.l2c.ReadExReq_mshr_miss_rate 0.400670 # mshr miss rate for ReadExReq accesses -system.l2c.ReadExReq_mshr_misses 122805 # number of ReadExReq MSHR misses -system.l2c.ReadReq_accesses 2751323 # number of ReadReq accesses(hits+misses) -system.l2c.ReadReq_avg_miss_latency 12999.901707 # average ReadReq miss latency -system.l2c.ReadReq_avg_mshr_miss_latency 10999.990968 # average ReadReq mshr miss latency -system.l2c.ReadReq_hits 1810263 # number of ReadReq hits -system.l2c.ReadReq_miss_latency 12233687500 # number of ReadReq miss cycles -system.l2c.ReadReq_miss_rate 0.342039 # miss rate for ReadReq accesses -system.l2c.ReadReq_misses 941060 # number of ReadReq misses -system.l2c.ReadReq_mshr_hits 11 # number of ReadReq MSHR hits -system.l2c.ReadReq_mshr_miss_latency 10351530500 # number of ReadReq MSHR miss cycles -system.l2c.ReadReq_mshr_miss_rate 0.342035 # mshr miss rate for ReadReq accesses -system.l2c.ReadReq_mshr_misses 941049 # number of ReadReq MSHR misses -system.l2c.ReadReq_mshr_uncacheable 6993 # number of ReadReq MSHR uncacheable -system.l2c.ReadResp_avg_mshr_uncacheable_latency inf # average ReadResp mshr uncacheable latency -system.l2c.ReadResp_mshr_uncacheable_latency 779629500 # number of ReadResp MSHR uncacheable cycles -system.l2c.WriteReq_mshr_uncacheable 12194 # number of WriteReq MSHR uncacheable -system.l2c.WriteResp_avg_mshr_uncacheable_latency inf # average WriteResp mshr uncacheable latency -system.l2c.WriteResp_mshr_uncacheable_latency 1356619000 # number of WriteResp MSHR uncacheable cycles -system.l2c.Writeback_accesses 429269 # number of Writeback accesses(hits+misses) -system.l2c.Writeback_hits 429256 # number of Writeback hits -system.l2c.Writeback_miss_rate 0.000030 # miss rate for Writeback accesses -system.l2c.Writeback_misses 13 # number of Writeback misses -system.l2c.Writeback_mshr_miss_rate 0.000030 # mshr miss rate for Writeback accesses -system.l2c.Writeback_mshr_misses 13 # number of Writeback MSHR misses +system.l2c.ReadExReq_accesses 297979 # number of ReadExReq accesses(hits+misses) +system.l2c.ReadExReq_avg_miss_latency 12000.808782 # average ReadExReq miss latency +system.l2c.ReadExReq_avg_mshr_miss_latency 11000.808782 # average ReadExReq mshr miss latency +system.l2c.ReadExReq_miss_latency 3575989000 # number of ReadExReq miss cycles +system.l2c.ReadExReq_miss_rate 1 # miss rate for ReadExReq accesses +system.l2c.ReadExReq_misses 297979 # number of ReadExReq misses +system.l2c.ReadExReq_mshr_miss_latency 3278010000 # number of ReadExReq MSHR miss cycles +system.l2c.ReadExReq_mshr_miss_rate 1 # mshr miss rate for ReadExReq accesses +system.l2c.ReadExReq_mshr_misses 297979 # number of ReadExReq MSHR misses +system.l2c.ReadReq_accesses 2726406 # number of ReadReq accesses(hits+misses) +system.l2c.ReadReq_avg_miss_latency 12000.355770 # average ReadReq miss latency +system.l2c.ReadReq_avg_mshr_miss_latency 11000.235046 # average ReadReq mshr miss latency +system.l2c.ReadReq_avg_mshr_uncacheable_latency inf # average ReadReq mshr uncacheable latency +system.l2c.ReadReq_hits 1633004 # number of ReadReq hits +system.l2c.ReadReq_miss_latency 13121213000 # number of ReadReq miss cycles +system.l2c.ReadReq_miss_rate 0.401042 # miss rate for ReadReq accesses +system.l2c.ReadReq_misses 1093402 # number of ReadReq misses +system.l2c.ReadReq_mshr_hits 12 # number of ReadReq MSHR hits +system.l2c.ReadReq_mshr_miss_latency 12027679000 # number of ReadReq MSHR miss cycles +system.l2c.ReadReq_mshr_miss_rate 0.401042 # mshr miss rate for ReadReq accesses +system.l2c.ReadReq_mshr_misses 1093402 # number of ReadReq MSHR misses +system.l2c.ReadReq_mshr_uncacheable_latency 779744500 # number of ReadReq MSHR uncacheable cycles +system.l2c.UpgradeReq_accesses 125211 # number of UpgradeReq accesses(hits+misses) +system.l2c.UpgradeReq_avg_miss_latency 11388.943463 # average UpgradeReq miss latency +system.l2c.UpgradeReq_avg_mshr_miss_latency 11003.410244 # average UpgradeReq mshr miss latency +system.l2c.UpgradeReq_miss_latency 1426021000 # number of UpgradeReq miss cycles +system.l2c.UpgradeReq_miss_rate 1 # miss rate for UpgradeReq accesses +system.l2c.UpgradeReq_misses 125211 # number of UpgradeReq misses +system.l2c.UpgradeReq_mshr_miss_latency 1377748000 # number of UpgradeReq MSHR miss cycles +system.l2c.UpgradeReq_mshr_miss_rate 1 # mshr miss rate for UpgradeReq accesses +system.l2c.UpgradeReq_mshr_misses 125211 # number of UpgradeReq MSHR misses +system.l2c.WriteReq_avg_mshr_uncacheable_latency inf # average WriteReq mshr uncacheable latency +system.l2c.WriteReq_mshr_uncacheable_latency 1551434500 # number of WriteReq MSHR uncacheable cycles +system.l2c.Writeback_accesses 416193 # number of Writeback accesses(hits+misses) +system.l2c.Writeback_miss_rate 1 # miss rate for Writeback accesses +system.l2c.Writeback_misses 416193 # number of Writeback misses +system.l2c.Writeback_mshr_miss_rate 1 # mshr miss rate for Writeback accesses +system.l2c.Writeback_mshr_misses 416193 # number of Writeback MSHR misses system.l2c.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked system.l2c.avg_blocked_cycles_no_targets # average number of cycles each access was blocked -system.l2c.avg_refs 2.277768 # Average number of references to valid blocks. +system.l2c.avg_refs 1.713697 # Average number of references to valid blocks. system.l2c.blocked_no_mshrs 0 # number of cycles access was blocked system.l2c.blocked_no_targets 0 # number of cycles access was blocked system.l2c.blocked_cycles_no_mshrs 0 # number of cycles access was blocked system.l2c.blocked_cycles_no_targets 0 # number of cycles access was blocked system.l2c.cache_copies 0 # number of cache copies performed -system.l2c.demand_accesses 2751323 # number of demand (read+write) accesses -system.l2c.demand_avg_miss_latency 12999.901707 # average overall miss latency -system.l2c.demand_avg_mshr_miss_latency 10999.990968 # average overall mshr miss latency -system.l2c.demand_hits 1810263 # number of demand (read+write) hits -system.l2c.demand_miss_latency 12233687500 # number of demand (read+write) miss cycles -system.l2c.demand_miss_rate 0.342039 # miss rate for demand accesses -system.l2c.demand_misses 941060 # number of demand (read+write) misses -system.l2c.demand_mshr_hits 11 # number of demand (read+write) MSHR hits -system.l2c.demand_mshr_miss_latency 10351530500 # number of demand (read+write) MSHR miss cycles -system.l2c.demand_mshr_miss_rate 0.342035 # mshr miss rate for demand accesses -system.l2c.demand_mshr_misses 941049 # number of demand (read+write) MSHR misses +system.l2c.demand_accesses 3024385 # number of demand (read+write) accesses +system.l2c.demand_avg_miss_latency 12000.452788 # average overall miss latency +system.l2c.demand_avg_mshr_miss_latency 11000.357918 # average overall mshr miss latency +system.l2c.demand_hits 1633004 # number of demand (read+write) hits +system.l2c.demand_miss_latency 16697202000 # number of demand (read+write) miss cycles +system.l2c.demand_miss_rate 0.460054 # miss rate for demand accesses +system.l2c.demand_misses 1391381 # number of demand (read+write) misses +system.l2c.demand_mshr_hits 12 # number of demand (read+write) MSHR hits +system.l2c.demand_mshr_miss_latency 15305689000 # number of demand (read+write) MSHR miss cycles +system.l2c.demand_mshr_miss_rate 0.460054 # mshr miss rate for demand accesses +system.l2c.demand_mshr_misses 1391381 # number of demand (read+write) MSHR misses system.l2c.fast_writes 0 # number of fast writes performed system.l2c.mshr_cap_events 0 # number of times MSHR cap was activated system.l2c.no_allocate_misses 0 # Number of misses that were no-allocate -system.l2c.overall_accesses 3180592 # number of overall (read+write) accesses -system.l2c.overall_avg_miss_latency 12999.722126 # average overall miss latency -system.l2c.overall_avg_mshr_miss_latency 10999.990968 # average overall mshr miss latency -system.l2c.overall_avg_mshr_uncacheable_latency 0 # average overall mshr uncacheable latency -system.l2c.overall_hits 2239519 # number of overall hits -system.l2c.overall_miss_latency 12233687500 # number of overall miss cycles -system.l2c.overall_miss_rate 0.295880 # miss rate for overall accesses -system.l2c.overall_misses 941073 # number of overall misses -system.l2c.overall_mshr_hits 11 # number of overall MSHR hits -system.l2c.overall_mshr_miss_latency 10351530500 # number of overall MSHR miss cycles -system.l2c.overall_mshr_miss_rate 0.295872 # mshr miss rate for overall accesses -system.l2c.overall_mshr_misses 941049 # number of overall MSHR misses -system.l2c.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles -system.l2c.overall_mshr_uncacheable_misses 19187 # number of overall MSHR uncacheable misses +system.l2c.overall_accesses 3024385 # number of overall (read+write) accesses +system.l2c.overall_avg_miss_latency 12000.452788 # average overall miss latency +system.l2c.overall_avg_mshr_miss_latency 11000.357918 # average overall mshr miss latency +system.l2c.overall_avg_mshr_uncacheable_latency inf # average overall mshr uncacheable latency +system.l2c.overall_hits 1633004 # number of overall hits +system.l2c.overall_miss_latency 16697202000 # number of overall miss cycles +system.l2c.overall_miss_rate 0.460054 # miss rate for overall accesses +system.l2c.overall_misses 1391381 # number of overall misses +system.l2c.overall_mshr_hits 12 # number of overall MSHR hits +system.l2c.overall_mshr_miss_latency 15305689000 # number of overall MSHR miss cycles +system.l2c.overall_mshr_miss_rate 0.460054 # mshr miss rate for overall accesses +system.l2c.overall_mshr_misses 1391381 # number of overall MSHR misses +system.l2c.overall_mshr_uncacheable_latency 2331179000 # number of overall MSHR uncacheable cycles +system.l2c.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.l2c.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache system.l2c.prefetcher.num_hwpf_already_in_mshr 0 # number of hwpf that were already in mshr system.l2c.prefetcher.num_hwpf_already_in_prefetcher 0 # number of hwpf that were already in the prefetch queue @@ -688,13 +626,13 @@ system.l2c.prefetcher.num_hwpf_issued 0 # nu system.l2c.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated system.l2c.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page system.l2c.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time -system.l2c.replacements 998318 # number of replacements -system.l2c.sampled_refs 1063854 # Sample count of references to valid blocks. +system.l2c.replacements 947502 # number of replacements +system.l2c.sampled_refs 965785 # Sample count of references to valid blocks. system.l2c.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.l2c.tagsinuse 65469.787238 # Cycle average of tags in use -system.l2c.total_refs 2423213 # Total number of references to valid blocks. -system.l2c.warmup_cycle 3064127000 # Cycle when the warmup percentage was hit. -system.l2c.writebacks 79556 # number of writebacks +system.l2c.tagsinuse 16369.951624 # Cycle average of tags in use +system.l2c.total_refs 1655063 # Total number of references to valid blocks. +system.l2c.warmup_cycle 5421925000 # Cycle when the warmup percentage was hit. +system.l2c.writebacks 0 # number of writebacks system.tsunami.ethernet.coalescedRxDesc # average number of RxDesc's coalesced into each post system.tsunami.ethernet.coalescedRxIdle # average number of RxIdle's coalesced into each post system.tsunami.ethernet.coalescedRxOk # average number of RxOk's coalesced into each post diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/stderr b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/stderr index af0df3710..e6ad9b469 100644 --- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/stderr +++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/stderr @@ -1,5 +1,5 @@ -Listening for system connection on port 3456 -0: system.remote_gdb.listener: listening for remote gdb #0 on port 7000 -0: system.remote_gdb.listener: listening for remote gdb #1 on port 7001 +Listening for system connection on port 3457 +0: system.remote_gdb.listener: listening for remote gdb on port 7001 +0: system.remote_gdb.listener: listening for remote gdb on port 7002 warn: Entering event queue @ 0. Starting simulation... -warn: 423901000: Trying to launch CPU number 1! +warn: 427086000: Trying to launch CPU number 1! diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/stdout b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/stdout index 68b58c461..99539f3ea 100644 --- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/stdout +++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing-dual/stdout @@ -5,10 +5,9 @@ The Regents of The University of Michigan All Rights Reserved -M5 compiled Jun 10 2007 14:10:03 -M5 started Mon Jun 11 01:30:38 2007 -M5 executing on iceaxe -command line: /Users/nate/build/outgoing/build/ALPHA_FS/m5.debug -d /Users/nate/build/outgoing/build/ALPHA_FS/tests/debug/quick/10.linux-boot/alpha/linux/tsunami-simple-timing-dual tests/run.py quick/10.linux-boot/alpha/linux/tsunami-simple-timing-dual +M5 compiled Aug 3 2007 04:02:11 +M5 started Fri Aug 3 04:25:10 2007 +M5 executing on zizzer.eecs.umich.edu +command line: build/ALPHA_FS/m5.fast -d build/ALPHA_FS/tests/fast/quick/10.linux-boot/alpha/linux/tsunami-simple-timing-dual tests/run.py quick/10.linux-boot/alpha/linux/tsunami-simple-timing-dual Global frequency set at 1000000000000 ticks per second - 0: system.tsunami.io.rtc: Real-time clock set to Thu Jan 1 00:00:00 2009 -Exiting @ tick 1951129131000 because m5_exit instruction encountered +Exiting @ tick 1951367346000 because m5_exit instruction encountered diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/config.ini b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/config.ini index c726f11fe..1992f65a2 100644 --- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/config.ini +++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/config.ini @@ -35,7 +35,7 @@ side_b=system.membus.port[0] [system.cpu] type=TimingSimpleCPU -children=dcache dtb icache itb +children=dcache dtb icache itb tracer clock=500 cpu_id=0 defer_registration=false @@ -54,17 +54,15 @@ phase=0 profile=0 progress_interval=0 system=system +tracer=system.cpu.tracer dcache_port=system.cpu.dcache.cpu_side icache_port=system.cpu.icache.cpu_side [system.cpu.dcache] type=BaseCache -children=protocol -adaptive_compression=false +addr_range=0:18446744073709551615 assoc=4 block_size=64 -compressed_bus=false -compression_latency=0 hash_delay=1 latency=1000 lifo=false @@ -82,12 +80,10 @@ prefetch_serial_squash=false prefetch_use_cpu_id=true prefetcher_size=100 prioritizeRequests=false -protocol=system.cpu.dcache.protocol repl=Null size=32768 split=false split_size=0 -store_compressed=false subblock_size=0 tgts_per_mshr=8 trace_addr=0 @@ -96,23 +92,15 @@ write_buffers=8 cpu_side=system.cpu.dcache_port mem_side=system.toL2Bus.port[2] -[system.cpu.dcache.protocol] -type=CoherenceProtocol -do_upgrades=true -protocol=moesi - [system.cpu.dtb] type=AlphaDTB size=64 [system.cpu.icache] type=BaseCache -children=protocol -adaptive_compression=false +addr_range=0:18446744073709551615 assoc=1 block_size=64 -compressed_bus=false -compression_latency=0 hash_delay=1 latency=1000 lifo=false @@ -130,12 +118,10 @@ prefetch_serial_squash=false prefetch_use_cpu_id=true prefetcher_size=100 prioritizeRequests=false -protocol=system.cpu.icache.protocol repl=Null size=32768 split=false split_size=0 -store_compressed=false subblock_size=0 tgts_per_mshr=8 trace_addr=0 @@ -144,15 +130,13 @@ write_buffers=8 cpu_side=system.cpu.icache_port mem_side=system.toL2Bus.port[1] -[system.cpu.icache.protocol] -type=CoherenceProtocol -do_upgrades=true -protocol=moesi - [system.cpu.itb] type=AlphaITB size=48 +[system.cpu.tracer] +type=ExeTracer + [system.disk0] type=IdeDisk children=image @@ -207,11 +191,9 @@ port=system.bridge.side_a system.tsunami.cchip.pio system.tsunami.pchip.pio syst [system.l2c] type=BaseCache -adaptive_compression=false +addr_range=0:18446744073709551615 assoc=8 block_size=64 -compressed_bus=false -compression_latency=0 hash_delay=1 latency=10000 lifo=false @@ -229,12 +211,10 @@ prefetch_serial_squash=false prefetch_use_cpu_id=true prefetcher_size=100 prioritizeRequests=false -protocol=Null repl=Null size=4194304 split=false split_size=0 -store_compressed=false subblock_size=0 tgts_per_mshr=16 trace_addr=0 @@ -798,7 +778,7 @@ pio_addr=8804615847936 pio_latency=1000 platform=system.tsunami system=system -time=2009 1 1 0 0 0 3 1 +time=Thu Jan 1 00:00:00 2009 tsunami=system.tsunami year_is_bcd=false pio=system.iobus.port[23] diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/m5stats.txt b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/m5stats.txt index f72789e4b..958246a30 100644 --- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/m5stats.txt +++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/m5stats.txt @@ -1,74 +1,93 @@ ---------- Begin Simulation Statistics ---------- -host_inst_rate 62427 # Simulator instruction rate (inst/s) -host_seconds 961.73 # Real time elapsed on the host -host_tick_rate 1983042717 # Simulator tick rate (ticks/s) +host_inst_rate 631972 # Simulator instruction rate (inst/s) +host_mem_usage 219140 # Number of bytes of host memory used +host_seconds 95.00 # Real time elapsed on the host +host_tick_rate 20109299069 # Simulator tick rate (ticks/s) sim_freq 1000000000000 # Frequency of simulated ticks -sim_insts 60037406 # Number of instructions simulated -sim_seconds 1.907146 # Number of seconds simulated -sim_ticks 1907146437000 # Number of ticks simulated -system.cpu.dcache.ReadReq_accesses 9726331 # number of ReadReq accesses(hits+misses) -system.cpu.dcache.ReadReq_avg_miss_latency 13065.219101 # average ReadReq miss latency -system.cpu.dcache.ReadReq_avg_mshr_miss_latency 12065.192690 # average ReadReq mshr miss latency -system.cpu.dcache.ReadReq_hits 7984648 # number of ReadReq hits -system.cpu.dcache.ReadReq_miss_latency 22755470000 # number of ReadReq miss cycles -system.cpu.dcache.ReadReq_miss_rate 0.179069 # miss rate for ReadReq accesses -system.cpu.dcache.ReadReq_misses 1741683 # number of ReadReq misses -system.cpu.dcache.ReadReq_mshr_miss_latency 21013741000 # number of ReadReq MSHR miss cycles -system.cpu.dcache.ReadReq_mshr_miss_rate 0.179069 # mshr miss rate for ReadReq accesses -system.cpu.dcache.ReadReq_mshr_misses 1741683 # number of ReadReq MSHR misses -system.cpu.dcache.ReadReq_mshr_uncacheable 6727 # number of ReadReq MSHR uncacheable -system.cpu.dcache.ReadResp_avg_mshr_uncacheable_latency inf # average ReadResp mshr uncacheable latency -system.cpu.dcache.ReadResp_mshr_uncacheable_latency 824099000 # number of ReadResp MSHR uncacheable cycles -system.cpu.dcache.WriteReq_accesses 6350552 # number of WriteReq accesses(hits+misses) -system.cpu.dcache.WriteReq_avg_miss_latency 12768.106941 # average WriteReq miss latency -system.cpu.dcache.WriteReq_avg_mshr_miss_latency 11768.067509 # average WriteReq mshr miss latency -system.cpu.dcache.WriteReq_hits 6046235 # number of WriteReq hits -system.cpu.dcache.WriteReq_miss_latency 3885552000 # number of WriteReq miss cycles -system.cpu.dcache.WriteReq_miss_rate 0.047920 # miss rate for WriteReq accesses -system.cpu.dcache.WriteReq_misses 304317 # number of WriteReq misses -system.cpu.dcache.WriteReq_mshr_miss_latency 3581223000 # number of WriteReq MSHR miss cycles -system.cpu.dcache.WriteReq_mshr_miss_rate 0.047920 # mshr miss rate for WriteReq accesses -system.cpu.dcache.WriteReq_mshr_misses 304317 # number of WriteReq MSHR misses -system.cpu.dcache.WriteReq_mshr_uncacheable 9438 # number of WriteReq MSHR uncacheable -system.cpu.dcache.WriteResp_avg_mshr_uncacheable_latency inf # average WriteResp mshr uncacheable latency -system.cpu.dcache.WriteResp_mshr_uncacheable_latency 1154484000 # number of WriteResp MSHR uncacheable cycles +sim_insts 60034774 # Number of instructions simulated +sim_seconds 1.910310 # Number of seconds simulated +sim_ticks 1910309711000 # Number of ticks simulated +system.cpu.dcache.LoadLockedReq_accesses 200211 # number of LoadLockedReq accesses(hits+misses) +system.cpu.dcache.LoadLockedReq_avg_miss_latency 13960.656682 # average LoadLockedReq miss latency +system.cpu.dcache.LoadLockedReq_avg_mshr_miss_latency 12960.656682 # average LoadLockedReq mshr miss latency +system.cpu.dcache.LoadLockedReq_hits 182851 # number of LoadLockedReq hits +system.cpu.dcache.LoadLockedReq_miss_latency 242357000 # number of LoadLockedReq miss cycles +system.cpu.dcache.LoadLockedReq_miss_rate 0.086709 # miss rate for LoadLockedReq accesses +system.cpu.dcache.LoadLockedReq_misses 17360 # number of LoadLockedReq misses +system.cpu.dcache.LoadLockedReq_mshr_miss_latency 224997000 # number of LoadLockedReq MSHR miss cycles +system.cpu.dcache.LoadLockedReq_mshr_miss_rate 0.086709 # mshr miss rate for LoadLockedReq accesses +system.cpu.dcache.LoadLockedReq_mshr_misses 17360 # number of LoadLockedReq MSHR misses +system.cpu.dcache.ReadReq_accesses 9525872 # number of ReadReq accesses(hits+misses) +system.cpu.dcache.ReadReq_avg_miss_latency 13240.454388 # average ReadReq miss latency +system.cpu.dcache.ReadReq_avg_mshr_miss_latency 12240.427719 # average ReadReq mshr miss latency +system.cpu.dcache.ReadReq_avg_mshr_uncacheable_latency inf # average ReadReq mshr uncacheable latency +system.cpu.dcache.ReadReq_hits 7801048 # number of ReadReq hits +system.cpu.dcache.ReadReq_miss_latency 22837453500 # number of ReadReq miss cycles +system.cpu.dcache.ReadReq_miss_rate 0.181067 # miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_misses 1724824 # number of ReadReq misses +system.cpu.dcache.ReadReq_mshr_miss_latency 21112583500 # number of ReadReq MSHR miss cycles +system.cpu.dcache.ReadReq_mshr_miss_rate 0.181067 # mshr miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_mshr_misses 1724824 # number of ReadReq MSHR misses +system.cpu.dcache.ReadReq_mshr_uncacheable_latency 830826000 # number of ReadReq MSHR uncacheable cycles +system.cpu.dcache.StoreCondReq_accesses 199189 # number of StoreCondReq accesses(hits+misses) +system.cpu.dcache.StoreCondReq_avg_miss_latency 14000.798456 # average StoreCondReq miss latency +system.cpu.dcache.StoreCondReq_avg_mshr_miss_latency 13000.798456 # average StoreCondReq mshr miss latency +system.cpu.dcache.StoreCondReq_hits 169131 # number of StoreCondReq hits +system.cpu.dcache.StoreCondReq_miss_latency 420836000 # number of StoreCondReq miss cycles +system.cpu.dcache.StoreCondReq_miss_rate 0.150902 # miss rate for StoreCondReq accesses +system.cpu.dcache.StoreCondReq_misses 30058 # number of StoreCondReq misses +system.cpu.dcache.StoreCondReq_mshr_miss_latency 390778000 # number of StoreCondReq MSHR miss cycles +system.cpu.dcache.StoreCondReq_mshr_miss_rate 0.150902 # mshr miss rate for StoreCondReq accesses +system.cpu.dcache.StoreCondReq_mshr_misses 30058 # number of StoreCondReq MSHR misses +system.cpu.dcache.WriteReq_accesses 6151132 # number of WriteReq accesses(hits+misses) +system.cpu.dcache.WriteReq_avg_miss_latency 14000.947966 # average WriteReq miss latency +system.cpu.dcache.WriteReq_avg_mshr_miss_latency 13000.947966 # average WriteReq mshr miss latency +system.cpu.dcache.WriteReq_avg_mshr_uncacheable_latency inf # average WriteReq mshr uncacheable latency +system.cpu.dcache.WriteReq_hits 5750801 # number of WriteReq hits +system.cpu.dcache.WriteReq_miss_latency 5605013500 # number of WriteReq miss cycles +system.cpu.dcache.WriteReq_miss_rate 0.065082 # miss rate for WriteReq accesses +system.cpu.dcache.WriteReq_misses 400331 # number of WriteReq misses +system.cpu.dcache.WriteReq_mshr_miss_latency 5204682500 # number of WriteReq MSHR miss cycles +system.cpu.dcache.WriteReq_mshr_miss_rate 0.065082 # mshr miss rate for WriteReq accesses +system.cpu.dcache.WriteReq_mshr_misses 400331 # number of WriteReq MSHR misses +system.cpu.dcache.WriteReq_mshr_uncacheable_latency 1164414500 # number of WriteReq MSHR uncacheable cycles system.cpu.dcache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked system.cpu.dcache.avg_blocked_cycles_no_targets # average number of cycles each access was blocked -system.cpu.dcache.avg_refs 6.857760 # Average number of references to valid blocks. +system.cpu.dcache.avg_refs 6.854770 # Average number of references to valid blocks. system.cpu.dcache.blocked_no_mshrs 0 # number of cycles access was blocked system.cpu.dcache.blocked_no_targets 0 # number of cycles access was blocked system.cpu.dcache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked system.cpu.dcache.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu.dcache.cache_copies 0 # number of cache copies performed -system.cpu.dcache.demand_accesses 16076883 # number of demand (read+write) accesses -system.cpu.dcache.demand_avg_miss_latency 13021.027370 # average overall miss latency -system.cpu.dcache.demand_avg_mshr_miss_latency 12020.999022 # average overall mshr miss latency -system.cpu.dcache.demand_hits 14030883 # number of demand (read+write) hits -system.cpu.dcache.demand_miss_latency 26641022000 # number of demand (read+write) miss cycles -system.cpu.dcache.demand_miss_rate 0.127263 # miss rate for demand accesses -system.cpu.dcache.demand_misses 2046000 # number of demand (read+write) misses +system.cpu.dcache.demand_accesses 15677004 # number of demand (read+write) accesses +system.cpu.dcache.demand_avg_miss_latency 13383.714129 # average overall miss latency +system.cpu.dcache.demand_avg_mshr_miss_latency 12383.692484 # average overall mshr miss latency +system.cpu.dcache.demand_hits 13551849 # number of demand (read+write) hits +system.cpu.dcache.demand_miss_latency 28442467000 # number of demand (read+write) miss cycles +system.cpu.dcache.demand_miss_rate 0.135559 # miss rate for demand accesses +system.cpu.dcache.demand_misses 2125155 # number of demand (read+write) misses system.cpu.dcache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu.dcache.demand_mshr_miss_latency 24594964000 # number of demand (read+write) MSHR miss cycles -system.cpu.dcache.demand_mshr_miss_rate 0.127263 # mshr miss rate for demand accesses -system.cpu.dcache.demand_mshr_misses 2046000 # number of demand (read+write) MSHR misses +system.cpu.dcache.demand_mshr_miss_latency 26317266000 # number of demand (read+write) MSHR miss cycles +system.cpu.dcache.demand_mshr_miss_rate 0.135559 # mshr miss rate for demand accesses +system.cpu.dcache.demand_mshr_misses 2125155 # number of demand (read+write) MSHR misses system.cpu.dcache.fast_writes 0 # number of fast writes performed system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.dcache.overall_accesses 16076883 # number of overall (read+write) accesses -system.cpu.dcache.overall_avg_miss_latency 13021.027370 # average overall miss latency -system.cpu.dcache.overall_avg_mshr_miss_latency 12020.999022 # average overall mshr miss latency -system.cpu.dcache.overall_avg_mshr_uncacheable_latency 0 # average overall mshr uncacheable latency -system.cpu.dcache.overall_hits 14030883 # number of overall hits -system.cpu.dcache.overall_miss_latency 26641022000 # number of overall miss cycles -system.cpu.dcache.overall_miss_rate 0.127263 # miss rate for overall accesses -system.cpu.dcache.overall_misses 2046000 # number of overall misses +system.cpu.dcache.overall_accesses 15677004 # number of overall (read+write) accesses +system.cpu.dcache.overall_avg_miss_latency 13383.714129 # average overall miss latency +system.cpu.dcache.overall_avg_mshr_miss_latency 12383.692484 # average overall mshr miss latency +system.cpu.dcache.overall_avg_mshr_uncacheable_latency inf # average overall mshr uncacheable latency +system.cpu.dcache.overall_hits 13551849 # number of overall hits +system.cpu.dcache.overall_miss_latency 28442467000 # number of overall miss cycles +system.cpu.dcache.overall_miss_rate 0.135559 # miss rate for overall accesses +system.cpu.dcache.overall_misses 2125155 # number of overall misses system.cpu.dcache.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu.dcache.overall_mshr_miss_latency 24594964000 # number of overall MSHR miss cycles -system.cpu.dcache.overall_mshr_miss_rate 0.127263 # mshr miss rate for overall accesses -system.cpu.dcache.overall_mshr_misses 2046000 # number of overall MSHR misses -system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles -system.cpu.dcache.overall_mshr_uncacheable_misses 16165 # number of overall MSHR uncacheable misses +system.cpu.dcache.overall_mshr_miss_latency 26317266000 # number of overall MSHR miss cycles +system.cpu.dcache.overall_mshr_miss_rate 0.135559 # mshr miss rate for overall accesses +system.cpu.dcache.overall_mshr_misses 2125155 # number of overall MSHR misses +system.cpu.dcache.overall_mshr_uncacheable_latency 1995240500 # number of overall MSHR uncacheable cycles +system.cpu.dcache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.cpu.dcache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache system.cpu.dcache.prefetcher.num_hwpf_already_in_mshr 0 # number of hwpf that were already in mshr system.cpu.dcache.prefetcher.num_hwpf_already_in_prefetcher 0 # number of hwpf that were already in the prefetch queue @@ -78,95 +97,69 @@ system.cpu.dcache.prefetcher.num_hwpf_issued 0 system.cpu.dcache.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated system.cpu.dcache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time -system.cpu.dcache.protocol.hwpf_invalid 0 # hard prefetch misses to invalid blocks -system.cpu.dcache.protocol.read_invalid 1741683 # read misses to invalid blocks -system.cpu.dcache.protocol.snoop_inv_exclusive 0 # Invalidate snoops on exclusive blocks -system.cpu.dcache.protocol.snoop_inv_invalid 0 # Invalidate snoops on invalid blocks -system.cpu.dcache.protocol.snoop_inv_modified 0 # Invalidate snoops on modified blocks -system.cpu.dcache.protocol.snoop_inv_owned 0 # Invalidate snoops on owned blocks -system.cpu.dcache.protocol.snoop_inv_shared 0 # Invalidate snoops on shared blocks -system.cpu.dcache.protocol.snoop_read_exclusive 9 # read snoops on exclusive blocks -system.cpu.dcache.protocol.snoop_read_modified 15 # read snoops on modified blocks -system.cpu.dcache.protocol.snoop_read_owned 4 # read snoops on owned blocks -system.cpu.dcache.protocol.snoop_read_shared 92 # read snoops on shared blocks -system.cpu.dcache.protocol.snoop_readex_exclusive 0 # readEx snoops on exclusive blocks -system.cpu.dcache.protocol.snoop_readex_modified 0 # readEx snoops on modified blocks -system.cpu.dcache.protocol.snoop_readex_owned 0 # readEx snoops on owned blocks -system.cpu.dcache.protocol.snoop_readex_shared 0 # readEx snoops on shared blocks -system.cpu.dcache.protocol.snoop_upgrade_owned 0 # upgrade snoops on owned blocks -system.cpu.dcache.protocol.snoop_upgrade_shared 0 # upgradee snoops on shared blocks -system.cpu.dcache.protocol.snoop_writeinv_exclusive 0 # WriteInvalidate snoops on exclusive blocks -system.cpu.dcache.protocol.snoop_writeinv_invalid 0 # WriteInvalidate snoops on invalid blocks -system.cpu.dcache.protocol.snoop_writeinv_modified 0 # WriteInvalidate snoops on modified blocks -system.cpu.dcache.protocol.snoop_writeinv_owned 0 # WriteInvalidate snoops on owned blocks -system.cpu.dcache.protocol.snoop_writeinv_shared 0 # WriteInvalidate snoops on shared blocks -system.cpu.dcache.protocol.swpf_invalid 0 # soft prefetch misses to invalid blocks -system.cpu.dcache.protocol.write_invalid 304305 # write misses to invalid blocks -system.cpu.dcache.protocol.write_owned 8 # write misses to owned blocks -system.cpu.dcache.protocol.write_shared 4 # write misses to shared blocks -system.cpu.dcache.replacements 2045476 # number of replacements -system.cpu.dcache.sampled_refs 2045988 # Sample count of references to valid blocks. +system.cpu.dcache.replacements 2046194 # number of replacements +system.cpu.dcache.sampled_refs 2046706 # Sample count of references to valid blocks. system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.dcache.tagsinuse 511.987904 # Cycle average of tags in use -system.cpu.dcache.total_refs 14030895 # Total number of references to valid blocks. -system.cpu.dcache.warmup_cycle 57945000 # Cycle when the warmup percentage was hit. -system.cpu.dcache.writebacks 429989 # number of writebacks +system.cpu.dcache.tagsinuse 511.987834 # Cycle average of tags in use +system.cpu.dcache.total_refs 14029698 # Total number of references to valid blocks. +system.cpu.dcache.warmup_cycle 58297000 # Cycle when the warmup percentage was hit. +system.cpu.dcache.writebacks 429991 # number of writebacks system.cpu.dtb.accesses 1020787 # DTB accesses system.cpu.dtb.acv 367 # DTB access violations -system.cpu.dtb.hits 16057425 # DTB hits +system.cpu.dtb.hits 16056951 # DTB hits system.cpu.dtb.misses 11471 # DTB misses system.cpu.dtb.read_accesses 728856 # DTB read accesses system.cpu.dtb.read_acv 210 # DTB read access violations -system.cpu.dtb.read_hits 9706740 # DTB read hits +system.cpu.dtb.read_hits 9706492 # DTB read hits system.cpu.dtb.read_misses 10329 # DTB read misses system.cpu.dtb.write_accesses 291931 # DTB write accesses system.cpu.dtb.write_acv 157 # DTB write access violations -system.cpu.dtb.write_hits 6350685 # DTB write hits +system.cpu.dtb.write_hits 6350459 # DTB write hits system.cpu.dtb.write_misses 1142 # DTB write misses -system.cpu.icache.ReadReq_accesses 60037407 # number of ReadReq accesses(hits+misses) -system.cpu.icache.ReadReq_avg_miss_latency 12029.456206 # average ReadReq miss latency -system.cpu.icache.ReadReq_avg_mshr_miss_latency 11028.713640 # average ReadReq mshr miss latency -system.cpu.icache.ReadReq_hits 59110217 # number of ReadReq hits -system.cpu.icache.ReadReq_miss_latency 11153591500 # number of ReadReq miss cycles -system.cpu.icache.ReadReq_miss_rate 0.015444 # miss rate for ReadReq accesses -system.cpu.icache.ReadReq_misses 927190 # number of ReadReq misses -system.cpu.icache.ReadReq_mshr_miss_latency 10225713000 # number of ReadReq MSHR miss cycles -system.cpu.icache.ReadReq_mshr_miss_rate 0.015444 # mshr miss rate for ReadReq accesses -system.cpu.icache.ReadReq_mshr_misses 927190 # number of ReadReq MSHR misses +system.cpu.icache.ReadReq_accesses 60034775 # number of ReadReq accesses(hits+misses) +system.cpu.icache.ReadReq_avg_miss_latency 12033.060657 # average ReadReq miss latency +system.cpu.icache.ReadReq_avg_mshr_miss_latency 11032.326155 # average ReadReq mshr miss latency +system.cpu.icache.ReadReq_hits 59106935 # number of ReadReq hits +system.cpu.icache.ReadReq_miss_latency 11164755000 # number of ReadReq miss cycles +system.cpu.icache.ReadReq_miss_rate 0.015455 # miss rate for ReadReq accesses +system.cpu.icache.ReadReq_misses 927840 # number of ReadReq misses +system.cpu.icache.ReadReq_mshr_miss_latency 10236233500 # number of ReadReq MSHR miss cycles +system.cpu.icache.ReadReq_mshr_miss_rate 0.015455 # mshr miss rate for ReadReq accesses +system.cpu.icache.ReadReq_mshr_misses 927840 # number of ReadReq MSHR misses system.cpu.icache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked system.cpu.icache.avg_blocked_cycles_no_targets # average number of cycles each access was blocked -system.cpu.icache.avg_refs 63.763003 # Average number of references to valid blocks. +system.cpu.icache.avg_refs 63.714789 # Average number of references to valid blocks. system.cpu.icache.blocked_no_mshrs 0 # number of cycles access was blocked system.cpu.icache.blocked_no_targets 0 # number of cycles access was blocked system.cpu.icache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked system.cpu.icache.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu.icache.cache_copies 0 # number of cache copies performed -system.cpu.icache.demand_accesses 60037407 # number of demand (read+write) accesses -system.cpu.icache.demand_avg_miss_latency 12029.456206 # average overall miss latency -system.cpu.icache.demand_avg_mshr_miss_latency 11028.713640 # average overall mshr miss latency -system.cpu.icache.demand_hits 59110217 # number of demand (read+write) hits -system.cpu.icache.demand_miss_latency 11153591500 # number of demand (read+write) miss cycles -system.cpu.icache.demand_miss_rate 0.015444 # miss rate for demand accesses -system.cpu.icache.demand_misses 927190 # number of demand (read+write) misses +system.cpu.icache.demand_accesses 60034775 # number of demand (read+write) accesses +system.cpu.icache.demand_avg_miss_latency 12033.060657 # average overall miss latency +system.cpu.icache.demand_avg_mshr_miss_latency 11032.326155 # average overall mshr miss latency +system.cpu.icache.demand_hits 59106935 # number of demand (read+write) hits +system.cpu.icache.demand_miss_latency 11164755000 # number of demand (read+write) miss cycles +system.cpu.icache.demand_miss_rate 0.015455 # miss rate for demand accesses +system.cpu.icache.demand_misses 927840 # number of demand (read+write) misses system.cpu.icache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu.icache.demand_mshr_miss_latency 10225713000 # number of demand (read+write) MSHR miss cycles -system.cpu.icache.demand_mshr_miss_rate 0.015444 # mshr miss rate for demand accesses -system.cpu.icache.demand_mshr_misses 927190 # number of demand (read+write) MSHR misses +system.cpu.icache.demand_mshr_miss_latency 10236233500 # number of demand (read+write) MSHR miss cycles +system.cpu.icache.demand_mshr_miss_rate 0.015455 # mshr miss rate for demand accesses +system.cpu.icache.demand_mshr_misses 927840 # number of demand (read+write) MSHR misses system.cpu.icache.fast_writes 0 # number of fast writes performed system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.icache.overall_accesses 60037407 # number of overall (read+write) accesses -system.cpu.icache.overall_avg_miss_latency 12029.456206 # average overall miss latency -system.cpu.icache.overall_avg_mshr_miss_latency 11028.713640 # average overall mshr miss latency +system.cpu.icache.overall_accesses 60034775 # number of overall (read+write) accesses +system.cpu.icache.overall_avg_miss_latency 12033.060657 # average overall miss latency +system.cpu.icache.overall_avg_mshr_miss_latency 11032.326155 # average overall mshr miss latency system.cpu.icache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency -system.cpu.icache.overall_hits 59110217 # number of overall hits -system.cpu.icache.overall_miss_latency 11153591500 # number of overall miss cycles -system.cpu.icache.overall_miss_rate 0.015444 # miss rate for overall accesses -system.cpu.icache.overall_misses 927190 # number of overall misses +system.cpu.icache.overall_hits 59106935 # number of overall hits +system.cpu.icache.overall_miss_latency 11164755000 # number of overall miss cycles +system.cpu.icache.overall_miss_rate 0.015455 # miss rate for overall accesses +system.cpu.icache.overall_misses 927840 # number of overall misses system.cpu.icache.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu.icache.overall_mshr_miss_latency 10225713000 # number of overall MSHR miss cycles -system.cpu.icache.overall_mshr_miss_rate 0.015444 # mshr miss rate for overall accesses -system.cpu.icache.overall_mshr_misses 927190 # number of overall MSHR misses +system.cpu.icache.overall_mshr_miss_latency 10236233500 # number of overall MSHR miss cycles +system.cpu.icache.overall_mshr_miss_rate 0.015455 # mshr miss rate for overall accesses +system.cpu.icache.overall_mshr_misses 927840 # number of overall MSHR misses system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.icache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.cpu.icache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache @@ -178,45 +171,19 @@ system.cpu.icache.prefetcher.num_hwpf_issued 0 system.cpu.icache.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated system.cpu.icache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time -system.cpu.icache.protocol.hwpf_invalid 0 # hard prefetch misses to invalid blocks -system.cpu.icache.protocol.read_invalid 927190 # read misses to invalid blocks -system.cpu.icache.protocol.snoop_inv_exclusive 0 # Invalidate snoops on exclusive blocks -system.cpu.icache.protocol.snoop_inv_invalid 0 # Invalidate snoops on invalid blocks -system.cpu.icache.protocol.snoop_inv_modified 0 # Invalidate snoops on modified blocks -system.cpu.icache.protocol.snoop_inv_owned 0 # Invalidate snoops on owned blocks -system.cpu.icache.protocol.snoop_inv_shared 0 # Invalidate snoops on shared blocks -system.cpu.icache.protocol.snoop_read_exclusive 644 # read snoops on exclusive blocks -system.cpu.icache.protocol.snoop_read_modified 0 # read snoops on modified blocks -system.cpu.icache.protocol.snoop_read_owned 0 # read snoops on owned blocks -system.cpu.icache.protocol.snoop_read_shared 1040 # read snoops on shared blocks -system.cpu.icache.protocol.snoop_readex_exclusive 146 # readEx snoops on exclusive blocks -system.cpu.icache.protocol.snoop_readex_modified 0 # readEx snoops on modified blocks -system.cpu.icache.protocol.snoop_readex_owned 0 # readEx snoops on owned blocks -system.cpu.icache.protocol.snoop_readex_shared 2 # readEx snoops on shared blocks -system.cpu.icache.protocol.snoop_upgrade_owned 0 # upgrade snoops on owned blocks -system.cpu.icache.protocol.snoop_upgrade_shared 12 # upgradee snoops on shared blocks -system.cpu.icache.protocol.snoop_writeinv_exclusive 0 # WriteInvalidate snoops on exclusive blocks -system.cpu.icache.protocol.snoop_writeinv_invalid 0 # WriteInvalidate snoops on invalid blocks -system.cpu.icache.protocol.snoop_writeinv_modified 0 # WriteInvalidate snoops on modified blocks -system.cpu.icache.protocol.snoop_writeinv_owned 0 # WriteInvalidate snoops on owned blocks -system.cpu.icache.protocol.snoop_writeinv_shared 0 # WriteInvalidate snoops on shared blocks -system.cpu.icache.protocol.swpf_invalid 0 # soft prefetch misses to invalid blocks -system.cpu.icache.protocol.write_invalid 0 # write misses to invalid blocks -system.cpu.icache.protocol.write_owned 0 # write misses to owned blocks -system.cpu.icache.protocol.write_shared 0 # write misses to shared blocks -system.cpu.icache.replacements 926519 # number of replacements -system.cpu.icache.sampled_refs 927030 # Sample count of references to valid blocks. +system.cpu.icache.replacements 927169 # number of replacements +system.cpu.icache.sampled_refs 927680 # Sample count of references to valid blocks. system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu.icache.tagsinuse 508.761542 # Cycle average of tags in use -system.cpu.icache.total_refs 59110217 # Total number of references to valid blocks. -system.cpu.icache.warmup_cycle 34634685000 # Cycle when the warmup percentage was hit. +system.cpu.icache.tagsinuse 508.749374 # Cycle average of tags in use +system.cpu.icache.total_refs 59106935 # Total number of references to valid blocks. +system.cpu.icache.warmup_cycle 35000367000 # Cycle when the warmup percentage was hit. system.cpu.icache.writebacks 0 # number of writebacks -system.cpu.idle_fraction 0.940784 # Percentage of idle cycles -system.cpu.itb.accesses 4977586 # ITB accesses +system.cpu.idle_fraction 0.939637 # Percentage of idle cycles +system.cpu.itb.accesses 4978395 # ITB accesses system.cpu.itb.acv 184 # ITB acv -system.cpu.itb.hits 4972580 # ITB hits +system.cpu.itb.hits 4973389 # ITB hits system.cpu.itb.misses 5006 # ITB misses -system.cpu.kern.callpal 192752 # number of callpals executed +system.cpu.kern.callpal 192813 # number of callpals executed system.cpu.kern.callpal_cserve 1 0.00% 0.00% # number of callpals executed system.cpu.kern.callpal_wrmces 1 0.00% 0.00% # number of callpals executed system.cpu.kern.callpal_wrfen 1 0.00% 0.00% # number of callpals executed @@ -224,50 +191,50 @@ system.cpu.kern.callpal_wrvptptr 1 0.00% 0.00% # nu system.cpu.kern.callpal_swpctx 4176 2.17% 2.17% # number of callpals executed system.cpu.kern.callpal_tbi 54 0.03% 2.20% # number of callpals executed system.cpu.kern.callpal_wrent 7 0.00% 2.20% # number of callpals executed -system.cpu.kern.callpal_swpipl 175824 91.22% 93.42% # number of callpals executed -system.cpu.kern.callpal_rdps 6824 3.54% 96.96% # number of callpals executed +system.cpu.kern.callpal_swpipl 175877 91.22% 93.42% # number of callpals executed +system.cpu.kern.callpal_rdps 6828 3.54% 96.96% # number of callpals executed system.cpu.kern.callpal_wrkgp 1 0.00% 96.96% # number of callpals executed system.cpu.kern.callpal_wrusp 7 0.00% 96.96% # number of callpals executed system.cpu.kern.callpal_rdusp 9 0.00% 96.97% # number of callpals executed system.cpu.kern.callpal_whami 2 0.00% 96.97% # number of callpals executed -system.cpu.kern.callpal_rti 5148 2.67% 99.64% # number of callpals executed +system.cpu.kern.callpal_rti 5152 2.67% 99.64% # number of callpals executed system.cpu.kern.callpal_callsys 515 0.27% 99.91% # number of callpals executed system.cpu.kern.callpal_imb 181 0.09% 100.00% # number of callpals executed system.cpu.kern.inst.arm 0 # number of arm instructions executed -system.cpu.kern.inst.hwrei 211836 # number of hwrei instructions executed +system.cpu.kern.inst.hwrei 211901 # number of hwrei instructions executed system.cpu.kern.inst.quiesce 6181 # number of quiesce instructions executed -system.cpu.kern.ipl_count 183027 # number of times we switched to this ipl -system.cpu.kern.ipl_count_0 74862 40.90% 40.90% # number of times we switched to this ipl +system.cpu.kern.ipl_count 183088 # number of times we switched to this ipl +system.cpu.kern.ipl_count_0 74875 40.90% 40.90% # number of times we switched to this ipl system.cpu.kern.ipl_count_21 131 0.07% 40.97% # number of times we switched to this ipl -system.cpu.kern.ipl_count_22 1923 1.05% 42.02% # number of times we switched to this ipl -system.cpu.kern.ipl_count_31 106111 57.98% 100.00% # number of times we switched to this ipl -system.cpu.kern.ipl_good 149044 # number of times we switched to this ipl from a different ipl -system.cpu.kern.ipl_good_0 73495 49.31% 49.31% # number of times we switched to this ipl from a different ipl +system.cpu.kern.ipl_count_22 1927 1.05% 42.02% # number of times we switched to this ipl +system.cpu.kern.ipl_count_31 106155 57.98% 100.00% # number of times we switched to this ipl +system.cpu.kern.ipl_good 149074 # number of times we switched to this ipl from a different ipl +system.cpu.kern.ipl_good_0 73508 49.31% 49.31% # number of times we switched to this ipl from a different ipl system.cpu.kern.ipl_good_21 131 0.09% 49.40% # number of times we switched to this ipl from a different ipl -system.cpu.kern.ipl_good_22 1923 1.29% 50.69% # number of times we switched to this ipl from a different ipl -system.cpu.kern.ipl_good_31 73495 49.31% 100.00% # number of times we switched to this ipl from a different ipl -system.cpu.kern.ipl_ticks 1907145727000 # number of cycles we spent at this ipl -system.cpu.kern.ipl_ticks_0 1851261210000 97.07% 97.07% # number of cycles we spent at this ipl -system.cpu.kern.ipl_ticks_21 73754500 0.00% 97.07% # number of cycles we spent at this ipl -system.cpu.kern.ipl_ticks_22 531976500 0.03% 97.10% # number of cycles we spent at this ipl -system.cpu.kern.ipl_ticks_31 55278786000 2.90% 100.00% # number of cycles we spent at this ipl -system.cpu.kern.ipl_used_0 0.981740 # fraction of swpipl calls that actually changed the ipl +system.cpu.kern.ipl_good_22 1927 1.29% 50.69% # number of times we switched to this ipl from a different ipl +system.cpu.kern.ipl_good_31 73508 49.31% 100.00% # number of times we switched to this ipl from a different ipl +system.cpu.kern.ipl_ticks 1910308997000 # number of cycles we spent at this ipl +system.cpu.kern.ipl_ticks_0 1853401678500 97.02% 97.02% # number of cycles we spent at this ipl +system.cpu.kern.ipl_ticks_21 78202500 0.00% 97.03% # number of cycles we spent at this ipl +system.cpu.kern.ipl_ticks_22 538133000 0.03% 97.05% # number of cycles we spent at this ipl +system.cpu.kern.ipl_ticks_31 56290983000 2.95% 100.00% # number of cycles we spent at this ipl +system.cpu.kern.ipl_used_0 0.981743 # fraction of swpipl calls that actually changed the ipl system.cpu.kern.ipl_used_21 1 # fraction of swpipl calls that actually changed the ipl system.cpu.kern.ipl_used_22 1 # fraction of swpipl calls that actually changed the ipl -system.cpu.kern.ipl_used_31 0.692624 # fraction of swpipl calls that actually changed the ipl -system.cpu.kern.mode_good_kernel 1910 -system.cpu.kern.mode_good_user 1740 +system.cpu.kern.ipl_used_31 0.692459 # fraction of swpipl calls that actually changed the ipl +system.cpu.kern.mode_good_kernel 1908 +system.cpu.kern.mode_good_user 1738 system.cpu.kern.mode_good_idle 170 -system.cpu.kern.mode_switch_kernel 5894 # number of protection mode switches -system.cpu.kern.mode_switch_user 1740 # number of protection mode switches -system.cpu.kern.mode_switch_idle 2096 # number of protection mode switches -system.cpu.kern.mode_switch_good 1.405165 # fraction of useful protection mode switches -system.cpu.kern.mode_switch_good_kernel 0.324058 # fraction of useful protection mode switches +system.cpu.kern.mode_switch_kernel 5896 # number of protection mode switches +system.cpu.kern.mode_switch_user 1738 # number of protection mode switches +system.cpu.kern.mode_switch_idle 2098 # number of protection mode switches +system.cpu.kern.mode_switch_good 1.404639 # fraction of useful protection mode switches +system.cpu.kern.mode_switch_good_kernel 0.323609 # fraction of useful protection mode switches system.cpu.kern.mode_switch_good_user 1 # fraction of useful protection mode switches -system.cpu.kern.mode_switch_good_idle 0.081107 # fraction of useful protection mode switches -system.cpu.kern.mode_ticks_kernel 42657550000 2.24% 2.24% # number of ticks spent at the given mode -system.cpu.kern.mode_ticks_user 4648649000 0.24% 2.48% # number of ticks spent at the given mode -system.cpu.kern.mode_ticks_idle 1859839526000 97.52% 100.00% # number of ticks spent at the given mode +system.cpu.kern.mode_switch_good_idle 0.081030 # fraction of useful protection mode switches +system.cpu.kern.mode_ticks_kernel 43115749000 2.26% 2.26% # number of ticks spent at the given mode +system.cpu.kern.mode_ticks_user 4716926000 0.25% 2.50% # number of ticks spent at the given mode +system.cpu.kern.mode_ticks_idle 1862476320000 97.50% 100.00% # number of ticks spent at the given mode system.cpu.kern.swap_context 4177 # number of times the context was actually changed system.cpu.kern.syscall 326 # number of syscalls executed system.cpu.kern.syscall_2 8 2.45% 2.45% # number of syscalls executed @@ -300,10 +267,10 @@ system.cpu.kern.syscall_98 2 0.61% 97.55% # nu system.cpu.kern.syscall_132 4 1.23% 98.77% # number of syscalls executed system.cpu.kern.syscall_144 2 0.61% 99.39% # number of syscalls executed system.cpu.kern.syscall_147 2 0.61% 100.00% # number of syscalls executed -system.cpu.not_idle_fraction 0.059216 # Percentage of non-idle cycles -system.cpu.numCycles 1907146437000 # number of cpu cycles simulated -system.cpu.num_insts 60037406 # Number of instructions executed -system.cpu.num_refs 16305563 # Number of memory references +system.cpu.not_idle_fraction 0.060363 # Percentage of non-idle cycles +system.cpu.numCycles 1910309711000 # number of cpu cycles simulated +system.cpu.num_insts 60034774 # Number of instructions executed +system.cpu.num_refs 16305091 # Number of memory references system.disk0.dma_read_bytes 1024 # Number of bytes transfered via DMA reads (not PRD). system.disk0.dma_read_full_pages 0 # Number of full page size DMA reads (not PRD). system.disk0.dma_read_txs 1 # Number of DMA read transactions (not PRD). @@ -316,70 +283,79 @@ system.disk2.dma_read_txs 0 # Nu system.disk2.dma_write_bytes 8192 # Number of bytes transfered via DMA writes. system.disk2.dma_write_full_pages 1 # Number of full page size DMA writes. system.disk2.dma_write_txs 1 # Number of DMA write transactions. -system.l2c.ReadExReq_accesses 304305 # number of ReadExReq accesses(hits+misses) -system.l2c.ReadExReq_avg_miss_latency 13000.153945 # average ReadExReq miss latency -system.l2c.ReadExReq_avg_mshr_miss_latency 11000.153945 # average ReadExReq mshr miss latency -system.l2c.ReadExReq_hits 187380 # number of ReadExReq hits -system.l2c.ReadExReq_miss_latency 1520043000 # number of ReadExReq miss cycles -system.l2c.ReadExReq_miss_rate 0.384236 # miss rate for ReadExReq accesses -system.l2c.ReadExReq_misses 116925 # number of ReadExReq misses -system.l2c.ReadExReq_mshr_miss_latency 1286193000 # number of ReadExReq MSHR miss cycles -system.l2c.ReadExReq_mshr_miss_rate 0.384236 # mshr miss rate for ReadExReq accesses -system.l2c.ReadExReq_mshr_misses 116925 # number of ReadExReq MSHR misses -system.l2c.ReadReq_accesses 2668854 # number of ReadReq accesses(hits+misses) -system.l2c.ReadReq_avg_miss_latency 13000.065889 # average ReadReq miss latency -system.l2c.ReadReq_avg_mshr_miss_latency 11000.065889 # average ReadReq mshr miss latency -system.l2c.ReadReq_hits 1727874 # number of ReadReq hits -system.l2c.ReadReq_miss_latency 12232802000 # number of ReadReq miss cycles -system.l2c.ReadReq_miss_rate 0.352578 # miss rate for ReadReq accesses -system.l2c.ReadReq_misses 940980 # number of ReadReq misses -system.l2c.ReadReq_mshr_miss_latency 10350842000 # number of ReadReq MSHR miss cycles -system.l2c.ReadReq_mshr_miss_rate 0.352578 # mshr miss rate for ReadReq accesses -system.l2c.ReadReq_mshr_misses 940980 # number of ReadReq MSHR misses -system.l2c.ReadReq_mshr_uncacheable 6727 # number of ReadReq MSHR uncacheable -system.l2c.ReadResp_avg_mshr_uncacheable_latency inf # average ReadResp mshr uncacheable latency -system.l2c.ReadResp_mshr_uncacheable_latency 750102000 # number of ReadResp MSHR uncacheable cycles -system.l2c.WriteReq_mshr_uncacheable 9438 # number of WriteReq MSHR uncacheable -system.l2c.WriteResp_avg_mshr_uncacheable_latency inf # average WriteResp mshr uncacheable latency -system.l2c.WriteResp_mshr_uncacheable_latency 1050666000 # number of WriteResp MSHR uncacheable cycles -system.l2c.Writeback_accesses 429989 # number of Writeback accesses(hits+misses) -system.l2c.Writeback_hits 429989 # number of Writeback hits +system.l2c.ReadExReq_accesses 304522 # number of ReadExReq accesses(hits+misses) +system.l2c.ReadExReq_avg_miss_latency 12000.719160 # average ReadExReq miss latency +system.l2c.ReadExReq_avg_mshr_miss_latency 11000.719160 # average ReadExReq mshr miss latency +system.l2c.ReadExReq_miss_latency 3654483000 # number of ReadExReq miss cycles +system.l2c.ReadExReq_miss_rate 1 # miss rate for ReadExReq accesses +system.l2c.ReadExReq_misses 304522 # number of ReadExReq misses +system.l2c.ReadExReq_mshr_miss_latency 3349961000 # number of ReadExReq MSHR miss cycles +system.l2c.ReadExReq_mshr_miss_rate 1 # mshr miss rate for ReadExReq accesses +system.l2c.ReadExReq_mshr_misses 304522 # number of ReadExReq MSHR misses +system.l2c.ReadReq_accesses 2670005 # number of ReadReq accesses(hits+misses) +system.l2c.ReadReq_avg_miss_latency 12000.233269 # average ReadReq miss latency +system.l2c.ReadReq_avg_mshr_miss_latency 11000.233269 # average ReadReq mshr miss latency +system.l2c.ReadReq_avg_mshr_uncacheable_latency inf # average ReadReq mshr uncacheable latency +system.l2c.ReadReq_hits 1568273 # number of ReadReq hits +system.l2c.ReadReq_miss_latency 13221041000 # number of ReadReq miss cycles +system.l2c.ReadReq_miss_rate 0.412633 # miss rate for ReadReq accesses +system.l2c.ReadReq_misses 1101732 # number of ReadReq misses +system.l2c.ReadReq_mshr_miss_latency 12119309000 # number of ReadReq MSHR miss cycles +system.l2c.ReadReq_mshr_miss_rate 0.412633 # mshr miss rate for ReadReq accesses +system.l2c.ReadReq_mshr_misses 1101732 # number of ReadReq MSHR misses +system.l2c.ReadReq_mshr_uncacheable_latency 750102000 # number of ReadReq MSHR uncacheable cycles +system.l2c.UpgradeReq_accesses 125867 # number of UpgradeReq accesses(hits+misses) +system.l2c.UpgradeReq_avg_miss_latency 11999.892744 # average UpgradeReq miss latency +system.l2c.UpgradeReq_avg_mshr_miss_latency 11000.750793 # average UpgradeReq mshr miss latency +system.l2c.UpgradeReq_miss_latency 1510390500 # number of UpgradeReq miss cycles +system.l2c.UpgradeReq_miss_rate 1 # miss rate for UpgradeReq accesses +system.l2c.UpgradeReq_misses 125867 # number of UpgradeReq misses +system.l2c.UpgradeReq_mshr_miss_latency 1384631500 # number of UpgradeReq MSHR miss cycles +system.l2c.UpgradeReq_mshr_miss_rate 1 # mshr miss rate for UpgradeReq accesses +system.l2c.UpgradeReq_mshr_misses 125867 # number of UpgradeReq MSHR misses +system.l2c.WriteReq_avg_mshr_uncacheable_latency inf # average WriteReq mshr uncacheable latency +system.l2c.WriteReq_mshr_uncacheable_latency 1051110500 # number of WriteReq MSHR uncacheable cycles +system.l2c.Writeback_accesses 429991 # number of Writeback accesses(hits+misses) +system.l2c.Writeback_miss_rate 1 # miss rate for Writeback accesses +system.l2c.Writeback_misses 429991 # number of Writeback misses +system.l2c.Writeback_mshr_miss_rate 1 # mshr miss rate for Writeback accesses +system.l2c.Writeback_mshr_misses 429991 # number of Writeback MSHR misses system.l2c.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked system.l2c.avg_blocked_cycles_no_targets # average number of cycles each access was blocked -system.l2c.avg_refs 2.216875 # Average number of references to valid blocks. +system.l2c.avg_refs 1.660842 # Average number of references to valid blocks. system.l2c.blocked_no_mshrs 0 # number of cycles access was blocked system.l2c.blocked_no_targets 0 # number of cycles access was blocked system.l2c.blocked_cycles_no_mshrs 0 # number of cycles access was blocked system.l2c.blocked_cycles_no_targets 0 # number of cycles access was blocked system.l2c.cache_copies 0 # number of cache copies performed -system.l2c.demand_accesses 2668854 # number of demand (read+write) accesses -system.l2c.demand_avg_miss_latency 13000.065889 # average overall miss latency -system.l2c.demand_avg_mshr_miss_latency 11000.065889 # average overall mshr miss latency -system.l2c.demand_hits 1727874 # number of demand (read+write) hits -system.l2c.demand_miss_latency 12232802000 # number of demand (read+write) miss cycles -system.l2c.demand_miss_rate 0.352578 # miss rate for demand accesses -system.l2c.demand_misses 940980 # number of demand (read+write) misses +system.l2c.demand_accesses 2974527 # number of demand (read+write) accesses +system.l2c.demand_avg_miss_latency 12000.338488 # average overall miss latency +system.l2c.demand_avg_mshr_miss_latency 11000.338488 # average overall mshr miss latency +system.l2c.demand_hits 1568273 # number of demand (read+write) hits +system.l2c.demand_miss_latency 16875524000 # number of demand (read+write) miss cycles +system.l2c.demand_miss_rate 0.472766 # miss rate for demand accesses +system.l2c.demand_misses 1406254 # number of demand (read+write) misses system.l2c.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.l2c.demand_mshr_miss_latency 10350842000 # number of demand (read+write) MSHR miss cycles -system.l2c.demand_mshr_miss_rate 0.352578 # mshr miss rate for demand accesses -system.l2c.demand_mshr_misses 940980 # number of demand (read+write) MSHR misses +system.l2c.demand_mshr_miss_latency 15469270000 # number of demand (read+write) MSHR miss cycles +system.l2c.demand_mshr_miss_rate 0.472766 # mshr miss rate for demand accesses +system.l2c.demand_mshr_misses 1406254 # number of demand (read+write) MSHR misses system.l2c.fast_writes 0 # number of fast writes performed system.l2c.mshr_cap_events 0 # number of times MSHR cap was activated system.l2c.no_allocate_misses 0 # Number of misses that were no-allocate -system.l2c.overall_accesses 3098843 # number of overall (read+write) accesses -system.l2c.overall_avg_miss_latency 13000.065889 # average overall miss latency -system.l2c.overall_avg_mshr_miss_latency 11000.065889 # average overall mshr miss latency -system.l2c.overall_avg_mshr_uncacheable_latency 0 # average overall mshr uncacheable latency -system.l2c.overall_hits 2157863 # number of overall hits -system.l2c.overall_miss_latency 12232802000 # number of overall miss cycles -system.l2c.overall_miss_rate 0.303655 # miss rate for overall accesses -system.l2c.overall_misses 940980 # number of overall misses +system.l2c.overall_accesses 2974527 # number of overall (read+write) accesses +system.l2c.overall_avg_miss_latency 12000.338488 # average overall miss latency +system.l2c.overall_avg_mshr_miss_latency 11000.338488 # average overall mshr miss latency +system.l2c.overall_avg_mshr_uncacheable_latency inf # average overall mshr uncacheable latency +system.l2c.overall_hits 1568273 # number of overall hits +system.l2c.overall_miss_latency 16875524000 # number of overall miss cycles +system.l2c.overall_miss_rate 0.472766 # miss rate for overall accesses +system.l2c.overall_misses 1406254 # number of overall misses system.l2c.overall_mshr_hits 0 # number of overall MSHR hits -system.l2c.overall_mshr_miss_latency 10350842000 # number of overall MSHR miss cycles -system.l2c.overall_mshr_miss_rate 0.303655 # mshr miss rate for overall accesses -system.l2c.overall_mshr_misses 940980 # number of overall MSHR misses -system.l2c.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles -system.l2c.overall_mshr_uncacheable_misses 16165 # number of overall MSHR uncacheable misses +system.l2c.overall_mshr_miss_latency 15469270000 # number of overall MSHR miss cycles +system.l2c.overall_mshr_miss_rate 0.472766 # mshr miss rate for overall accesses +system.l2c.overall_mshr_misses 1406254 # number of overall MSHR misses +system.l2c.overall_mshr_uncacheable_latency 1801212500 # number of overall MSHR uncacheable cycles +system.l2c.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.l2c.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache system.l2c.prefetcher.num_hwpf_already_in_mshr 0 # number of hwpf that were already in mshr system.l2c.prefetcher.num_hwpf_already_in_prefetcher 0 # number of hwpf that were already in the prefetch queue @@ -389,13 +365,13 @@ system.l2c.prefetcher.num_hwpf_issued 0 # nu system.l2c.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated system.l2c.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page system.l2c.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time -system.l2c.replacements 992369 # number of replacements -system.l2c.sampled_refs 1057905 # Sample count of references to valid blocks. +system.l2c.replacements 947259 # number of replacements +system.l2c.sampled_refs 965538 # Sample count of references to valid blocks. system.l2c.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.l2c.tagsinuse 65468.856552 # Cycle average of tags in use -system.l2c.total_refs 2345243 # Total number of references to valid blocks. -system.l2c.warmup_cycle 3045832000 # Cycle when the warmup percentage was hit. -system.l2c.writebacks 74072 # number of writebacks +system.l2c.tagsinuse 15874.904757 # Cycle average of tags in use +system.l2c.total_refs 1603606 # Total number of references to valid blocks. +system.l2c.warmup_cycle 4106790000 # Cycle when the warmup percentage was hit. +system.l2c.writebacks 0 # number of writebacks system.tsunami.ethernet.coalescedRxDesc # average number of RxDesc's coalesced into each post system.tsunami.ethernet.coalescedRxIdle # average number of RxIdle's coalesced into each post system.tsunami.ethernet.coalescedRxOk # average number of RxOk's coalesced into each post diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/stderr b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/stderr index f34493a86..32120d9d6 100644 --- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/stderr +++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/stderr @@ -1,3 +1,3 @@ -Listening for system connection on port 3456 -0: system.remote_gdb.listener: listening for remote gdb #0 on port 7000 +Listening for system connection on port 3457 +0: system.remote_gdb.listener: listening for remote gdb on port 7001 warn: Entering event queue @ 0. Starting simulation... diff --git a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/stdout b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/stdout index db9ad862d..69f3594a5 100644 --- a/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/stdout +++ b/tests/quick/10.linux-boot/ref/alpha/linux/tsunami-simple-timing/stdout @@ -5,10 +5,9 @@ The Regents of The University of Michigan All Rights Reserved -M5 compiled Jun 10 2007 14:10:03 -M5 started Mon Jun 11 01:14:34 2007 -M5 executing on iceaxe -command line: /Users/nate/build/outgoing/build/ALPHA_FS/m5.debug -d /Users/nate/build/outgoing/build/ALPHA_FS/tests/debug/quick/10.linux-boot/alpha/linux/tsunami-simple-timing tests/run.py quick/10.linux-boot/alpha/linux/tsunami-simple-timing +M5 compiled Aug 3 2007 04:02:11 +M5 started Fri Aug 3 04:23:34 2007 +M5 executing on zizzer.eecs.umich.edu +command line: build/ALPHA_FS/m5.fast -d build/ALPHA_FS/tests/fast/quick/10.linux-boot/alpha/linux/tsunami-simple-timing tests/run.py quick/10.linux-boot/alpha/linux/tsunami-simple-timing Global frequency set at 1000000000000 ticks per second - 0: system.tsunami.io.rtc: Real-time clock set to Thu Jan 1 00:00:00 2009 -Exiting @ tick 1907146437000 because m5_exit instruction encountered +Exiting @ tick 1910309711000 because m5_exit instruction encountered diff --git a/tests/quick/50.memtest/ref/alpha/linux/memtest/config.ini b/tests/quick/50.memtest/ref/alpha/linux/memtest/config.ini index e30600052..8bac0dec4 100644 --- a/tests/quick/50.memtest/ref/alpha/linux/memtest/config.ini +++ b/tests/quick/50.memtest/ref/alpha/linux/memtest/config.ini @@ -27,12 +27,9 @@ test=system.cpu0.l1c.cpu_side [system.cpu0.l1c] type=BaseCache -children=protocol -adaptive_compression=false +addr_range=0:18446744073709551615 assoc=4 block_size=64 -compressed_bus=false -compression_latency=0 hash_delay=1 latency=1000 lifo=false @@ -50,12 +47,10 @@ prefetch_serial_squash=false prefetch_use_cpu_id=true prefetcher_size=100 prioritizeRequests=false -protocol=system.cpu0.l1c.protocol repl=Null size=32768 split=false split_size=0 -store_compressed=false subblock_size=0 tgts_per_mshr=8 trace_addr=0 @@ -64,11 +59,6 @@ write_buffers=8 cpu_side=system.cpu0.test mem_side=system.toL2Bus.port[1] -[system.cpu0.l1c.protocol] -type=CoherenceProtocol -do_upgrades=true -protocol=moesi - [system.cpu1] type=MemTest children=l1c @@ -87,12 +77,9 @@ test=system.cpu1.l1c.cpu_side [system.cpu1.l1c] type=BaseCache -children=protocol -adaptive_compression=false +addr_range=0:18446744073709551615 assoc=4 block_size=64 -compressed_bus=false -compression_latency=0 hash_delay=1 latency=1000 lifo=false @@ -110,12 +97,10 @@ prefetch_serial_squash=false prefetch_use_cpu_id=true prefetcher_size=100 prioritizeRequests=false -protocol=system.cpu1.l1c.protocol repl=Null size=32768 split=false split_size=0 -store_compressed=false subblock_size=0 tgts_per_mshr=8 trace_addr=0 @@ -124,11 +109,6 @@ write_buffers=8 cpu_side=system.cpu1.test mem_side=system.toL2Bus.port[2] -[system.cpu1.l1c.protocol] -type=CoherenceProtocol -do_upgrades=true -protocol=moesi - [system.cpu2] type=MemTest children=l1c @@ -147,12 +127,9 @@ test=system.cpu2.l1c.cpu_side [system.cpu2.l1c] type=BaseCache -children=protocol -adaptive_compression=false +addr_range=0:18446744073709551615 assoc=4 block_size=64 -compressed_bus=false -compression_latency=0 hash_delay=1 latency=1000 lifo=false @@ -170,12 +147,10 @@ prefetch_serial_squash=false prefetch_use_cpu_id=true prefetcher_size=100 prioritizeRequests=false -protocol=system.cpu2.l1c.protocol repl=Null size=32768 split=false split_size=0 -store_compressed=false subblock_size=0 tgts_per_mshr=8 trace_addr=0 @@ -184,11 +159,6 @@ write_buffers=8 cpu_side=system.cpu2.test mem_side=system.toL2Bus.port[3] -[system.cpu2.l1c.protocol] -type=CoherenceProtocol -do_upgrades=true -protocol=moesi - [system.cpu3] type=MemTest children=l1c @@ -207,12 +177,9 @@ test=system.cpu3.l1c.cpu_side [system.cpu3.l1c] type=BaseCache -children=protocol -adaptive_compression=false +addr_range=0:18446744073709551615 assoc=4 block_size=64 -compressed_bus=false -compression_latency=0 hash_delay=1 latency=1000 lifo=false @@ -230,12 +197,10 @@ prefetch_serial_squash=false prefetch_use_cpu_id=true prefetcher_size=100 prioritizeRequests=false -protocol=system.cpu3.l1c.protocol repl=Null size=32768 split=false split_size=0 -store_compressed=false subblock_size=0 tgts_per_mshr=8 trace_addr=0 @@ -244,11 +209,6 @@ write_buffers=8 cpu_side=system.cpu3.test mem_side=system.toL2Bus.port[4] -[system.cpu3.l1c.protocol] -type=CoherenceProtocol -do_upgrades=true -protocol=moesi - [system.cpu4] type=MemTest children=l1c @@ -267,12 +227,9 @@ test=system.cpu4.l1c.cpu_side [system.cpu4.l1c] type=BaseCache -children=protocol -adaptive_compression=false +addr_range=0:18446744073709551615 assoc=4 block_size=64 -compressed_bus=false -compression_latency=0 hash_delay=1 latency=1000 lifo=false @@ -290,12 +247,10 @@ prefetch_serial_squash=false prefetch_use_cpu_id=true prefetcher_size=100 prioritizeRequests=false -protocol=system.cpu4.l1c.protocol repl=Null size=32768 split=false split_size=0 -store_compressed=false subblock_size=0 tgts_per_mshr=8 trace_addr=0 @@ -304,11 +259,6 @@ write_buffers=8 cpu_side=system.cpu4.test mem_side=system.toL2Bus.port[5] -[system.cpu4.l1c.protocol] -type=CoherenceProtocol -do_upgrades=true -protocol=moesi - [system.cpu5] type=MemTest children=l1c @@ -327,12 +277,9 @@ test=system.cpu5.l1c.cpu_side [system.cpu5.l1c] type=BaseCache -children=protocol -adaptive_compression=false +addr_range=0:18446744073709551615 assoc=4 block_size=64 -compressed_bus=false -compression_latency=0 hash_delay=1 latency=1000 lifo=false @@ -350,12 +297,10 @@ prefetch_serial_squash=false prefetch_use_cpu_id=true prefetcher_size=100 prioritizeRequests=false -protocol=system.cpu5.l1c.protocol repl=Null size=32768 split=false split_size=0 -store_compressed=false subblock_size=0 tgts_per_mshr=8 trace_addr=0 @@ -364,11 +309,6 @@ write_buffers=8 cpu_side=system.cpu5.test mem_side=system.toL2Bus.port[6] -[system.cpu5.l1c.protocol] -type=CoherenceProtocol -do_upgrades=true -protocol=moesi - [system.cpu6] type=MemTest children=l1c @@ -387,12 +327,9 @@ test=system.cpu6.l1c.cpu_side [system.cpu6.l1c] type=BaseCache -children=protocol -adaptive_compression=false +addr_range=0:18446744073709551615 assoc=4 block_size=64 -compressed_bus=false -compression_latency=0 hash_delay=1 latency=1000 lifo=false @@ -410,12 +347,10 @@ prefetch_serial_squash=false prefetch_use_cpu_id=true prefetcher_size=100 prioritizeRequests=false -protocol=system.cpu6.l1c.protocol repl=Null size=32768 split=false split_size=0 -store_compressed=false subblock_size=0 tgts_per_mshr=8 trace_addr=0 @@ -424,11 +359,6 @@ write_buffers=8 cpu_side=system.cpu6.test mem_side=system.toL2Bus.port[7] -[system.cpu6.l1c.protocol] -type=CoherenceProtocol -do_upgrades=true -protocol=moesi - [system.cpu7] type=MemTest children=l1c @@ -447,12 +377,9 @@ test=system.cpu7.l1c.cpu_side [system.cpu7.l1c] type=BaseCache -children=protocol -adaptive_compression=false +addr_range=0:18446744073709551615 assoc=4 block_size=64 -compressed_bus=false -compression_latency=0 hash_delay=1 latency=1000 lifo=false @@ -470,12 +397,10 @@ prefetch_serial_squash=false prefetch_use_cpu_id=true prefetcher_size=100 prioritizeRequests=false -protocol=system.cpu7.l1c.protocol repl=Null size=32768 split=false split_size=0 -store_compressed=false subblock_size=0 tgts_per_mshr=8 trace_addr=0 @@ -484,11 +409,6 @@ write_buffers=8 cpu_side=system.cpu7.test mem_side=system.toL2Bus.port[8] -[system.cpu7.l1c.protocol] -type=CoherenceProtocol -do_upgrades=true -protocol=moesi - [system.funcmem] type=PhysicalMemory file= @@ -499,11 +419,9 @@ port=system.cpu0.functional system.cpu1.functional system.cpu2.functional system [system.l2c] type=BaseCache -adaptive_compression=false +addr_range=0:18446744073709551615 assoc=8 block_size=64 -compressed_bus=false -compression_latency=0 hash_delay=1 latency=10000 lifo=false @@ -521,12 +439,10 @@ prefetch_serial_squash=false prefetch_use_cpu_id=true prefetcher_size=100 prioritizeRequests=false -protocol=Null repl=Null size=65536 split=false split_size=0 -store_compressed=false subblock_size=0 tgts_per_mshr=16 trace_addr=0 diff --git a/tests/quick/50.memtest/ref/alpha/linux/memtest/m5stats.txt b/tests/quick/50.memtest/ref/alpha/linux/memtest/m5stats.txt index 752268088..c54bfdce4 100644 --- a/tests/quick/50.memtest/ref/alpha/linux/memtest/m5stats.txt +++ b/tests/quick/50.memtest/ref/alpha/linux/memtest/m5stats.txt @@ -1,72 +1,71 @@ ---------- Begin Simulation Statistics ---------- -host_seconds 37943.64 # Real time elapsed on the host -host_tick_rate 2223 # Simulator tick rate (ticks/s) +host_mem_usage 318912 # Number of bytes of host memory used +host_seconds 272.84 # Real time elapsed on the host +host_tick_rate 598087 # Simulator tick rate (ticks/s) sim_freq 1000000000000 # Frequency of simulated ticks -sim_seconds 0.000084 # Number of seconds simulated -sim_ticks 84350509 # Number of ticks simulated -system.cpu0.l1c.ReadReq_accesses 44421 # number of ReadReq accesses(hits+misses) -system.cpu0.l1c.ReadReq_avg_miss_latency 14010.391786 # average ReadReq miss latency -system.cpu0.l1c.ReadReq_avg_mshr_miss_latency 12986.475734 # average ReadReq mshr miss latency -system.cpu0.l1c.ReadReq_hits 7291 # number of ReadReq hits -system.cpu0.l1c.ReadReq_miss_latency 520205847 # number of ReadReq miss cycles -system.cpu0.l1c.ReadReq_miss_rate 0.835866 # miss rate for ReadReq accesses -system.cpu0.l1c.ReadReq_misses 37130 # number of ReadReq misses -system.cpu0.l1c.ReadReq_mshr_miss_latency 482187844 # number of ReadReq MSHR miss cycles -system.cpu0.l1c.ReadReq_mshr_miss_rate 0.835866 # mshr miss rate for ReadReq accesses -system.cpu0.l1c.ReadReq_mshr_misses 37130 # number of ReadReq MSHR misses -system.cpu0.l1c.ReadReq_mshr_uncacheable 9916 # number of ReadReq MSHR uncacheable -system.cpu0.l1c.ReadResp_avg_mshr_uncacheable_latency inf # average ReadResp mshr uncacheable latency -system.cpu0.l1c.ReadResp_mshr_uncacheable_latency 255520881 # number of ReadResp MSHR uncacheable cycles -system.cpu0.l1c.WriteReq_accesses 23898 # number of WriteReq accesses(hits+misses) -system.cpu0.l1c.WriteReq_avg_miss_latency 12904.605270 # average WriteReq miss latency -system.cpu0.l1c.WriteReq_avg_mshr_miss_latency 11399.917485 # average WriteReq mshr miss latency -system.cpu0.l1c.WriteReq_hits 1090 # number of WriteReq hits -system.cpu0.l1c.WriteReq_miss_latency 294328237 # number of WriteReq miss cycles -system.cpu0.l1c.WriteReq_miss_rate 0.954389 # miss rate for WriteReq accesses -system.cpu0.l1c.WriteReq_misses 22808 # number of WriteReq misses -system.cpu0.l1c.WriteReq_mshr_miss_latency 260009318 # number of WriteReq MSHR miss cycles -system.cpu0.l1c.WriteReq_mshr_miss_rate 0.954389 # mshr miss rate for WriteReq accesses -system.cpu0.l1c.WriteReq_mshr_misses 22808 # number of WriteReq MSHR misses -system.cpu0.l1c.WriteReq_mshr_uncacheable 5184 # number of WriteReq MSHR uncacheable -system.cpu0.l1c.WriteResp_avg_mshr_uncacheable_latency inf # average WriteResp mshr uncacheable latency -system.cpu0.l1c.WriteResp_mshr_uncacheable_latency 154702333 # number of WriteResp MSHR uncacheable cycles -system.cpu0.l1c.avg_blocked_cycles_no_mshrs 1194.948852 # average number of cycles each access was blocked +sim_seconds 0.000163 # Number of seconds simulated +sim_ticks 163182312 # Number of ticks simulated +system.cpu0.l1c.ReadReq_accesses 44955 # number of ReadReq accesses(hits+misses) +system.cpu0.l1c.ReadReq_avg_miss_latency 22713.586650 # average ReadReq miss latency +system.cpu0.l1c.ReadReq_avg_mshr_miss_latency 22705.587882 # average ReadReq mshr miss latency +system.cpu0.l1c.ReadReq_avg_mshr_uncacheable_latency inf # average ReadReq mshr uncacheable latency +system.cpu0.l1c.ReadReq_hits 7621 # number of ReadReq hits +system.cpu0.l1c.ReadReq_miss_latency 847989044 # number of ReadReq miss cycles +system.cpu0.l1c.ReadReq_miss_rate 0.830475 # miss rate for ReadReq accesses +system.cpu0.l1c.ReadReq_misses 37334 # number of ReadReq misses +system.cpu0.l1c.ReadReq_mshr_miss_latency 847690418 # number of ReadReq MSHR miss cycles +system.cpu0.l1c.ReadReq_mshr_miss_rate 0.830475 # mshr miss rate for ReadReq accesses +system.cpu0.l1c.ReadReq_mshr_misses 37334 # number of ReadReq MSHR misses +system.cpu0.l1c.ReadReq_mshr_uncacheable_latency 517943783 # number of ReadReq MSHR uncacheable cycles +system.cpu0.l1c.WriteReq_accesses 24357 # number of WriteReq accesses(hits+misses) +system.cpu0.l1c.WriteReq_avg_miss_latency 24775.291654 # average WriteReq miss latency +system.cpu0.l1c.WriteReq_avg_mshr_miss_latency 24768.103842 # average WriteReq mshr miss latency +system.cpu0.l1c.WriteReq_avg_mshr_uncacheable_latency inf # average WriteReq mshr uncacheable latency +system.cpu0.l1c.WriteReq_hits 956 # number of WriteReq hits +system.cpu0.l1c.WriteReq_miss_latency 579766600 # number of WriteReq miss cycles +system.cpu0.l1c.WriteReq_miss_rate 0.960751 # miss rate for WriteReq accesses +system.cpu0.l1c.WriteReq_misses 23401 # number of WriteReq misses +system.cpu0.l1c.WriteReq_mshr_miss_latency 579598398 # number of WriteReq MSHR miss cycles +system.cpu0.l1c.WriteReq_mshr_miss_rate 0.960751 # mshr miss rate for WriteReq accesses +system.cpu0.l1c.WriteReq_mshr_misses 23401 # number of WriteReq MSHR misses +system.cpu0.l1c.WriteReq_mshr_uncacheable_latency 315492846 # number of WriteReq MSHR uncacheable cycles +system.cpu0.l1c.avg_blocked_cycles_no_mshrs 2283.512556 # average number of cycles each access was blocked system.cpu0.l1c.avg_blocked_cycles_no_targets # average number of cycles each access was blocked -system.cpu0.l1c.avg_refs 0.407238 # Average number of references to valid blocks. -system.cpu0.l1c.blocked_no_mshrs 69093 # number of cycles access was blocked +system.cpu0.l1c.avg_refs 0.411295 # Average number of references to valid blocks. +system.cpu0.l1c.blocked_no_mshrs 69290 # number of cycles access was blocked system.cpu0.l1c.blocked_no_targets 0 # number of cycles access was blocked -system.cpu0.l1c.blocked_cycles_no_mshrs 82562601 # number of cycles access was blocked +system.cpu0.l1c.blocked_cycles_no_mshrs 158224585 # number of cycles access was blocked system.cpu0.l1c.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu0.l1c.cache_copies 0 # number of cache copies performed -system.cpu0.l1c.demand_accesses 68319 # number of demand (read+write) accesses -system.cpu0.l1c.demand_avg_miss_latency 13589.610664 # average overall miss latency -system.cpu0.l1c.demand_avg_mshr_miss_latency 12382.748206 # average overall mshr miss latency -system.cpu0.l1c.demand_hits 8381 # number of demand (read+write) hits -system.cpu0.l1c.demand_miss_latency 814534084 # number of demand (read+write) miss cycles -system.cpu0.l1c.demand_miss_rate 0.877325 # miss rate for demand accesses -system.cpu0.l1c.demand_misses 59938 # number of demand (read+write) misses +system.cpu0.l1c.demand_accesses 69312 # number of demand (read+write) accesses +system.cpu0.l1c.demand_avg_miss_latency 23507.954952 # average overall miss latency +system.cpu0.l1c.demand_avg_mshr_miss_latency 23500.268642 # average overall mshr miss latency +system.cpu0.l1c.demand_hits 8577 # number of demand (read+write) hits +system.cpu0.l1c.demand_miss_latency 1427755644 # number of demand (read+write) miss cycles +system.cpu0.l1c.demand_miss_rate 0.876255 # miss rate for demand accesses +system.cpu0.l1c.demand_misses 60735 # number of demand (read+write) misses system.cpu0.l1c.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu0.l1c.demand_mshr_miss_latency 742197162 # number of demand (read+write) MSHR miss cycles -system.cpu0.l1c.demand_mshr_miss_rate 0.877325 # mshr miss rate for demand accesses -system.cpu0.l1c.demand_mshr_misses 59938 # number of demand (read+write) MSHR misses +system.cpu0.l1c.demand_mshr_miss_latency 1427288816 # number of demand (read+write) MSHR miss cycles +system.cpu0.l1c.demand_mshr_miss_rate 0.876255 # mshr miss rate for demand accesses +system.cpu0.l1c.demand_mshr_misses 60735 # number of demand (read+write) MSHR misses system.cpu0.l1c.fast_writes 0 # number of fast writes performed system.cpu0.l1c.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu0.l1c.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu0.l1c.overall_accesses 68319 # number of overall (read+write) accesses -system.cpu0.l1c.overall_avg_miss_latency 13589.610664 # average overall miss latency -system.cpu0.l1c.overall_avg_mshr_miss_latency 12382.748206 # average overall mshr miss latency -system.cpu0.l1c.overall_avg_mshr_uncacheable_latency 0 # average overall mshr uncacheable latency -system.cpu0.l1c.overall_hits 8381 # number of overall hits -system.cpu0.l1c.overall_miss_latency 814534084 # number of overall miss cycles -system.cpu0.l1c.overall_miss_rate 0.877325 # miss rate for overall accesses -system.cpu0.l1c.overall_misses 59938 # number of overall misses +system.cpu0.l1c.overall_accesses 69312 # number of overall (read+write) accesses +system.cpu0.l1c.overall_avg_miss_latency 23507.954952 # average overall miss latency +system.cpu0.l1c.overall_avg_mshr_miss_latency 23500.268642 # average overall mshr miss latency +system.cpu0.l1c.overall_avg_mshr_uncacheable_latency inf # average overall mshr uncacheable latency +system.cpu0.l1c.overall_hits 8577 # number of overall hits +system.cpu0.l1c.overall_miss_latency 1427755644 # number of overall miss cycles +system.cpu0.l1c.overall_miss_rate 0.876255 # miss rate for overall accesses +system.cpu0.l1c.overall_misses 60735 # number of overall misses system.cpu0.l1c.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu0.l1c.overall_mshr_miss_latency 742197162 # number of overall MSHR miss cycles -system.cpu0.l1c.overall_mshr_miss_rate 0.877325 # mshr miss rate for overall accesses -system.cpu0.l1c.overall_mshr_misses 59938 # number of overall MSHR misses -system.cpu0.l1c.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles -system.cpu0.l1c.overall_mshr_uncacheable_misses 15100 # number of overall MSHR uncacheable misses +system.cpu0.l1c.overall_mshr_miss_latency 1427288816 # number of overall MSHR miss cycles +system.cpu0.l1c.overall_mshr_miss_rate 0.876255 # mshr miss rate for overall accesses +system.cpu0.l1c.overall_mshr_misses 60735 # number of overall MSHR misses +system.cpu0.l1c.overall_mshr_uncacheable_latency 833436629 # number of overall MSHR uncacheable cycles +system.cpu0.l1c.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.cpu0.l1c.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache system.cpu0.l1c.prefetcher.num_hwpf_already_in_mshr 0 # number of hwpf that were already in mshr system.cpu0.l1c.prefetcher.num_hwpf_already_in_prefetcher 0 # number of hwpf that were already in the prefetch queue @@ -76,104 +75,76 @@ system.cpu0.l1c.prefetcher.num_hwpf_issued 0 # system.cpu0.l1c.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated system.cpu0.l1c.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page system.cpu0.l1c.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time -system.cpu0.l1c.protocol.hwpf_invalid 0 # hard prefetch misses to invalid blocks -system.cpu0.l1c.protocol.read_invalid 1761660 # read misses to invalid blocks -system.cpu0.l1c.protocol.snoop_inv_exclusive 0 # Invalidate snoops on exclusive blocks -system.cpu0.l1c.protocol.snoop_inv_invalid 0 # Invalidate snoops on invalid blocks -system.cpu0.l1c.protocol.snoop_inv_modified 0 # Invalidate snoops on modified blocks -system.cpu0.l1c.protocol.snoop_inv_owned 0 # Invalidate snoops on owned blocks -system.cpu0.l1c.protocol.snoop_inv_shared 0 # Invalidate snoops on shared blocks -system.cpu0.l1c.protocol.snoop_read_exclusive 2836 # read snoops on exclusive blocks -system.cpu0.l1c.protocol.snoop_read_modified 12378 # read snoops on modified blocks -system.cpu0.l1c.protocol.snoop_read_owned 7300 # read snoops on owned blocks -system.cpu0.l1c.protocol.snoop_read_shared 1749577 # read snoops on shared blocks -system.cpu0.l1c.protocol.snoop_readex_exclusive 1616 # readEx snoops on exclusive blocks -system.cpu0.l1c.protocol.snoop_readex_modified 6692 # readEx snoops on modified blocks -system.cpu0.l1c.protocol.snoop_readex_owned 4009 # readEx snoops on owned blocks -system.cpu0.l1c.protocol.snoop_readex_shared 12550 # readEx snoops on shared blocks -system.cpu0.l1c.protocol.snoop_upgrade_owned 790 # upgrade snoops on owned blocks -system.cpu0.l1c.protocol.snoop_upgrade_shared 3004 # upgradee snoops on shared blocks -system.cpu0.l1c.protocol.snoop_writeinv_exclusive 0 # WriteInvalidate snoops on exclusive blocks -system.cpu0.l1c.protocol.snoop_writeinv_invalid 0 # WriteInvalidate snoops on invalid blocks -system.cpu0.l1c.protocol.snoop_writeinv_modified 0 # WriteInvalidate snoops on modified blocks -system.cpu0.l1c.protocol.snoop_writeinv_owned 0 # WriteInvalidate snoops on owned blocks -system.cpu0.l1c.protocol.snoop_writeinv_shared 0 # WriteInvalidate snoops on shared blocks -system.cpu0.l1c.protocol.swpf_invalid 0 # soft prefetch misses to invalid blocks -system.cpu0.l1c.protocol.write_invalid 940728 # write misses to invalid blocks -system.cpu0.l1c.protocol.write_owned 1344 # write misses to owned blocks -system.cpu0.l1c.protocol.write_shared 4484 # write misses to shared blocks -system.cpu0.l1c.replacements 27160 # number of replacements -system.cpu0.l1c.sampled_refs 27495 # Sample count of references to valid blocks. +system.cpu0.l1c.replacements 28052 # number of replacements +system.cpu0.l1c.sampled_refs 28403 # Sample count of references to valid blocks. system.cpu0.l1c.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu0.l1c.tagsinuse 342.709273 # Cycle average of tags in use -system.cpu0.l1c.total_refs 11197 # Total number of references to valid blocks. +system.cpu0.l1c.tagsinuse 348.576200 # Cycle average of tags in use +system.cpu0.l1c.total_refs 11682 # Total number of references to valid blocks. system.cpu0.l1c.warmup_cycle 0 # Cycle when the warmup percentage was hit. -system.cpu0.l1c.writebacks 10716 # number of writebacks +system.cpu0.l1c.writebacks 11146 # number of writebacks system.cpu0.num_copies 0 # number of copy accesses completed -system.cpu0.num_reads 98012 # number of read accesses completed -system.cpu0.num_writes 53207 # number of write accesses completed -system.cpu1.l1c.ReadReq_accesses 44893 # number of ReadReq accesses(hits+misses) -system.cpu1.l1c.ReadReq_avg_miss_latency 13909.754864 # average ReadReq miss latency -system.cpu1.l1c.ReadReq_avg_mshr_miss_latency 12900.185775 # average ReadReq mshr miss latency -system.cpu1.l1c.ReadReq_hits 7579 # number of ReadReq hits -system.cpu1.l1c.ReadReq_miss_latency 519028593 # number of ReadReq miss cycles -system.cpu1.l1c.ReadReq_miss_rate 0.831176 # miss rate for ReadReq accesses -system.cpu1.l1c.ReadReq_misses 37314 # number of ReadReq misses -system.cpu1.l1c.ReadReq_mshr_miss_latency 481357532 # number of ReadReq MSHR miss cycles -system.cpu1.l1c.ReadReq_mshr_miss_rate 0.831176 # mshr miss rate for ReadReq accesses -system.cpu1.l1c.ReadReq_mshr_misses 37314 # number of ReadReq MSHR misses -system.cpu1.l1c.ReadReq_mshr_uncacheable 9811 # number of ReadReq MSHR uncacheable -system.cpu1.l1c.ReadResp_avg_mshr_uncacheable_latency inf # average ReadResp mshr uncacheable latency -system.cpu1.l1c.ReadResp_mshr_uncacheable_latency 251708747 # number of ReadResp MSHR uncacheable cycles -system.cpu1.l1c.WriteReq_accesses 24614 # number of WriteReq accesses(hits+misses) -system.cpu1.l1c.WriteReq_avg_miss_latency 12788.679753 # average WriteReq miss latency -system.cpu1.l1c.WriteReq_avg_mshr_miss_latency 11344.205121 # average WriteReq mshr miss latency -system.cpu1.l1c.WriteReq_hits 1257 # number of WriteReq hits -system.cpu1.l1c.WriteReq_miss_latency 298705193 # number of WriteReq miss cycles -system.cpu1.l1c.WriteReq_miss_rate 0.948932 # miss rate for WriteReq accesses -system.cpu1.l1c.WriteReq_misses 23357 # number of WriteReq misses -system.cpu1.l1c.WriteReq_mshr_miss_latency 264966599 # number of WriteReq MSHR miss cycles -system.cpu1.l1c.WriteReq_mshr_miss_rate 0.948932 # mshr miss rate for WriteReq accesses -system.cpu1.l1c.WriteReq_mshr_misses 23357 # number of WriteReq MSHR misses -system.cpu1.l1c.WriteReq_mshr_uncacheable 5453 # number of WriteReq MSHR uncacheable -system.cpu1.l1c.WriteResp_avg_mshr_uncacheable_latency inf # average WriteResp mshr uncacheable latency -system.cpu1.l1c.WriteResp_mshr_uncacheable_latency 163813954 # number of WriteResp MSHR uncacheable cycles -system.cpu1.l1c.avg_blocked_cycles_no_mshrs 1183.149435 # average number of cycles each access was blocked +system.cpu0.num_reads 99892 # number of read accesses completed +system.cpu0.num_writes 54159 # number of write accesses completed +system.cpu1.l1c.ReadReq_accesses 44788 # number of ReadReq accesses(hits+misses) +system.cpu1.l1c.ReadReq_avg_miss_latency 22745.661074 # average ReadReq miss latency +system.cpu1.l1c.ReadReq_avg_mshr_miss_latency 22737.662205 # average ReadReq mshr miss latency +system.cpu1.l1c.ReadReq_avg_mshr_uncacheable_latency inf # average ReadReq mshr uncacheable latency +system.cpu1.l1c.ReadReq_hits 7659 # number of ReadReq hits +system.cpu1.l1c.ReadReq_miss_latency 844523650 # number of ReadReq miss cycles +system.cpu1.l1c.ReadReq_miss_rate 0.828994 # miss rate for ReadReq accesses +system.cpu1.l1c.ReadReq_misses 37129 # number of ReadReq misses +system.cpu1.l1c.ReadReq_mshr_miss_latency 844226660 # number of ReadReq MSHR miss cycles +system.cpu1.l1c.ReadReq_mshr_miss_rate 0.828994 # mshr miss rate for ReadReq accesses +system.cpu1.l1c.ReadReq_mshr_misses 37129 # number of ReadReq MSHR misses +system.cpu1.l1c.ReadReq_mshr_uncacheable_latency 524670355 # number of ReadReq MSHR uncacheable cycles +system.cpu1.l1c.WriteReq_accesses 24323 # number of WriteReq accesses(hits+misses) +system.cpu1.l1c.WriteReq_avg_miss_latency 24767.283276 # average WriteReq miss latency +system.cpu1.l1c.WriteReq_avg_mshr_miss_latency 24760.081804 # average WriteReq mshr miss latency +system.cpu1.l1c.WriteReq_avg_mshr_uncacheable_latency inf # average WriteReq mshr uncacheable latency +system.cpu1.l1c.WriteReq_hits 950 # number of WriteReq hits +system.cpu1.l1c.WriteReq_miss_latency 578885712 # number of WriteReq miss cycles +system.cpu1.l1c.WriteReq_miss_rate 0.960942 # miss rate for WriteReq accesses +system.cpu1.l1c.WriteReq_misses 23373 # number of WriteReq misses +system.cpu1.l1c.WriteReq_mshr_miss_latency 578717392 # number of WriteReq MSHR miss cycles +system.cpu1.l1c.WriteReq_mshr_miss_rate 0.960942 # mshr miss rate for WriteReq accesses +system.cpu1.l1c.WriteReq_mshr_misses 23373 # number of WriteReq MSHR misses +system.cpu1.l1c.WriteReq_mshr_uncacheable_latency 319087206 # number of WriteReq MSHR uncacheable cycles +system.cpu1.l1c.avg_blocked_cycles_no_mshrs 2291.446711 # average number of cycles each access was blocked system.cpu1.l1c.avg_blocked_cycles_no_targets # average number of cycles each access was blocked -system.cpu1.l1c.avg_refs 0.414323 # Average number of references to valid blocks. -system.cpu1.l1c.blocked_no_mshrs 69763 # number of cycles access was blocked +system.cpu1.l1c.avg_refs 0.414757 # Average number of references to valid blocks. +system.cpu1.l1c.blocked_no_mshrs 69358 # number of cycles access was blocked system.cpu1.l1c.blocked_no_targets 0 # number of cycles access was blocked -system.cpu1.l1c.blocked_cycles_no_mshrs 82540054 # number of cycles access was blocked +system.cpu1.l1c.blocked_cycles_no_mshrs 158930161 # number of cycles access was blocked system.cpu1.l1c.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu1.l1c.cache_copies 0 # number of cache copies performed -system.cpu1.l1c.demand_accesses 69507 # number of demand (read+write) accesses -system.cpu1.l1c.demand_avg_miss_latency 13478.165615 # average overall miss latency -system.cpu1.l1c.demand_avg_mshr_miss_latency 12301.167461 # average overall mshr miss latency -system.cpu1.l1c.demand_hits 8836 # number of demand (read+write) hits -system.cpu1.l1c.demand_miss_latency 817733786 # number of demand (read+write) miss cycles -system.cpu1.l1c.demand_miss_rate 0.872876 # miss rate for demand accesses -system.cpu1.l1c.demand_misses 60671 # number of demand (read+write) misses +system.cpu1.l1c.demand_accesses 69111 # number of demand (read+write) accesses +system.cpu1.l1c.demand_avg_miss_latency 23526.649731 # average overall miss latency +system.cpu1.l1c.demand_avg_mshr_miss_latency 23518.958910 # average overall mshr miss latency +system.cpu1.l1c.demand_hits 8609 # number of demand (read+write) hits +system.cpu1.l1c.demand_miss_latency 1423409362 # number of demand (read+write) miss cycles +system.cpu1.l1c.demand_miss_rate 0.875432 # miss rate for demand accesses +system.cpu1.l1c.demand_misses 60502 # number of demand (read+write) misses system.cpu1.l1c.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu1.l1c.demand_mshr_miss_latency 746324131 # number of demand (read+write) MSHR miss cycles -system.cpu1.l1c.demand_mshr_miss_rate 0.872876 # mshr miss rate for demand accesses -system.cpu1.l1c.demand_mshr_misses 60671 # number of demand (read+write) MSHR misses +system.cpu1.l1c.demand_mshr_miss_latency 1422944052 # number of demand (read+write) MSHR miss cycles +system.cpu1.l1c.demand_mshr_miss_rate 0.875432 # mshr miss rate for demand accesses +system.cpu1.l1c.demand_mshr_misses 60502 # number of demand (read+write) MSHR misses system.cpu1.l1c.fast_writes 0 # number of fast writes performed system.cpu1.l1c.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu1.l1c.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu1.l1c.overall_accesses 69507 # number of overall (read+write) accesses -system.cpu1.l1c.overall_avg_miss_latency 13478.165615 # average overall miss latency -system.cpu1.l1c.overall_avg_mshr_miss_latency 12301.167461 # average overall mshr miss latency -system.cpu1.l1c.overall_avg_mshr_uncacheable_latency 0 # average overall mshr uncacheable latency -system.cpu1.l1c.overall_hits 8836 # number of overall hits -system.cpu1.l1c.overall_miss_latency 817733786 # number of overall miss cycles -system.cpu1.l1c.overall_miss_rate 0.872876 # miss rate for overall accesses -system.cpu1.l1c.overall_misses 60671 # number of overall misses +system.cpu1.l1c.overall_accesses 69111 # number of overall (read+write) accesses +system.cpu1.l1c.overall_avg_miss_latency 23526.649731 # average overall miss latency +system.cpu1.l1c.overall_avg_mshr_miss_latency 23518.958910 # average overall mshr miss latency +system.cpu1.l1c.overall_avg_mshr_uncacheable_latency inf # average overall mshr uncacheable latency +system.cpu1.l1c.overall_hits 8609 # number of overall hits +system.cpu1.l1c.overall_miss_latency 1423409362 # number of overall miss cycles +system.cpu1.l1c.overall_miss_rate 0.875432 # miss rate for overall accesses +system.cpu1.l1c.overall_misses 60502 # number of overall misses system.cpu1.l1c.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu1.l1c.overall_mshr_miss_latency 746324131 # number of overall MSHR miss cycles -system.cpu1.l1c.overall_mshr_miss_rate 0.872876 # mshr miss rate for overall accesses -system.cpu1.l1c.overall_mshr_misses 60671 # number of overall MSHR misses -system.cpu1.l1c.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles -system.cpu1.l1c.overall_mshr_uncacheable_misses 15264 # number of overall MSHR uncacheable misses +system.cpu1.l1c.overall_mshr_miss_latency 1422944052 # number of overall MSHR miss cycles +system.cpu1.l1c.overall_mshr_miss_rate 0.875432 # mshr miss rate for overall accesses +system.cpu1.l1c.overall_mshr_misses 60502 # number of overall MSHR misses +system.cpu1.l1c.overall_mshr_uncacheable_latency 843757561 # number of overall MSHR uncacheable cycles +system.cpu1.l1c.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.cpu1.l1c.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache system.cpu1.l1c.prefetcher.num_hwpf_already_in_mshr 0 # number of hwpf that were already in mshr system.cpu1.l1c.prefetcher.num_hwpf_already_in_prefetcher 0 # number of hwpf that were already in the prefetch queue @@ -183,104 +154,76 @@ system.cpu1.l1c.prefetcher.num_hwpf_issued 0 # system.cpu1.l1c.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated system.cpu1.l1c.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page system.cpu1.l1c.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time -system.cpu1.l1c.protocol.hwpf_invalid 0 # hard prefetch misses to invalid blocks -system.cpu1.l1c.protocol.read_invalid 1717891 # read misses to invalid blocks -system.cpu1.l1c.protocol.snoop_inv_exclusive 0 # Invalidate snoops on exclusive blocks -system.cpu1.l1c.protocol.snoop_inv_invalid 0 # Invalidate snoops on invalid blocks -system.cpu1.l1c.protocol.snoop_inv_modified 0 # Invalidate snoops on modified blocks -system.cpu1.l1c.protocol.snoop_inv_owned 0 # Invalidate snoops on owned blocks -system.cpu1.l1c.protocol.snoop_inv_shared 0 # Invalidate snoops on shared blocks -system.cpu1.l1c.protocol.snoop_read_exclusive 2925 # read snoops on exclusive blocks -system.cpu1.l1c.protocol.snoop_read_modified 12701 # read snoops on modified blocks -system.cpu1.l1c.protocol.snoop_read_owned 7436 # read snoops on owned blocks -system.cpu1.l1c.protocol.snoop_read_shared 1669937 # read snoops on shared blocks -system.cpu1.l1c.protocol.snoop_readex_exclusive 1611 # readEx snoops on exclusive blocks -system.cpu1.l1c.protocol.snoop_readex_modified 6726 # readEx snoops on modified blocks -system.cpu1.l1c.protocol.snoop_readex_owned 3965 # readEx snoops on owned blocks -system.cpu1.l1c.protocol.snoop_readex_shared 12596 # readEx snoops on shared blocks -system.cpu1.l1c.protocol.snoop_upgrade_owned 860 # upgrade snoops on owned blocks -system.cpu1.l1c.protocol.snoop_upgrade_shared 2979 # upgradee snoops on shared blocks -system.cpu1.l1c.protocol.snoop_writeinv_exclusive 0 # WriteInvalidate snoops on exclusive blocks -system.cpu1.l1c.protocol.snoop_writeinv_invalid 0 # WriteInvalidate snoops on invalid blocks -system.cpu1.l1c.protocol.snoop_writeinv_modified 0 # WriteInvalidate snoops on modified blocks -system.cpu1.l1c.protocol.snoop_writeinv_owned 0 # WriteInvalidate snoops on owned blocks -system.cpu1.l1c.protocol.snoop_writeinv_shared 0 # WriteInvalidate snoops on shared blocks -system.cpu1.l1c.protocol.swpf_invalid 0 # soft prefetch misses to invalid blocks -system.cpu1.l1c.protocol.write_invalid 914774 # write misses to invalid blocks -system.cpu1.l1c.protocol.write_owned 1422 # write misses to owned blocks -system.cpu1.l1c.protocol.write_shared 4382 # write misses to shared blocks -system.cpu1.l1c.replacements 27806 # number of replacements -system.cpu1.l1c.sampled_refs 28164 # Sample count of references to valid blocks. +system.cpu1.l1c.replacements 27765 # number of replacements +system.cpu1.l1c.sampled_refs 28108 # Sample count of references to valid blocks. system.cpu1.l1c.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu1.l1c.tagsinuse 345.545872 # Cycle average of tags in use -system.cpu1.l1c.total_refs 11669 # Total number of references to valid blocks. +system.cpu1.l1c.tagsinuse 346.327274 # Cycle average of tags in use +system.cpu1.l1c.total_refs 11658 # Total number of references to valid blocks. system.cpu1.l1c.warmup_cycle 0 # Cycle when the warmup percentage was hit. -system.cpu1.l1c.writebacks 11204 # number of writebacks +system.cpu1.l1c.writebacks 10962 # number of writebacks system.cpu1.num_copies 0 # number of copy accesses completed -system.cpu1.num_reads 100000 # number of read accesses completed -system.cpu1.num_writes 54335 # number of write accesses completed -system.cpu2.l1c.ReadReq_accesses 44489 # number of ReadReq accesses(hits+misses) -system.cpu2.l1c.ReadReq_avg_miss_latency 14018.031231 # average ReadReq miss latency -system.cpu2.l1c.ReadReq_avg_mshr_miss_latency 12993.788573 # average ReadReq mshr miss latency -system.cpu2.l1c.ReadReq_hits 7507 # number of ReadReq hits -system.cpu2.l1c.ReadReq_miss_latency 518414831 # number of ReadReq miss cycles -system.cpu2.l1c.ReadReq_miss_rate 0.831262 # miss rate for ReadReq accesses -system.cpu2.l1c.ReadReq_misses 36982 # number of ReadReq misses -system.cpu2.l1c.ReadReq_mshr_miss_latency 480536289 # number of ReadReq MSHR miss cycles -system.cpu2.l1c.ReadReq_mshr_miss_rate 0.831262 # mshr miss rate for ReadReq accesses -system.cpu2.l1c.ReadReq_mshr_misses 36982 # number of ReadReq MSHR misses -system.cpu2.l1c.ReadReq_mshr_uncacheable 9861 # number of ReadReq MSHR uncacheable -system.cpu2.l1c.ReadResp_avg_mshr_uncacheable_latency inf # average ReadResp mshr uncacheable latency -system.cpu2.l1c.ReadResp_mshr_uncacheable_latency 253484666 # number of ReadResp MSHR uncacheable cycles -system.cpu2.l1c.WriteReq_accesses 24340 # number of WriteReq accesses(hits+misses) -system.cpu2.l1c.WriteReq_avg_miss_latency 12765.385606 # average WriteReq miss latency -system.cpu2.l1c.WriteReq_avg_mshr_miss_latency 11318.971789 # average WriteReq mshr miss latency -system.cpu2.l1c.WriteReq_hits 1122 # number of WriteReq hits -system.cpu2.l1c.WriteReq_miss_latency 296386723 # number of WriteReq miss cycles -system.cpu2.l1c.WriteReq_miss_rate 0.953903 # miss rate for WriteReq accesses -system.cpu2.l1c.WriteReq_misses 23218 # number of WriteReq misses -system.cpu2.l1c.WriteReq_mshr_miss_latency 262803887 # number of WriteReq MSHR miss cycles -system.cpu2.l1c.WriteReq_mshr_miss_rate 0.953903 # mshr miss rate for WriteReq accesses -system.cpu2.l1c.WriteReq_mshr_misses 23218 # number of WriteReq MSHR misses -system.cpu2.l1c.WriteReq_mshr_uncacheable 5480 # number of WriteReq MSHR uncacheable -system.cpu2.l1c.WriteResp_avg_mshr_uncacheable_latency inf # average WriteResp mshr uncacheable latency -system.cpu2.l1c.WriteResp_mshr_uncacheable_latency 165110755 # number of WriteResp MSHR uncacheable cycles -system.cpu2.l1c.avg_blocked_cycles_no_mshrs 1190.317505 # average number of cycles each access was blocked +system.cpu1.num_reads 99692 # number of read accesses completed +system.cpu1.num_writes 53844 # number of write accesses completed +system.cpu2.l1c.ReadReq_accesses 45045 # number of ReadReq accesses(hits+misses) +system.cpu2.l1c.ReadReq_avg_miss_latency 22675.185062 # average ReadReq miss latency +system.cpu2.l1c.ReadReq_avg_mshr_miss_latency 22667.185702 # average ReadReq mshr miss latency +system.cpu2.l1c.ReadReq_avg_mshr_uncacheable_latency inf # average ReadReq mshr uncacheable latency +system.cpu2.l1c.ReadReq_hits 7544 # number of ReadReq hits +system.cpu2.l1c.ReadReq_miss_latency 850342115 # number of ReadReq miss cycles +system.cpu2.l1c.ReadReq_miss_rate 0.832523 # miss rate for ReadReq accesses +system.cpu2.l1c.ReadReq_misses 37501 # number of ReadReq misses +system.cpu2.l1c.ReadReq_mshr_miss_latency 850042131 # number of ReadReq MSHR miss cycles +system.cpu2.l1c.ReadReq_mshr_miss_rate 0.832523 # mshr miss rate for ReadReq accesses +system.cpu2.l1c.ReadReq_mshr_misses 37501 # number of ReadReq MSHR misses +system.cpu2.l1c.ReadReq_mshr_uncacheable_latency 526690736 # number of ReadReq MSHR uncacheable cycles +system.cpu2.l1c.WriteReq_accesses 23975 # number of WriteReq accesses(hits+misses) +system.cpu2.l1c.WriteReq_avg_miss_latency 24810.638326 # average WriteReq miss latency +system.cpu2.l1c.WriteReq_avg_mshr_miss_latency 24803.479873 # average WriteReq mshr miss latency +system.cpu2.l1c.WriteReq_avg_mshr_uncacheable_latency inf # average WriteReq mshr uncacheable latency +system.cpu2.l1c.WriteReq_hits 946 # number of WriteReq hits +system.cpu2.l1c.WriteReq_miss_latency 571364190 # number of WriteReq miss cycles +system.cpu2.l1c.WriteReq_miss_rate 0.960542 # miss rate for WriteReq accesses +system.cpu2.l1c.WriteReq_misses 23029 # number of WriteReq misses +system.cpu2.l1c.WriteReq_mshr_miss_latency 571199338 # number of WriteReq MSHR miss cycles +system.cpu2.l1c.WriteReq_mshr_miss_rate 0.960542 # mshr miss rate for WriteReq accesses +system.cpu2.l1c.WriteReq_mshr_misses 23029 # number of WriteReq MSHR misses +system.cpu2.l1c.WriteReq_mshr_uncacheable_latency 314108208 # number of WriteReq MSHR uncacheable cycles +system.cpu2.l1c.avg_blocked_cycles_no_mshrs 2295.331392 # average number of cycles each access was blocked system.cpu2.l1c.avg_blocked_cycles_no_targets # average number of cycles each access was blocked -system.cpu2.l1c.avg_refs 0.414721 # Average number of references to valid blocks. -system.cpu2.l1c.blocked_no_mshrs 69202 # number of cycles access was blocked +system.cpu2.l1c.avg_refs 0.417132 # Average number of references to valid blocks. +system.cpu2.l1c.blocked_no_mshrs 69383 # number of cycles access was blocked system.cpu2.l1c.blocked_no_targets 0 # number of cycles access was blocked -system.cpu2.l1c.blocked_cycles_no_mshrs 82372352 # number of cycles access was blocked +system.cpu2.l1c.blocked_cycles_no_mshrs 159256978 # number of cycles access was blocked system.cpu2.l1c.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu2.l1c.cache_copies 0 # number of cache copies performed -system.cpu2.l1c.demand_accesses 68829 # number of demand (read+write) accesses -system.cpu2.l1c.demand_avg_miss_latency 13534.909535 # average overall miss latency -system.cpu2.l1c.demand_avg_mshr_miss_latency 12347.843455 # average overall mshr miss latency -system.cpu2.l1c.demand_hits 8629 # number of demand (read+write) hits -system.cpu2.l1c.demand_miss_latency 814801554 # number of demand (read+write) miss cycles -system.cpu2.l1c.demand_miss_rate 0.874631 # miss rate for demand accesses -system.cpu2.l1c.demand_misses 60200 # number of demand (read+write) misses +system.cpu2.l1c.demand_accesses 69020 # number of demand (read+write) accesses +system.cpu2.l1c.demand_avg_miss_latency 23487.631009 # average overall miss latency +system.cpu2.l1c.demand_avg_mshr_miss_latency 23479.951578 # average overall mshr miss latency +system.cpu2.l1c.demand_hits 8490 # number of demand (read+write) hits +system.cpu2.l1c.demand_miss_latency 1421706305 # number of demand (read+write) miss cycles +system.cpu2.l1c.demand_miss_rate 0.876992 # miss rate for demand accesses +system.cpu2.l1c.demand_misses 60530 # number of demand (read+write) misses system.cpu2.l1c.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu2.l1c.demand_mshr_miss_latency 743340176 # number of demand (read+write) MSHR miss cycles -system.cpu2.l1c.demand_mshr_miss_rate 0.874631 # mshr miss rate for demand accesses -system.cpu2.l1c.demand_mshr_misses 60200 # number of demand (read+write) MSHR misses +system.cpu2.l1c.demand_mshr_miss_latency 1421241469 # number of demand (read+write) MSHR miss cycles +system.cpu2.l1c.demand_mshr_miss_rate 0.876992 # mshr miss rate for demand accesses +system.cpu2.l1c.demand_mshr_misses 60530 # number of demand (read+write) MSHR misses system.cpu2.l1c.fast_writes 0 # number of fast writes performed system.cpu2.l1c.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu2.l1c.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu2.l1c.overall_accesses 68829 # number of overall (read+write) accesses -system.cpu2.l1c.overall_avg_miss_latency 13534.909535 # average overall miss latency -system.cpu2.l1c.overall_avg_mshr_miss_latency 12347.843455 # average overall mshr miss latency -system.cpu2.l1c.overall_avg_mshr_uncacheable_latency 0 # average overall mshr uncacheable latency -system.cpu2.l1c.overall_hits 8629 # number of overall hits -system.cpu2.l1c.overall_miss_latency 814801554 # number of overall miss cycles -system.cpu2.l1c.overall_miss_rate 0.874631 # miss rate for overall accesses -system.cpu2.l1c.overall_misses 60200 # number of overall misses +system.cpu2.l1c.overall_accesses 69020 # number of overall (read+write) accesses +system.cpu2.l1c.overall_avg_miss_latency 23487.631009 # average overall miss latency +system.cpu2.l1c.overall_avg_mshr_miss_latency 23479.951578 # average overall mshr miss latency +system.cpu2.l1c.overall_avg_mshr_uncacheable_latency inf # average overall mshr uncacheable latency +system.cpu2.l1c.overall_hits 8490 # number of overall hits +system.cpu2.l1c.overall_miss_latency 1421706305 # number of overall miss cycles +system.cpu2.l1c.overall_miss_rate 0.876992 # miss rate for overall accesses +system.cpu2.l1c.overall_misses 60530 # number of overall misses system.cpu2.l1c.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu2.l1c.overall_mshr_miss_latency 743340176 # number of overall MSHR miss cycles -system.cpu2.l1c.overall_mshr_miss_rate 0.874631 # mshr miss rate for overall accesses -system.cpu2.l1c.overall_mshr_misses 60200 # number of overall MSHR misses -system.cpu2.l1c.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles -system.cpu2.l1c.overall_mshr_uncacheable_misses 15341 # number of overall MSHR uncacheable misses +system.cpu2.l1c.overall_mshr_miss_latency 1421241469 # number of overall MSHR miss cycles +system.cpu2.l1c.overall_mshr_miss_rate 0.876992 # mshr miss rate for overall accesses +system.cpu2.l1c.overall_mshr_misses 60530 # number of overall MSHR misses +system.cpu2.l1c.overall_mshr_uncacheable_latency 840798944 # number of overall MSHR uncacheable cycles +system.cpu2.l1c.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.cpu2.l1c.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache system.cpu2.l1c.prefetcher.num_hwpf_already_in_mshr 0 # number of hwpf that were already in mshr system.cpu2.l1c.prefetcher.num_hwpf_already_in_prefetcher 0 # number of hwpf that were already in the prefetch queue @@ -290,104 +233,76 @@ system.cpu2.l1c.prefetcher.num_hwpf_issued 0 # system.cpu2.l1c.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated system.cpu2.l1c.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page system.cpu2.l1c.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time -system.cpu2.l1c.protocol.hwpf_invalid 0 # hard prefetch misses to invalid blocks -system.cpu2.l1c.protocol.read_invalid 1818161 # read misses to invalid blocks -system.cpu2.l1c.protocol.snoop_inv_exclusive 0 # Invalidate snoops on exclusive blocks -system.cpu2.l1c.protocol.snoop_inv_invalid 0 # Invalidate snoops on invalid blocks -system.cpu2.l1c.protocol.snoop_inv_modified 0 # Invalidate snoops on modified blocks -system.cpu2.l1c.protocol.snoop_inv_owned 0 # Invalidate snoops on owned blocks -system.cpu2.l1c.protocol.snoop_inv_shared 0 # Invalidate snoops on shared blocks -system.cpu2.l1c.protocol.snoop_read_exclusive 2846 # read snoops on exclusive blocks -system.cpu2.l1c.protocol.snoop_read_modified 12505 # read snoops on modified blocks -system.cpu2.l1c.protocol.snoop_read_owned 7354 # read snoops on owned blocks -system.cpu2.l1c.protocol.snoop_read_shared 1719896 # read snoops on shared blocks -system.cpu2.l1c.protocol.snoop_readex_exclusive 1512 # readEx snoops on exclusive blocks -system.cpu2.l1c.protocol.snoop_readex_modified 6836 # readEx snoops on modified blocks -system.cpu2.l1c.protocol.snoop_readex_owned 4066 # readEx snoops on owned blocks -system.cpu2.l1c.protocol.snoop_readex_shared 12494 # readEx snoops on shared blocks -system.cpu2.l1c.protocol.snoop_upgrade_owned 828 # upgrade snoops on owned blocks -system.cpu2.l1c.protocol.snoop_upgrade_shared 2975 # upgradee snoops on shared blocks -system.cpu2.l1c.protocol.snoop_writeinv_exclusive 0 # WriteInvalidate snoops on exclusive blocks -system.cpu2.l1c.protocol.snoop_writeinv_invalid 0 # WriteInvalidate snoops on invalid blocks -system.cpu2.l1c.protocol.snoop_writeinv_modified 0 # WriteInvalidate snoops on modified blocks -system.cpu2.l1c.protocol.snoop_writeinv_owned 0 # WriteInvalidate snoops on owned blocks -system.cpu2.l1c.protocol.snoop_writeinv_shared 0 # WriteInvalidate snoops on shared blocks -system.cpu2.l1c.protocol.swpf_invalid 0 # soft prefetch misses to invalid blocks -system.cpu2.l1c.protocol.write_invalid 1061132 # write misses to invalid blocks -system.cpu2.l1c.protocol.write_owned 1410 # write misses to owned blocks -system.cpu2.l1c.protocol.write_shared 4436 # write misses to shared blocks -system.cpu2.l1c.replacements 27337 # number of replacements -system.cpu2.l1c.sampled_refs 27674 # Sample count of references to valid blocks. +system.cpu2.l1c.replacements 27570 # number of replacements +system.cpu2.l1c.sampled_refs 27912 # Sample count of references to valid blocks. system.cpu2.l1c.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu2.l1c.tagsinuse 343.290844 # Cycle average of tags in use -system.cpu2.l1c.total_refs 11477 # Total number of references to valid blocks. +system.cpu2.l1c.tagsinuse 346.579014 # Cycle average of tags in use +system.cpu2.l1c.total_refs 11643 # Total number of references to valid blocks. system.cpu2.l1c.warmup_cycle 0 # Cycle when the warmup percentage was hit. -system.cpu2.l1c.writebacks 10872 # number of writebacks +system.cpu2.l1c.writebacks 10678 # number of writebacks system.cpu2.num_copies 0 # number of copy accesses completed -system.cpu2.num_reads 98887 # number of read accesses completed -system.cpu2.num_writes 53640 # number of write accesses completed -system.cpu3.l1c.ReadReq_accesses 44566 # number of ReadReq accesses(hits+misses) -system.cpu3.l1c.ReadReq_avg_miss_latency 14066.553951 # average ReadReq miss latency -system.cpu3.l1c.ReadReq_avg_mshr_miss_latency 13052.525235 # average ReadReq mshr miss latency -system.cpu3.l1c.ReadReq_hits 7375 # number of ReadReq hits -system.cpu3.l1c.ReadReq_miss_latency 523149208 # number of ReadReq miss cycles -system.cpu3.l1c.ReadReq_miss_rate 0.834515 # miss rate for ReadReq accesses -system.cpu3.l1c.ReadReq_misses 37191 # number of ReadReq misses -system.cpu3.l1c.ReadReq_mshr_miss_latency 485436466 # number of ReadReq MSHR miss cycles -system.cpu3.l1c.ReadReq_mshr_miss_rate 0.834515 # mshr miss rate for ReadReq accesses -system.cpu3.l1c.ReadReq_mshr_misses 37191 # number of ReadReq MSHR misses -system.cpu3.l1c.ReadReq_mshr_uncacheable 9820 # number of ReadReq MSHR uncacheable -system.cpu3.l1c.ReadResp_avg_mshr_uncacheable_latency inf # average ReadResp mshr uncacheable latency -system.cpu3.l1c.ReadResp_mshr_uncacheable_latency 252799971 # number of ReadResp MSHR uncacheable cycles -system.cpu3.l1c.WriteReq_accesses 24030 # number of WriteReq accesses(hits+misses) -system.cpu3.l1c.WriteReq_avg_miss_latency 12807.474484 # average WriteReq miss latency -system.cpu3.l1c.WriteReq_avg_mshr_miss_latency 11345.837164 # average WriteReq mshr miss latency -system.cpu3.l1c.WriteReq_hits 1142 # number of WriteReq hits -system.cpu3.l1c.WriteReq_miss_latency 293137476 # number of WriteReq miss cycles -system.cpu3.l1c.WriteReq_miss_rate 0.952476 # miss rate for WriteReq accesses -system.cpu3.l1c.WriteReq_misses 22888 # number of WriteReq misses -system.cpu3.l1c.WriteReq_mshr_miss_latency 259683521 # number of WriteReq MSHR miss cycles -system.cpu3.l1c.WriteReq_mshr_miss_rate 0.952476 # mshr miss rate for WriteReq accesses -system.cpu3.l1c.WriteReq_mshr_misses 22888 # number of WriteReq MSHR misses -system.cpu3.l1c.WriteReq_mshr_uncacheable 5294 # number of WriteReq MSHR uncacheable -system.cpu3.l1c.WriteResp_avg_mshr_uncacheable_latency inf # average WriteResp mshr uncacheable latency -system.cpu3.l1c.WriteResp_mshr_uncacheable_latency 159218905 # number of WriteResp MSHR uncacheable cycles -system.cpu3.l1c.avg_blocked_cycles_no_mshrs 1193.729049 # average number of cycles each access was blocked +system.cpu2.num_reads 99982 # number of read accesses completed +system.cpu2.num_writes 53451 # number of write accesses completed +system.cpu3.l1c.ReadReq_accesses 45026 # number of ReadReq accesses(hits+misses) +system.cpu3.l1c.ReadReq_avg_miss_latency 22627.689991 # average ReadReq miss latency +system.cpu3.l1c.ReadReq_avg_mshr_miss_latency 22619.691218 # average ReadReq mshr miss latency +system.cpu3.l1c.ReadReq_avg_mshr_uncacheable_latency inf # average ReadReq mshr uncacheable latency +system.cpu3.l1c.ReadReq_hits 7540 # number of ReadReq hits +system.cpu3.l1c.ReadReq_miss_latency 848221587 # number of ReadReq miss cycles +system.cpu3.l1c.ReadReq_miss_rate 0.832541 # miss rate for ReadReq accesses +system.cpu3.l1c.ReadReq_misses 37486 # number of ReadReq misses +system.cpu3.l1c.ReadReq_mshr_miss_latency 847921745 # number of ReadReq MSHR miss cycles +system.cpu3.l1c.ReadReq_mshr_miss_rate 0.832541 # mshr miss rate for ReadReq accesses +system.cpu3.l1c.ReadReq_mshr_misses 37486 # number of ReadReq MSHR misses +system.cpu3.l1c.ReadReq_mshr_uncacheable_latency 521058272 # number of ReadReq MSHR uncacheable cycles +system.cpu3.l1c.WriteReq_accesses 24496 # number of WriteReq accesses(hits+misses) +system.cpu3.l1c.WriteReq_avg_miss_latency 24499.134103 # average WriteReq miss latency +system.cpu3.l1c.WriteReq_avg_mshr_miss_latency 24491.950730 # average WriteReq mshr miss latency +system.cpu3.l1c.WriteReq_avg_mshr_uncacheable_latency inf # average WriteReq mshr uncacheable latency +system.cpu3.l1c.WriteReq_hits 932 # number of WriteReq hits +system.cpu3.l1c.WriteReq_miss_latency 577297596 # number of WriteReq miss cycles +system.cpu3.l1c.WriteReq_miss_rate 0.961953 # miss rate for WriteReq accesses +system.cpu3.l1c.WriteReq_misses 23564 # number of WriteReq misses +system.cpu3.l1c.WriteReq_mshr_miss_latency 577128327 # number of WriteReq MSHR miss cycles +system.cpu3.l1c.WriteReq_mshr_miss_rate 0.961953 # mshr miss rate for WriteReq accesses +system.cpu3.l1c.WriteReq_mshr_misses 23564 # number of WriteReq MSHR misses +system.cpu3.l1c.WriteReq_mshr_uncacheable_latency 316556554 # number of WriteReq MSHR uncacheable cycles +system.cpu3.l1c.avg_blocked_cycles_no_mshrs 2277.071019 # average number of cycles each access was blocked system.cpu3.l1c.avg_blocked_cycles_no_targets # average number of cycles each access was blocked -system.cpu3.l1c.avg_refs 0.411345 # Average number of references to valid blocks. -system.cpu3.l1c.blocked_no_mshrs 69160 # number of cycles access was blocked +system.cpu3.l1c.avg_refs 0.408241 # Average number of references to valid blocks. +system.cpu3.l1c.blocked_no_mshrs 69700 # number of cycles access was blocked system.cpu3.l1c.blocked_no_targets 0 # number of cycles access was blocked -system.cpu3.l1c.blocked_cycles_no_mshrs 82558301 # number of cycles access was blocked +system.cpu3.l1c.blocked_cycles_no_mshrs 158711850 # number of cycles access was blocked system.cpu3.l1c.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu3.l1c.cache_copies 0 # number of cache copies performed -system.cpu3.l1c.demand_accesses 68596 # number of demand (read+write) accesses -system.cpu3.l1c.demand_avg_miss_latency 13586.888663 # average overall miss latency -system.cpu3.l1c.demand_avg_mshr_miss_latency 12402.336707 # average overall mshr miss latency -system.cpu3.l1c.demand_hits 8517 # number of demand (read+write) hits -system.cpu3.l1c.demand_miss_latency 816286684 # number of demand (read+write) miss cycles -system.cpu3.l1c.demand_miss_rate 0.875838 # miss rate for demand accesses -system.cpu3.l1c.demand_misses 60079 # number of demand (read+write) misses +system.cpu3.l1c.demand_accesses 69522 # number of demand (read+write) accesses +system.cpu3.l1c.demand_avg_miss_latency 23350.027568 # average overall miss latency +system.cpu3.l1c.demand_avg_mshr_miss_latency 23342.343522 # average overall mshr miss latency +system.cpu3.l1c.demand_hits 8472 # number of demand (read+write) hits +system.cpu3.l1c.demand_miss_latency 1425519183 # number of demand (read+write) miss cycles +system.cpu3.l1c.demand_miss_rate 0.878139 # miss rate for demand accesses +system.cpu3.l1c.demand_misses 61050 # number of demand (read+write) misses system.cpu3.l1c.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu3.l1c.demand_mshr_miss_latency 745119987 # number of demand (read+write) MSHR miss cycles -system.cpu3.l1c.demand_mshr_miss_rate 0.875838 # mshr miss rate for demand accesses -system.cpu3.l1c.demand_mshr_misses 60079 # number of demand (read+write) MSHR misses +system.cpu3.l1c.demand_mshr_miss_latency 1425050072 # number of demand (read+write) MSHR miss cycles +system.cpu3.l1c.demand_mshr_miss_rate 0.878139 # mshr miss rate for demand accesses +system.cpu3.l1c.demand_mshr_misses 61050 # number of demand (read+write) MSHR misses system.cpu3.l1c.fast_writes 0 # number of fast writes performed system.cpu3.l1c.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu3.l1c.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu3.l1c.overall_accesses 68596 # number of overall (read+write) accesses -system.cpu3.l1c.overall_avg_miss_latency 13586.888663 # average overall miss latency -system.cpu3.l1c.overall_avg_mshr_miss_latency 12402.336707 # average overall mshr miss latency -system.cpu3.l1c.overall_avg_mshr_uncacheable_latency 0 # average overall mshr uncacheable latency -system.cpu3.l1c.overall_hits 8517 # number of overall hits -system.cpu3.l1c.overall_miss_latency 816286684 # number of overall miss cycles -system.cpu3.l1c.overall_miss_rate 0.875838 # miss rate for overall accesses -system.cpu3.l1c.overall_misses 60079 # number of overall misses +system.cpu3.l1c.overall_accesses 69522 # number of overall (read+write) accesses +system.cpu3.l1c.overall_avg_miss_latency 23350.027568 # average overall miss latency +system.cpu3.l1c.overall_avg_mshr_miss_latency 23342.343522 # average overall mshr miss latency +system.cpu3.l1c.overall_avg_mshr_uncacheable_latency inf # average overall mshr uncacheable latency +system.cpu3.l1c.overall_hits 8472 # number of overall hits +system.cpu3.l1c.overall_miss_latency 1425519183 # number of overall miss cycles +system.cpu3.l1c.overall_miss_rate 0.878139 # miss rate for overall accesses +system.cpu3.l1c.overall_misses 61050 # number of overall misses system.cpu3.l1c.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu3.l1c.overall_mshr_miss_latency 745119987 # number of overall MSHR miss cycles -system.cpu3.l1c.overall_mshr_miss_rate 0.875838 # mshr miss rate for overall accesses -system.cpu3.l1c.overall_mshr_misses 60079 # number of overall MSHR misses -system.cpu3.l1c.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles -system.cpu3.l1c.overall_mshr_uncacheable_misses 15114 # number of overall MSHR uncacheable misses +system.cpu3.l1c.overall_mshr_miss_latency 1425050072 # number of overall MSHR miss cycles +system.cpu3.l1c.overall_mshr_miss_rate 0.878139 # mshr miss rate for overall accesses +system.cpu3.l1c.overall_mshr_misses 61050 # number of overall MSHR misses +system.cpu3.l1c.overall_mshr_uncacheable_latency 837614826 # number of overall MSHR uncacheable cycles +system.cpu3.l1c.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.cpu3.l1c.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache system.cpu3.l1c.prefetcher.num_hwpf_already_in_mshr 0 # number of hwpf that were already in mshr system.cpu3.l1c.prefetcher.num_hwpf_already_in_prefetcher 0 # number of hwpf that were already in the prefetch queue @@ -397,104 +312,76 @@ system.cpu3.l1c.prefetcher.num_hwpf_issued 0 # system.cpu3.l1c.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated system.cpu3.l1c.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page system.cpu3.l1c.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time -system.cpu3.l1c.protocol.hwpf_invalid 0 # hard prefetch misses to invalid blocks -system.cpu3.l1c.protocol.read_invalid 1894373 # read misses to invalid blocks -system.cpu3.l1c.protocol.snoop_inv_exclusive 0 # Invalidate snoops on exclusive blocks -system.cpu3.l1c.protocol.snoop_inv_invalid 0 # Invalidate snoops on invalid blocks -system.cpu3.l1c.protocol.snoop_inv_modified 0 # Invalidate snoops on modified blocks -system.cpu3.l1c.protocol.snoop_inv_owned 0 # Invalidate snoops on owned blocks -system.cpu3.l1c.protocol.snoop_inv_shared 0 # Invalidate snoops on shared blocks -system.cpu3.l1c.protocol.snoop_read_exclusive 2902 # read snoops on exclusive blocks -system.cpu3.l1c.protocol.snoop_read_modified 12291 # read snoops on modified blocks -system.cpu3.l1c.protocol.snoop_read_owned 7221 # read snoops on owned blocks -system.cpu3.l1c.protocol.snoop_read_shared 1743434 # read snoops on shared blocks -system.cpu3.l1c.protocol.snoop_readex_exclusive 1553 # readEx snoops on exclusive blocks -system.cpu3.l1c.protocol.snoop_readex_modified 6822 # readEx snoops on modified blocks -system.cpu3.l1c.protocol.snoop_readex_owned 3914 # readEx snoops on owned blocks -system.cpu3.l1c.protocol.snoop_readex_shared 12477 # readEx snoops on shared blocks -system.cpu3.l1c.protocol.snoop_upgrade_owned 867 # upgrade snoops on owned blocks -system.cpu3.l1c.protocol.snoop_upgrade_shared 3008 # upgradee snoops on shared blocks -system.cpu3.l1c.protocol.snoop_writeinv_exclusive 0 # WriteInvalidate snoops on exclusive blocks -system.cpu3.l1c.protocol.snoop_writeinv_invalid 0 # WriteInvalidate snoops on invalid blocks -system.cpu3.l1c.protocol.snoop_writeinv_modified 0 # WriteInvalidate snoops on modified blocks -system.cpu3.l1c.protocol.snoop_writeinv_owned 0 # WriteInvalidate snoops on owned blocks -system.cpu3.l1c.protocol.snoop_writeinv_shared 0 # WriteInvalidate snoops on shared blocks -system.cpu3.l1c.protocol.swpf_invalid 0 # soft prefetch misses to invalid blocks -system.cpu3.l1c.protocol.write_invalid 1046634 # write misses to invalid blocks -system.cpu3.l1c.protocol.write_owned 1364 # write misses to owned blocks -system.cpu3.l1c.protocol.write_shared 4484 # write misses to shared blocks -system.cpu3.l1c.replacements 27286 # number of replacements -system.cpu3.l1c.sampled_refs 27624 # Sample count of references to valid blocks. +system.cpu3.l1c.replacements 28153 # number of replacements +system.cpu3.l1c.sampled_refs 28515 # Sample count of references to valid blocks. system.cpu3.l1c.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu3.l1c.tagsinuse 342.290575 # Cycle average of tags in use -system.cpu3.l1c.total_refs 11363 # Total number of references to valid blocks. +system.cpu3.l1c.tagsinuse 348.493440 # Cycle average of tags in use +system.cpu3.l1c.total_refs 11641 # Total number of references to valid blocks. system.cpu3.l1c.warmup_cycle 0 # Cycle when the warmup percentage was hit. -system.cpu3.l1c.writebacks 10681 # number of writebacks +system.cpu3.l1c.writebacks 11085 # number of writebacks system.cpu3.num_copies 0 # number of copy accesses completed -system.cpu3.num_reads 99322 # number of read accesses completed -system.cpu3.num_writes 53280 # number of write accesses completed -system.cpu4.l1c.ReadReq_accesses 44971 # number of ReadReq accesses(hits+misses) -system.cpu4.l1c.ReadReq_avg_miss_latency 13943.186039 # average ReadReq miss latency -system.cpu4.l1c.ReadReq_avg_mshr_miss_latency 12937.718615 # average ReadReq mshr miss latency -system.cpu4.l1c.ReadReq_hits 7581 # number of ReadReq hits -system.cpu4.l1c.ReadReq_miss_latency 521335726 # number of ReadReq miss cycles -system.cpu4.l1c.ReadReq_miss_rate 0.831425 # miss rate for ReadReq accesses -system.cpu4.l1c.ReadReq_misses 37390 # number of ReadReq misses -system.cpu4.l1c.ReadReq_mshr_miss_latency 483741299 # number of ReadReq MSHR miss cycles -system.cpu4.l1c.ReadReq_mshr_miss_rate 0.831425 # mshr miss rate for ReadReq accesses -system.cpu4.l1c.ReadReq_mshr_misses 37390 # number of ReadReq MSHR misses -system.cpu4.l1c.ReadReq_mshr_uncacheable 9931 # number of ReadReq MSHR uncacheable -system.cpu4.l1c.ReadResp_avg_mshr_uncacheable_latency inf # average ReadResp mshr uncacheable latency -system.cpu4.l1c.ReadResp_mshr_uncacheable_latency 254015216 # number of ReadResp MSHR uncacheable cycles -system.cpu4.l1c.WriteReq_accesses 24134 # number of WriteReq accesses(hits+misses) -system.cpu4.l1c.WriteReq_avg_miss_latency 12764.573629 # average WriteReq miss latency -system.cpu4.l1c.WriteReq_avg_mshr_miss_latency 11273.971841 # average WriteReq mshr miss latency -system.cpu4.l1c.WriteReq_hits 1086 # number of WriteReq hits -system.cpu4.l1c.WriteReq_miss_latency 294197893 # number of WriteReq miss cycles -system.cpu4.l1c.WriteReq_miss_rate 0.955001 # miss rate for WriteReq accesses -system.cpu4.l1c.WriteReq_misses 23048 # number of WriteReq misses -system.cpu4.l1c.WriteReq_mshr_miss_latency 259842503 # number of WriteReq MSHR miss cycles -system.cpu4.l1c.WriteReq_mshr_miss_rate 0.955001 # mshr miss rate for WriteReq accesses -system.cpu4.l1c.WriteReq_mshr_misses 23048 # number of WriteReq MSHR misses -system.cpu4.l1c.WriteReq_mshr_uncacheable 5390 # number of WriteReq MSHR uncacheable -system.cpu4.l1c.WriteResp_avg_mshr_uncacheable_latency inf # average WriteResp mshr uncacheable latency -system.cpu4.l1c.WriteResp_mshr_uncacheable_latency 161643344 # number of WriteResp MSHR uncacheable cycles -system.cpu4.l1c.avg_blocked_cycles_no_mshrs 1186.636056 # average number of cycles each access was blocked +system.cpu3.num_reads 99697 # number of read accesses completed +system.cpu3.num_writes 54254 # number of write accesses completed +system.cpu4.l1c.ReadReq_accesses 44695 # number of ReadReq accesses(hits+misses) +system.cpu4.l1c.ReadReq_avg_miss_latency 22595.724111 # average ReadReq miss latency +system.cpu4.l1c.ReadReq_avg_mshr_miss_latency 22587.725051 # average ReadReq mshr miss latency +system.cpu4.l1c.ReadReq_avg_mshr_uncacheable_latency inf # average ReadReq mshr uncacheable latency +system.cpu4.l1c.ReadReq_hits 7459 # number of ReadReq hits +system.cpu4.l1c.ReadReq_miss_latency 841374383 # number of ReadReq miss cycles +system.cpu4.l1c.ReadReq_miss_rate 0.833113 # miss rate for ReadReq accesses +system.cpu4.l1c.ReadReq_misses 37236 # number of ReadReq misses +system.cpu4.l1c.ReadReq_mshr_miss_latency 841076530 # number of ReadReq MSHR miss cycles +system.cpu4.l1c.ReadReq_mshr_miss_rate 0.833113 # mshr miss rate for ReadReq accesses +system.cpu4.l1c.ReadReq_mshr_misses 37236 # number of ReadReq MSHR misses +system.cpu4.l1c.ReadReq_mshr_uncacheable_latency 521925270 # number of ReadReq MSHR uncacheable cycles +system.cpu4.l1c.WriteReq_accesses 24320 # number of WriteReq accesses(hits+misses) +system.cpu4.l1c.WriteReq_avg_miss_latency 24976.967619 # average WriteReq miss latency +system.cpu4.l1c.WriteReq_avg_mshr_miss_latency 24969.752460 # average WriteReq mshr miss latency +system.cpu4.l1c.WriteReq_avg_mshr_uncacheable_latency inf # average WriteReq mshr uncacheable latency +system.cpu4.l1c.WriteReq_hits 942 # number of WriteReq hits +system.cpu4.l1c.WriteReq_miss_latency 583911549 # number of WriteReq miss cycles +system.cpu4.l1c.WriteReq_miss_rate 0.961266 # miss rate for WriteReq accesses +system.cpu4.l1c.WriteReq_misses 23378 # number of WriteReq misses +system.cpu4.l1c.WriteReq_mshr_miss_latency 583742873 # number of WriteReq MSHR miss cycles +system.cpu4.l1c.WriteReq_mshr_miss_rate 0.961266 # mshr miss rate for WriteReq accesses +system.cpu4.l1c.WriteReq_mshr_misses 23378 # number of WriteReq MSHR misses +system.cpu4.l1c.WriteReq_mshr_uncacheable_latency 314744590 # number of WriteReq MSHR uncacheable cycles +system.cpu4.l1c.avg_blocked_cycles_no_mshrs 2286.910395 # average number of cycles each access was blocked system.cpu4.l1c.avg_blocked_cycles_no_targets # average number of cycles each access was blocked -system.cpu4.l1c.avg_refs 0.410931 # Average number of references to valid blocks. -system.cpu4.l1c.blocked_no_mshrs 69637 # number of cycles access was blocked +system.cpu4.l1c.avg_refs 0.401516 # Average number of references to valid blocks. +system.cpu4.l1c.blocked_no_mshrs 69382 # number of cycles access was blocked system.cpu4.l1c.blocked_no_targets 0 # number of cycles access was blocked -system.cpu4.l1c.blocked_cycles_no_mshrs 82633775 # number of cycles access was blocked +system.cpu4.l1c.blocked_cycles_no_mshrs 158670417 # number of cycles access was blocked system.cpu4.l1c.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu4.l1c.cache_copies 0 # number of cache copies performed -system.cpu4.l1c.demand_accesses 69105 # number of demand (read+write) accesses -system.cpu4.l1c.demand_avg_miss_latency 13493.722807 # average overall miss latency -system.cpu4.l1c.demand_avg_mshr_miss_latency 12303.249644 # average overall mshr miss latency -system.cpu4.l1c.demand_hits 8667 # number of demand (read+write) hits -system.cpu4.l1c.demand_miss_latency 815533619 # number of demand (read+write) miss cycles -system.cpu4.l1c.demand_miss_rate 0.874582 # miss rate for demand accesses -system.cpu4.l1c.demand_misses 60438 # number of demand (read+write) misses +system.cpu4.l1c.demand_accesses 69015 # number of demand (read+write) accesses +system.cpu4.l1c.demand_avg_miss_latency 23514.137526 # average overall miss latency +system.cpu4.l1c.demand_avg_mshr_miss_latency 23506.440806 # average overall mshr miss latency +system.cpu4.l1c.demand_hits 8401 # number of demand (read+write) hits +system.cpu4.l1c.demand_miss_latency 1425285932 # number of demand (read+write) miss cycles +system.cpu4.l1c.demand_miss_rate 0.878273 # miss rate for demand accesses +system.cpu4.l1c.demand_misses 60614 # number of demand (read+write) misses system.cpu4.l1c.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu4.l1c.demand_mshr_miss_latency 743583802 # number of demand (read+write) MSHR miss cycles -system.cpu4.l1c.demand_mshr_miss_rate 0.874582 # mshr miss rate for demand accesses -system.cpu4.l1c.demand_mshr_misses 60438 # number of demand (read+write) MSHR misses +system.cpu4.l1c.demand_mshr_miss_latency 1424819403 # number of demand (read+write) MSHR miss cycles +system.cpu4.l1c.demand_mshr_miss_rate 0.878273 # mshr miss rate for demand accesses +system.cpu4.l1c.demand_mshr_misses 60614 # number of demand (read+write) MSHR misses system.cpu4.l1c.fast_writes 0 # number of fast writes performed system.cpu4.l1c.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu4.l1c.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu4.l1c.overall_accesses 69105 # number of overall (read+write) accesses -system.cpu4.l1c.overall_avg_miss_latency 13493.722807 # average overall miss latency -system.cpu4.l1c.overall_avg_mshr_miss_latency 12303.249644 # average overall mshr miss latency -system.cpu4.l1c.overall_avg_mshr_uncacheable_latency 0 # average overall mshr uncacheable latency -system.cpu4.l1c.overall_hits 8667 # number of overall hits -system.cpu4.l1c.overall_miss_latency 815533619 # number of overall miss cycles -system.cpu4.l1c.overall_miss_rate 0.874582 # miss rate for overall accesses -system.cpu4.l1c.overall_misses 60438 # number of overall misses +system.cpu4.l1c.overall_accesses 69015 # number of overall (read+write) accesses +system.cpu4.l1c.overall_avg_miss_latency 23514.137526 # average overall miss latency +system.cpu4.l1c.overall_avg_mshr_miss_latency 23506.440806 # average overall mshr miss latency +system.cpu4.l1c.overall_avg_mshr_uncacheable_latency inf # average overall mshr uncacheable latency +system.cpu4.l1c.overall_hits 8401 # number of overall hits +system.cpu4.l1c.overall_miss_latency 1425285932 # number of overall miss cycles +system.cpu4.l1c.overall_miss_rate 0.878273 # miss rate for overall accesses +system.cpu4.l1c.overall_misses 60614 # number of overall misses system.cpu4.l1c.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu4.l1c.overall_mshr_miss_latency 743583802 # number of overall MSHR miss cycles -system.cpu4.l1c.overall_mshr_miss_rate 0.874582 # mshr miss rate for overall accesses -system.cpu4.l1c.overall_mshr_misses 60438 # number of overall MSHR misses -system.cpu4.l1c.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles -system.cpu4.l1c.overall_mshr_uncacheable_misses 15321 # number of overall MSHR uncacheable misses +system.cpu4.l1c.overall_mshr_miss_latency 1424819403 # number of overall MSHR miss cycles +system.cpu4.l1c.overall_mshr_miss_rate 0.878273 # mshr miss rate for overall accesses +system.cpu4.l1c.overall_mshr_misses 60614 # number of overall MSHR misses +system.cpu4.l1c.overall_mshr_uncacheable_latency 836669860 # number of overall MSHR uncacheable cycles +system.cpu4.l1c.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.cpu4.l1c.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache system.cpu4.l1c.prefetcher.num_hwpf_already_in_mshr 0 # number of hwpf that were already in mshr system.cpu4.l1c.prefetcher.num_hwpf_already_in_prefetcher 0 # number of hwpf that were already in the prefetch queue @@ -504,104 +391,76 @@ system.cpu4.l1c.prefetcher.num_hwpf_issued 0 # system.cpu4.l1c.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated system.cpu4.l1c.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page system.cpu4.l1c.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time -system.cpu4.l1c.protocol.hwpf_invalid 0 # hard prefetch misses to invalid blocks -system.cpu4.l1c.protocol.read_invalid 1830675 # read misses to invalid blocks -system.cpu4.l1c.protocol.snoop_inv_exclusive 0 # Invalidate snoops on exclusive blocks -system.cpu4.l1c.protocol.snoop_inv_invalid 0 # Invalidate snoops on invalid blocks -system.cpu4.l1c.protocol.snoop_inv_modified 0 # Invalidate snoops on modified blocks -system.cpu4.l1c.protocol.snoop_inv_owned 0 # Invalidate snoops on owned blocks -system.cpu4.l1c.protocol.snoop_inv_shared 0 # Invalidate snoops on shared blocks -system.cpu4.l1c.protocol.snoop_read_exclusive 2847 # read snoops on exclusive blocks -system.cpu4.l1c.protocol.snoop_read_modified 12499 # read snoops on modified blocks -system.cpu4.l1c.protocol.snoop_read_owned 7458 # read snoops on owned blocks -system.cpu4.l1c.protocol.snoop_read_shared 1765770 # read snoops on shared blocks -system.cpu4.l1c.protocol.snoop_readex_exclusive 1560 # readEx snoops on exclusive blocks -system.cpu4.l1c.protocol.snoop_readex_modified 6711 # readEx snoops on modified blocks -system.cpu4.l1c.protocol.snoop_readex_owned 3919 # readEx snoops on owned blocks -system.cpu4.l1c.protocol.snoop_readex_shared 12526 # readEx snoops on shared blocks -system.cpu4.l1c.protocol.snoop_upgrade_owned 902 # upgrade snoops on owned blocks -system.cpu4.l1c.protocol.snoop_upgrade_shared 3023 # upgradee snoops on shared blocks -system.cpu4.l1c.protocol.snoop_writeinv_exclusive 0 # WriteInvalidate snoops on exclusive blocks -system.cpu4.l1c.protocol.snoop_writeinv_invalid 0 # WriteInvalidate snoops on invalid blocks -system.cpu4.l1c.protocol.snoop_writeinv_modified 0 # WriteInvalidate snoops on modified blocks -system.cpu4.l1c.protocol.snoop_writeinv_owned 0 # WriteInvalidate snoops on owned blocks -system.cpu4.l1c.protocol.snoop_writeinv_shared 0 # WriteInvalidate snoops on shared blocks -system.cpu4.l1c.protocol.swpf_invalid 0 # soft prefetch misses to invalid blocks -system.cpu4.l1c.protocol.write_invalid 854606 # write misses to invalid blocks -system.cpu4.l1c.protocol.write_owned 1318 # write misses to owned blocks -system.cpu4.l1c.protocol.write_shared 4519 # write misses to shared blocks -system.cpu4.l1c.replacements 27664 # number of replacements -system.cpu4.l1c.sampled_refs 28012 # Sample count of references to valid blocks. +system.cpu4.l1c.replacements 28031 # number of replacements +system.cpu4.l1c.sampled_refs 28370 # Sample count of references to valid blocks. system.cpu4.l1c.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu4.l1c.tagsinuse 344.185288 # Cycle average of tags in use -system.cpu4.l1c.total_refs 11511 # Total number of references to valid blocks. +system.cpu4.l1c.tagsinuse 347.544315 # Cycle average of tags in use +system.cpu4.l1c.total_refs 11391 # Total number of references to valid blocks. system.cpu4.l1c.warmup_cycle 0 # Cycle when the warmup percentage was hit. -system.cpu4.l1c.writebacks 10935 # number of writebacks +system.cpu4.l1c.writebacks 11138 # number of writebacks system.cpu4.num_copies 0 # number of copy accesses completed -system.cpu4.num_reads 99841 # number of read accesses completed -system.cpu4.num_writes 54005 # number of write accesses completed -system.cpu5.l1c.ReadReq_accesses 45075 # number of ReadReq accesses(hits+misses) -system.cpu5.l1c.ReadReq_avg_miss_latency 13980.675167 # average ReadReq miss latency -system.cpu5.l1c.ReadReq_avg_mshr_miss_latency 12974.186518 # average ReadReq mshr miss latency -system.cpu5.l1c.ReadReq_hits 7588 # number of ReadReq hits -system.cpu5.l1c.ReadReq_miss_latency 524093570 # number of ReadReq miss cycles -system.cpu5.l1c.ReadReq_miss_rate 0.831658 # miss rate for ReadReq accesses -system.cpu5.l1c.ReadReq_misses 37487 # number of ReadReq misses -system.cpu5.l1c.ReadReq_mshr_miss_latency 486363330 # number of ReadReq MSHR miss cycles -system.cpu5.l1c.ReadReq_mshr_miss_rate 0.831658 # mshr miss rate for ReadReq accesses -system.cpu5.l1c.ReadReq_mshr_misses 37487 # number of ReadReq MSHR misses -system.cpu5.l1c.ReadReq_mshr_uncacheable 9769 # number of ReadReq MSHR uncacheable -system.cpu5.l1c.ReadResp_avg_mshr_uncacheable_latency inf # average ReadResp mshr uncacheable latency -system.cpu5.l1c.ReadResp_mshr_uncacheable_latency 252483534 # number of ReadResp MSHR uncacheable cycles -system.cpu5.l1c.WriteReq_accesses 24120 # number of WriteReq accesses(hits+misses) -system.cpu5.l1c.WriteReq_avg_miss_latency 12733.111936 # average WriteReq miss latency -system.cpu5.l1c.WriteReq_avg_mshr_miss_latency 11249.826210 # average WriteReq mshr miss latency -system.cpu5.l1c.WriteReq_hits 1098 # number of WriteReq hits -system.cpu5.l1c.WriteReq_miss_latency 293141703 # number of WriteReq miss cycles -system.cpu5.l1c.WriteReq_miss_rate 0.954478 # miss rate for WriteReq accesses -system.cpu5.l1c.WriteReq_misses 23022 # number of WriteReq misses -system.cpu5.l1c.WriteReq_mshr_miss_latency 258993499 # number of WriteReq MSHR miss cycles -system.cpu5.l1c.WriteReq_mshr_miss_rate 0.954478 # mshr miss rate for WriteReq accesses -system.cpu5.l1c.WriteReq_mshr_misses 23022 # number of WriteReq MSHR misses -system.cpu5.l1c.WriteReq_mshr_uncacheable 5232 # number of WriteReq MSHR uncacheable -system.cpu5.l1c.WriteResp_avg_mshr_uncacheable_latency inf # average WriteResp mshr uncacheable latency -system.cpu5.l1c.WriteResp_mshr_uncacheable_latency 155064988 # number of WriteResp MSHR uncacheable cycles -system.cpu5.l1c.avg_blocked_cycles_no_mshrs 1188.349008 # average number of cycles each access was blocked +system.cpu4.num_reads 99375 # number of read accesses completed +system.cpu4.num_writes 53856 # number of write accesses completed +system.cpu5.l1c.ReadReq_accesses 44846 # number of ReadReq accesses(hits+misses) +system.cpu5.l1c.ReadReq_avg_miss_latency 22795.859807 # average ReadReq miss latency +system.cpu5.l1c.ReadReq_avg_mshr_miss_latency 22787.860584 # average ReadReq mshr miss latency +system.cpu5.l1c.ReadReq_avg_mshr_uncacheable_latency inf # average ReadReq mshr uncacheable latency +system.cpu5.l1c.ReadReq_hits 7526 # number of ReadReq hits +system.cpu5.l1c.ReadReq_miss_latency 850741488 # number of ReadReq miss cycles +system.cpu5.l1c.ReadReq_miss_rate 0.832181 # miss rate for ReadReq accesses +system.cpu5.l1c.ReadReq_misses 37320 # number of ReadReq misses +system.cpu5.l1c.ReadReq_mshr_miss_latency 850442957 # number of ReadReq MSHR miss cycles +system.cpu5.l1c.ReadReq_mshr_miss_rate 0.832181 # mshr miss rate for ReadReq accesses +system.cpu5.l1c.ReadReq_mshr_misses 37320 # number of ReadReq MSHR misses +system.cpu5.l1c.ReadReq_mshr_uncacheable_latency 518680326 # number of ReadReq MSHR uncacheable cycles +system.cpu5.l1c.WriteReq_accesses 24378 # number of WriteReq accesses(hits+misses) +system.cpu5.l1c.WriteReq_avg_miss_latency 24686.676265 # average WriteReq miss latency +system.cpu5.l1c.WriteReq_avg_mshr_miss_latency 24679.493004 # average WriteReq mshr miss latency +system.cpu5.l1c.WriteReq_avg_mshr_uncacheable_latency inf # average WriteReq mshr uncacheable latency +system.cpu5.l1c.WriteReq_hits 936 # number of WriteReq hits +system.cpu5.l1c.WriteReq_miss_latency 578705065 # number of WriteReq miss cycles +system.cpu5.l1c.WriteReq_miss_rate 0.961605 # miss rate for WriteReq accesses +system.cpu5.l1c.WriteReq_misses 23442 # number of WriteReq misses +system.cpu5.l1c.WriteReq_mshr_miss_latency 578536675 # number of WriteReq MSHR miss cycles +system.cpu5.l1c.WriteReq_mshr_miss_rate 0.961605 # mshr miss rate for WriteReq accesses +system.cpu5.l1c.WriteReq_mshr_misses 23442 # number of WriteReq MSHR misses +system.cpu5.l1c.WriteReq_mshr_uncacheable_latency 315478251 # number of WriteReq MSHR uncacheable cycles +system.cpu5.l1c.avg_blocked_cycles_no_mshrs 2288.071694 # average number of cycles each access was blocked system.cpu5.l1c.avg_blocked_cycles_no_targets # average number of cycles each access was blocked -system.cpu5.l1c.avg_refs 0.414917 # Average number of references to valid blocks. -system.cpu5.l1c.blocked_no_mshrs 69537 # number of cycles access was blocked +system.cpu5.l1c.avg_refs 0.412333 # Average number of references to valid blocks. +system.cpu5.l1c.blocked_no_mshrs 69434 # number of cycles access was blocked system.cpu5.l1c.blocked_no_targets 0 # number of cycles access was blocked -system.cpu5.l1c.blocked_cycles_no_mshrs 82634225 # number of cycles access was blocked +system.cpu5.l1c.blocked_cycles_no_mshrs 158869970 # number of cycles access was blocked system.cpu5.l1c.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu5.l1c.cache_copies 0 # number of cache copies performed -system.cpu5.l1c.demand_accesses 69195 # number of demand (read+write) accesses -system.cpu5.l1c.demand_avg_miss_latency 13506.011883 # average overall miss latency -system.cpu5.l1c.demand_avg_mshr_miss_latency 12318.115140 # average overall mshr miss latency -system.cpu5.l1c.demand_hits 8686 # number of demand (read+write) hits -system.cpu5.l1c.demand_miss_latency 817235273 # number of demand (read+write) miss cycles -system.cpu5.l1c.demand_miss_rate 0.874471 # miss rate for demand accesses -system.cpu5.l1c.demand_misses 60509 # number of demand (read+write) misses +system.cpu5.l1c.demand_accesses 69224 # number of demand (read+write) accesses +system.cpu5.l1c.demand_avg_miss_latency 23525.337431 # average overall miss latency +system.cpu5.l1c.demand_avg_mshr_miss_latency 23517.653007 # average overall mshr miss latency +system.cpu5.l1c.demand_hits 8462 # number of demand (read+write) hits +system.cpu5.l1c.demand_miss_latency 1429446553 # number of demand (read+write) miss cycles +system.cpu5.l1c.demand_miss_rate 0.877759 # miss rate for demand accesses +system.cpu5.l1c.demand_misses 60762 # number of demand (read+write) misses system.cpu5.l1c.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu5.l1c.demand_mshr_miss_latency 745356829 # number of demand (read+write) MSHR miss cycles -system.cpu5.l1c.demand_mshr_miss_rate 0.874471 # mshr miss rate for demand accesses -system.cpu5.l1c.demand_mshr_misses 60509 # number of demand (read+write) MSHR misses +system.cpu5.l1c.demand_mshr_miss_latency 1428979632 # number of demand (read+write) MSHR miss cycles +system.cpu5.l1c.demand_mshr_miss_rate 0.877759 # mshr miss rate for demand accesses +system.cpu5.l1c.demand_mshr_misses 60762 # number of demand (read+write) MSHR misses system.cpu5.l1c.fast_writes 0 # number of fast writes performed system.cpu5.l1c.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu5.l1c.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu5.l1c.overall_accesses 69195 # number of overall (read+write) accesses -system.cpu5.l1c.overall_avg_miss_latency 13506.011883 # average overall miss latency -system.cpu5.l1c.overall_avg_mshr_miss_latency 12318.115140 # average overall mshr miss latency -system.cpu5.l1c.overall_avg_mshr_uncacheable_latency 0 # average overall mshr uncacheable latency -system.cpu5.l1c.overall_hits 8686 # number of overall hits -system.cpu5.l1c.overall_miss_latency 817235273 # number of overall miss cycles -system.cpu5.l1c.overall_miss_rate 0.874471 # miss rate for overall accesses -system.cpu5.l1c.overall_misses 60509 # number of overall misses +system.cpu5.l1c.overall_accesses 69224 # number of overall (read+write) accesses +system.cpu5.l1c.overall_avg_miss_latency 23525.337431 # average overall miss latency +system.cpu5.l1c.overall_avg_mshr_miss_latency 23517.653007 # average overall mshr miss latency +system.cpu5.l1c.overall_avg_mshr_uncacheable_latency inf # average overall mshr uncacheable latency +system.cpu5.l1c.overall_hits 8462 # number of overall hits +system.cpu5.l1c.overall_miss_latency 1429446553 # number of overall miss cycles +system.cpu5.l1c.overall_miss_rate 0.877759 # miss rate for overall accesses +system.cpu5.l1c.overall_misses 60762 # number of overall misses system.cpu5.l1c.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu5.l1c.overall_mshr_miss_latency 745356829 # number of overall MSHR miss cycles -system.cpu5.l1c.overall_mshr_miss_rate 0.874471 # mshr miss rate for overall accesses -system.cpu5.l1c.overall_mshr_misses 60509 # number of overall MSHR misses -system.cpu5.l1c.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles -system.cpu5.l1c.overall_mshr_uncacheable_misses 15001 # number of overall MSHR uncacheable misses +system.cpu5.l1c.overall_mshr_miss_latency 1428979632 # number of overall MSHR miss cycles +system.cpu5.l1c.overall_mshr_miss_rate 0.877759 # mshr miss rate for overall accesses +system.cpu5.l1c.overall_mshr_misses 60762 # number of overall MSHR misses +system.cpu5.l1c.overall_mshr_uncacheable_latency 834158577 # number of overall MSHR uncacheable cycles +system.cpu5.l1c.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.cpu5.l1c.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache system.cpu5.l1c.prefetcher.num_hwpf_already_in_mshr 0 # number of hwpf that were already in mshr system.cpu5.l1c.prefetcher.num_hwpf_already_in_prefetcher 0 # number of hwpf that were already in the prefetch queue @@ -611,104 +470,76 @@ system.cpu5.l1c.prefetcher.num_hwpf_issued 0 # system.cpu5.l1c.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated system.cpu5.l1c.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page system.cpu5.l1c.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time -system.cpu5.l1c.protocol.hwpf_invalid 0 # hard prefetch misses to invalid blocks -system.cpu5.l1c.protocol.read_invalid 1718821 # read misses to invalid blocks -system.cpu5.l1c.protocol.snoop_inv_exclusive 0 # Invalidate snoops on exclusive blocks -system.cpu5.l1c.protocol.snoop_inv_invalid 0 # Invalidate snoops on invalid blocks -system.cpu5.l1c.protocol.snoop_inv_modified 0 # Invalidate snoops on modified blocks -system.cpu5.l1c.protocol.snoop_inv_owned 0 # Invalidate snoops on owned blocks -system.cpu5.l1c.protocol.snoop_inv_shared 0 # Invalidate snoops on shared blocks -system.cpu5.l1c.protocol.snoop_read_exclusive 2926 # read snoops on exclusive blocks -system.cpu5.l1c.protocol.snoop_read_modified 12465 # read snoops on modified blocks -system.cpu5.l1c.protocol.snoop_read_owned 7201 # read snoops on owned blocks -system.cpu5.l1c.protocol.snoop_read_shared 1810557 # read snoops on shared blocks -system.cpu5.l1c.protocol.snoop_readex_exclusive 1622 # readEx snoops on exclusive blocks -system.cpu5.l1c.protocol.snoop_readex_modified 6690 # readEx snoops on modified blocks -system.cpu5.l1c.protocol.snoop_readex_owned 3947 # readEx snoops on owned blocks -system.cpu5.l1c.protocol.snoop_readex_shared 12574 # readEx snoops on shared blocks -system.cpu5.l1c.protocol.snoop_upgrade_owned 818 # upgrade snoops on owned blocks -system.cpu5.l1c.protocol.snoop_upgrade_shared 3092 # upgradee snoops on shared blocks -system.cpu5.l1c.protocol.snoop_writeinv_exclusive 0 # WriteInvalidate snoops on exclusive blocks -system.cpu5.l1c.protocol.snoop_writeinv_invalid 0 # WriteInvalidate snoops on invalid blocks -system.cpu5.l1c.protocol.snoop_writeinv_modified 0 # WriteInvalidate snoops on modified blocks -system.cpu5.l1c.protocol.snoop_writeinv_owned 0 # WriteInvalidate snoops on owned blocks -system.cpu5.l1c.protocol.snoop_writeinv_shared 0 # WriteInvalidate snoops on shared blocks -system.cpu5.l1c.protocol.swpf_invalid 0 # soft prefetch misses to invalid blocks -system.cpu5.l1c.protocol.write_invalid 914561 # write misses to invalid blocks -system.cpu5.l1c.protocol.write_owned 1422 # write misses to owned blocks -system.cpu5.l1c.protocol.write_shared 4534 # write misses to shared blocks -system.cpu5.l1c.replacements 27551 # number of replacements -system.cpu5.l1c.sampled_refs 27914 # Sample count of references to valid blocks. +system.cpu5.l1c.replacements 27718 # number of replacements +system.cpu5.l1c.sampled_refs 28055 # Sample count of references to valid blocks. system.cpu5.l1c.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu5.l1c.tagsinuse 344.440637 # Cycle average of tags in use -system.cpu5.l1c.total_refs 11582 # Total number of references to valid blocks. +system.cpu5.l1c.tagsinuse 345.552063 # Cycle average of tags in use +system.cpu5.l1c.total_refs 11568 # Total number of references to valid blocks. system.cpu5.l1c.warmup_cycle 0 # Cycle when the warmup percentage was hit. -system.cpu5.l1c.writebacks 10931 # number of writebacks +system.cpu5.l1c.writebacks 10910 # number of writebacks system.cpu5.num_copies 0 # number of copy accesses completed -system.cpu5.num_reads 99674 # number of read accesses completed -system.cpu5.num_writes 53393 # number of write accesses completed -system.cpu6.l1c.ReadReq_accesses 44595 # number of ReadReq accesses(hits+misses) -system.cpu6.l1c.ReadReq_avg_miss_latency 14001.082353 # average ReadReq miss latency -system.cpu6.l1c.ReadReq_avg_mshr_miss_latency 12995.526917 # average ReadReq mshr miss latency -system.cpu6.l1c.ReadReq_hits 7462 # number of ReadReq hits -system.cpu6.l1c.ReadReq_miss_latency 519902191 # number of ReadReq miss cycles -system.cpu6.l1c.ReadReq_miss_rate 0.832672 # miss rate for ReadReq accesses -system.cpu6.l1c.ReadReq_misses 37133 # number of ReadReq misses -system.cpu6.l1c.ReadReq_mshr_miss_latency 482562901 # number of ReadReq MSHR miss cycles -system.cpu6.l1c.ReadReq_mshr_miss_rate 0.832672 # mshr miss rate for ReadReq accesses -system.cpu6.l1c.ReadReq_mshr_misses 37133 # number of ReadReq MSHR misses -system.cpu6.l1c.ReadReq_mshr_uncacheable 9820 # number of ReadReq MSHR uncacheable -system.cpu6.l1c.ReadResp_avg_mshr_uncacheable_latency inf # average ReadResp mshr uncacheable latency -system.cpu6.l1c.ReadResp_mshr_uncacheable_latency 251671127 # number of ReadResp MSHR uncacheable cycles -system.cpu6.l1c.WriteReq_accesses 24364 # number of WriteReq accesses(hits+misses) -system.cpu6.l1c.WriteReq_avg_miss_latency 12854.640783 # average WriteReq miss latency -system.cpu6.l1c.WriteReq_avg_mshr_miss_latency 11385.598176 # average WriteReq mshr miss latency -system.cpu6.l1c.WriteReq_hits 1222 # number of WriteReq hits -system.cpu6.l1c.WriteReq_miss_latency 297482097 # number of WriteReq miss cycles -system.cpu6.l1c.WriteReq_miss_rate 0.949844 # miss rate for WriteReq accesses -system.cpu6.l1c.WriteReq_misses 23142 # number of WriteReq misses -system.cpu6.l1c.WriteReq_mshr_miss_latency 263485513 # number of WriteReq MSHR miss cycles -system.cpu6.l1c.WriteReq_mshr_miss_rate 0.949844 # mshr miss rate for WriteReq accesses -system.cpu6.l1c.WriteReq_mshr_misses 23142 # number of WriteReq MSHR misses -system.cpu6.l1c.WriteReq_mshr_uncacheable 5447 # number of WriteReq MSHR uncacheable -system.cpu6.l1c.WriteResp_avg_mshr_uncacheable_latency inf # average WriteResp mshr uncacheable latency -system.cpu6.l1c.WriteResp_mshr_uncacheable_latency 163399316 # number of WriteResp MSHR uncacheable cycles -system.cpu6.l1c.avg_blocked_cycles_no_mshrs 1189.328084 # average number of cycles each access was blocked +system.cpu5.num_reads 99402 # number of read accesses completed +system.cpu5.num_writes 54123 # number of write accesses completed +system.cpu6.l1c.ReadReq_accesses 45284 # number of ReadReq accesses(hits+misses) +system.cpu6.l1c.ReadReq_avg_miss_latency 22614.833240 # average ReadReq miss latency +system.cpu6.l1c.ReadReq_avg_mshr_miss_latency 22606.834542 # average ReadReq mshr miss latency +system.cpu6.l1c.ReadReq_avg_mshr_uncacheable_latency inf # average ReadReq mshr uncacheable latency +system.cpu6.l1c.ReadReq_hits 7625 # number of ReadReq hits +system.cpu6.l1c.ReadReq_miss_latency 851652005 # number of ReadReq miss cycles +system.cpu6.l1c.ReadReq_miss_rate 0.831618 # miss rate for ReadReq accesses +system.cpu6.l1c.ReadReq_misses 37659 # number of ReadReq misses +system.cpu6.l1c.ReadReq_mshr_miss_latency 851350782 # number of ReadReq MSHR miss cycles +system.cpu6.l1c.ReadReq_mshr_miss_rate 0.831618 # mshr miss rate for ReadReq accesses +system.cpu6.l1c.ReadReq_mshr_misses 37659 # number of ReadReq MSHR misses +system.cpu6.l1c.ReadReq_mshr_uncacheable_latency 513879090 # number of ReadReq MSHR uncacheable cycles +system.cpu6.l1c.WriteReq_accesses 24033 # number of WriteReq accesses(hits+misses) +system.cpu6.l1c.WriteReq_avg_miss_latency 25148.091805 # average WriteReq miss latency +system.cpu6.l1c.WriteReq_avg_mshr_miss_latency 25140.890430 # average WriteReq mshr miss latency +system.cpu6.l1c.WriteReq_avg_mshr_uncacheable_latency inf # average WriteReq mshr uncacheable latency +system.cpu6.l1c.WriteReq_hits 897 # number of WriteReq hits +system.cpu6.l1c.WriteReq_miss_latency 581826252 # number of WriteReq miss cycles +system.cpu6.l1c.WriteReq_miss_rate 0.962676 # miss rate for WriteReq accesses +system.cpu6.l1c.WriteReq_misses 23136 # number of WriteReq misses +system.cpu6.l1c.WriteReq_mshr_miss_latency 581659641 # number of WriteReq MSHR miss cycles +system.cpu6.l1c.WriteReq_mshr_miss_rate 0.962676 # mshr miss rate for WriteReq accesses +system.cpu6.l1c.WriteReq_mshr_misses 23136 # number of WriteReq MSHR misses +system.cpu6.l1c.WriteReq_mshr_uncacheable_latency 312525316 # number of WriteReq MSHR uncacheable cycles +system.cpu6.l1c.avg_blocked_cycles_no_mshrs 2288.777328 # average number of cycles each access was blocked system.cpu6.l1c.avg_blocked_cycles_no_targets # average number of cycles each access was blocked -system.cpu6.l1c.avg_refs 0.411043 # Average number of references to valid blocks. -system.cpu6.l1c.blocked_no_mshrs 69345 # number of cycles access was blocked +system.cpu6.l1c.avg_refs 0.407927 # Average number of references to valid blocks. +system.cpu6.l1c.blocked_no_mshrs 69380 # number of cycles access was blocked system.cpu6.l1c.blocked_no_targets 0 # number of cycles access was blocked -system.cpu6.l1c.blocked_cycles_no_mshrs 82473956 # number of cycles access was blocked +system.cpu6.l1c.blocked_cycles_no_mshrs 158795371 # number of cycles access was blocked system.cpu6.l1c.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu6.l1c.cache_copies 0 # number of cache copies performed -system.cpu6.l1c.demand_accesses 68959 # number of demand (read+write) accesses -system.cpu6.l1c.demand_avg_miss_latency 13560.917263 # average overall miss latency -system.cpu6.l1c.demand_avg_mshr_miss_latency 12377.410436 # average overall mshr miss latency -system.cpu6.l1c.demand_hits 8684 # number of demand (read+write) hits -system.cpu6.l1c.demand_miss_latency 817384288 # number of demand (read+write) miss cycles -system.cpu6.l1c.demand_miss_rate 0.874070 # miss rate for demand accesses -system.cpu6.l1c.demand_misses 60275 # number of demand (read+write) misses +system.cpu6.l1c.demand_accesses 69317 # number of demand (read+write) accesses +system.cpu6.l1c.demand_avg_miss_latency 23578.884069 # average overall miss latency +system.cpu6.l1c.demand_avg_mshr_miss_latency 23571.188798 # average overall mshr miss latency +system.cpu6.l1c.demand_hits 8522 # number of demand (read+write) hits +system.cpu6.l1c.demand_miss_latency 1433478257 # number of demand (read+write) miss cycles +system.cpu6.l1c.demand_miss_rate 0.877058 # miss rate for demand accesses +system.cpu6.l1c.demand_misses 60795 # number of demand (read+write) misses system.cpu6.l1c.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu6.l1c.demand_mshr_miss_latency 746048414 # number of demand (read+write) MSHR miss cycles -system.cpu6.l1c.demand_mshr_miss_rate 0.874070 # mshr miss rate for demand accesses -system.cpu6.l1c.demand_mshr_misses 60275 # number of demand (read+write) MSHR misses +system.cpu6.l1c.demand_mshr_miss_latency 1433010423 # number of demand (read+write) MSHR miss cycles +system.cpu6.l1c.demand_mshr_miss_rate 0.877058 # mshr miss rate for demand accesses +system.cpu6.l1c.demand_mshr_misses 60795 # number of demand (read+write) MSHR misses system.cpu6.l1c.fast_writes 0 # number of fast writes performed system.cpu6.l1c.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu6.l1c.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu6.l1c.overall_accesses 68959 # number of overall (read+write) accesses -system.cpu6.l1c.overall_avg_miss_latency 13560.917263 # average overall miss latency -system.cpu6.l1c.overall_avg_mshr_miss_latency 12377.410436 # average overall mshr miss latency -system.cpu6.l1c.overall_avg_mshr_uncacheable_latency 0 # average overall mshr uncacheable latency -system.cpu6.l1c.overall_hits 8684 # number of overall hits -system.cpu6.l1c.overall_miss_latency 817384288 # number of overall miss cycles -system.cpu6.l1c.overall_miss_rate 0.874070 # miss rate for overall accesses -system.cpu6.l1c.overall_misses 60275 # number of overall misses +system.cpu6.l1c.overall_accesses 69317 # number of overall (read+write) accesses +system.cpu6.l1c.overall_avg_miss_latency 23578.884069 # average overall miss latency +system.cpu6.l1c.overall_avg_mshr_miss_latency 23571.188798 # average overall mshr miss latency +system.cpu6.l1c.overall_avg_mshr_uncacheable_latency inf # average overall mshr uncacheable latency +system.cpu6.l1c.overall_hits 8522 # number of overall hits +system.cpu6.l1c.overall_miss_latency 1433478257 # number of overall miss cycles +system.cpu6.l1c.overall_miss_rate 0.877058 # miss rate for overall accesses +system.cpu6.l1c.overall_misses 60795 # number of overall misses system.cpu6.l1c.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu6.l1c.overall_mshr_miss_latency 746048414 # number of overall MSHR miss cycles -system.cpu6.l1c.overall_mshr_miss_rate 0.874070 # mshr miss rate for overall accesses -system.cpu6.l1c.overall_mshr_misses 60275 # number of overall MSHR misses -system.cpu6.l1c.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles -system.cpu6.l1c.overall_mshr_uncacheable_misses 15267 # number of overall MSHR uncacheable misses +system.cpu6.l1c.overall_mshr_miss_latency 1433010423 # number of overall MSHR miss cycles +system.cpu6.l1c.overall_mshr_miss_rate 0.877058 # mshr miss rate for overall accesses +system.cpu6.l1c.overall_mshr_misses 60795 # number of overall MSHR misses +system.cpu6.l1c.overall_mshr_uncacheable_latency 826404406 # number of overall MSHR uncacheable cycles +system.cpu6.l1c.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.cpu6.l1c.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache system.cpu6.l1c.prefetcher.num_hwpf_already_in_mshr 0 # number of hwpf that were already in mshr system.cpu6.l1c.prefetcher.num_hwpf_already_in_prefetcher 0 # number of hwpf that were already in the prefetch queue @@ -718,104 +549,76 @@ system.cpu6.l1c.prefetcher.num_hwpf_issued 0 # system.cpu6.l1c.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated system.cpu6.l1c.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page system.cpu6.l1c.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time -system.cpu6.l1c.protocol.hwpf_invalid 0 # hard prefetch misses to invalid blocks -system.cpu6.l1c.protocol.read_invalid 1894590 # read misses to invalid blocks -system.cpu6.l1c.protocol.snoop_inv_exclusive 0 # Invalidate snoops on exclusive blocks -system.cpu6.l1c.protocol.snoop_inv_invalid 0 # Invalidate snoops on invalid blocks -system.cpu6.l1c.protocol.snoop_inv_modified 0 # Invalidate snoops on modified blocks -system.cpu6.l1c.protocol.snoop_inv_owned 0 # Invalidate snoops on owned blocks -system.cpu6.l1c.protocol.snoop_inv_shared 0 # Invalidate snoops on shared blocks -system.cpu6.l1c.protocol.snoop_read_exclusive 2887 # read snoops on exclusive blocks -system.cpu6.l1c.protocol.snoop_read_modified 12551 # read snoops on modified blocks -system.cpu6.l1c.protocol.snoop_read_owned 7188 # read snoops on owned blocks -system.cpu6.l1c.protocol.snoop_read_shared 1703425 # read snoops on shared blocks -system.cpu6.l1c.protocol.snoop_readex_exclusive 1550 # readEx snoops on exclusive blocks -system.cpu6.l1c.protocol.snoop_readex_modified 6733 # readEx snoops on modified blocks -system.cpu6.l1c.protocol.snoop_readex_owned 3926 # readEx snoops on owned blocks -system.cpu6.l1c.protocol.snoop_readex_shared 12456 # readEx snoops on shared blocks -system.cpu6.l1c.protocol.snoop_upgrade_owned 800 # upgrade snoops on owned blocks -system.cpu6.l1c.protocol.snoop_upgrade_shared 3156 # upgradee snoops on shared blocks -system.cpu6.l1c.protocol.snoop_writeinv_exclusive 0 # WriteInvalidate snoops on exclusive blocks -system.cpu6.l1c.protocol.snoop_writeinv_invalid 0 # WriteInvalidate snoops on invalid blocks -system.cpu6.l1c.protocol.snoop_writeinv_modified 0 # WriteInvalidate snoops on modified blocks -system.cpu6.l1c.protocol.snoop_writeinv_owned 0 # WriteInvalidate snoops on owned blocks -system.cpu6.l1c.protocol.snoop_writeinv_shared 0 # WriteInvalidate snoops on shared blocks -system.cpu6.l1c.protocol.swpf_invalid 0 # soft prefetch misses to invalid blocks -system.cpu6.l1c.protocol.write_invalid 987928 # write misses to invalid blocks -system.cpu6.l1c.protocol.write_owned 1405 # write misses to owned blocks -system.cpu6.l1c.protocol.write_shared 4406 # write misses to shared blocks -system.cpu6.l1c.replacements 27613 # number of replacements -system.cpu6.l1c.sampled_refs 27946 # Sample count of references to valid blocks. +system.cpu6.l1c.replacements 27931 # number of replacements +system.cpu6.l1c.sampled_refs 28282 # Sample count of references to valid blocks. system.cpu6.l1c.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu6.l1c.tagsinuse 344.860122 # Cycle average of tags in use -system.cpu6.l1c.total_refs 11487 # Total number of references to valid blocks. +system.cpu6.l1c.tagsinuse 346.778818 # Cycle average of tags in use +system.cpu6.l1c.total_refs 11537 # Total number of references to valid blocks. system.cpu6.l1c.warmup_cycle 0 # Cycle when the warmup percentage was hit. -system.cpu6.l1c.writebacks 11073 # number of writebacks +system.cpu6.l1c.writebacks 10819 # number of writebacks system.cpu6.num_copies 0 # number of copy accesses completed -system.cpu6.num_reads 98723 # number of read accesses completed -system.cpu6.num_writes 53876 # number of write accesses completed -system.cpu7.l1c.ReadReq_accesses 44990 # number of ReadReq accesses(hits+misses) -system.cpu7.l1c.ReadReq_avg_miss_latency 13952.283047 # average ReadReq miss latency -system.cpu7.l1c.ReadReq_avg_mshr_miss_latency 12937.789329 # average ReadReq mshr miss latency -system.cpu7.l1c.ReadReq_hits 7505 # number of ReadReq hits -system.cpu7.l1c.ReadReq_miss_latency 523001330 # number of ReadReq miss cycles -system.cpu7.l1c.ReadReq_miss_rate 0.833185 # miss rate for ReadReq accesses -system.cpu7.l1c.ReadReq_misses 37485 # number of ReadReq misses -system.cpu7.l1c.ReadReq_mshr_miss_latency 484973033 # number of ReadReq MSHR miss cycles -system.cpu7.l1c.ReadReq_mshr_miss_rate 0.833185 # mshr miss rate for ReadReq accesses -system.cpu7.l1c.ReadReq_mshr_misses 37485 # number of ReadReq MSHR misses -system.cpu7.l1c.ReadReq_mshr_uncacheable 10001 # number of ReadReq MSHR uncacheable -system.cpu7.l1c.ReadResp_avg_mshr_uncacheable_latency inf # average ReadResp mshr uncacheable latency -system.cpu7.l1c.ReadResp_mshr_uncacheable_latency 257188342 # number of ReadResp MSHR uncacheable cycles -system.cpu7.l1c.WriteReq_accesses 24083 # number of WriteReq accesses(hits+misses) -system.cpu7.l1c.WriteReq_avg_miss_latency 12615.682417 # average WriteReq miss latency -system.cpu7.l1c.WriteReq_avg_mshr_miss_latency 11155.458639 # average WriteReq mshr miss latency -system.cpu7.l1c.WriteReq_hits 1163 # number of WriteReq hits -system.cpu7.l1c.WriteReq_miss_latency 289151441 # number of WriteReq miss cycles -system.cpu7.l1c.WriteReq_miss_rate 0.951709 # miss rate for WriteReq accesses -system.cpu7.l1c.WriteReq_misses 22920 # number of WriteReq misses -system.cpu7.l1c.WriteReq_mshr_miss_latency 255683112 # number of WriteReq MSHR miss cycles -system.cpu7.l1c.WriteReq_mshr_miss_rate 0.951709 # mshr miss rate for WriteReq accesses -system.cpu7.l1c.WriteReq_mshr_misses 22920 # number of WriteReq MSHR misses -system.cpu7.l1c.WriteReq_mshr_uncacheable 5323 # number of WriteReq MSHR uncacheable -system.cpu7.l1c.WriteResp_avg_mshr_uncacheable_latency inf # average WriteResp mshr uncacheable latency -system.cpu7.l1c.WriteResp_mshr_uncacheable_latency 159397105 # number of WriteResp MSHR uncacheable cycles -system.cpu7.l1c.avg_blocked_cycles_no_mshrs 1185.864523 # average number of cycles each access was blocked +system.cpu6.num_reads 100000 # number of read accesses completed +system.cpu6.num_writes 53600 # number of write accesses completed +system.cpu7.l1c.ReadReq_accesses 44617 # number of ReadReq accesses(hits+misses) +system.cpu7.l1c.ReadReq_avg_miss_latency 22791.302160 # average ReadReq miss latency +system.cpu7.l1c.ReadReq_avg_mshr_miss_latency 22783.302456 # average ReadReq mshr miss latency +system.cpu7.l1c.ReadReq_avg_mshr_uncacheable_latency inf # average ReadReq mshr uncacheable latency +system.cpu7.l1c.ReadReq_hits 7491 # number of ReadReq hits +system.cpu7.l1c.ReadReq_miss_latency 846149884 # number of ReadReq miss cycles +system.cpu7.l1c.ReadReq_miss_rate 0.832104 # miss rate for ReadReq accesses +system.cpu7.l1c.ReadReq_misses 37126 # number of ReadReq misses +system.cpu7.l1c.ReadReq_mshr_miss_latency 845852887 # number of ReadReq MSHR miss cycles +system.cpu7.l1c.ReadReq_mshr_miss_rate 0.832104 # mshr miss rate for ReadReq accesses +system.cpu7.l1c.ReadReq_mshr_misses 37126 # number of ReadReq MSHR misses +system.cpu7.l1c.ReadReq_mshr_uncacheable_latency 523016698 # number of ReadReq MSHR uncacheable cycles +system.cpu7.l1c.WriteReq_accesses 24432 # number of WriteReq accesses(hits+misses) +system.cpu7.l1c.WriteReq_avg_miss_latency 24654.748978 # average WriteReq miss latency +system.cpu7.l1c.WriteReq_avg_mshr_miss_latency 24647.585464 # average WriteReq mshr miss latency +system.cpu7.l1c.WriteReq_avg_mshr_uncacheable_latency inf # average WriteReq mshr uncacheable latency +system.cpu7.l1c.WriteReq_hits 960 # number of WriteReq hits +system.cpu7.l1c.WriteReq_miss_latency 578696268 # number of WriteReq miss cycles +system.cpu7.l1c.WriteReq_miss_rate 0.960707 # miss rate for WriteReq accesses +system.cpu7.l1c.WriteReq_misses 23472 # number of WriteReq misses +system.cpu7.l1c.WriteReq_mshr_miss_latency 578528126 # number of WriteReq MSHR miss cycles +system.cpu7.l1c.WriteReq_mshr_miss_rate 0.960707 # mshr miss rate for WriteReq accesses +system.cpu7.l1c.WriteReq_mshr_misses 23472 # number of WriteReq MSHR misses +system.cpu7.l1c.WriteReq_mshr_uncacheable_latency 310262407 # number of WriteReq MSHR uncacheable cycles +system.cpu7.l1c.avg_blocked_cycles_no_mshrs 2294.299163 # average number of cycles each access was blocked system.cpu7.l1c.avg_blocked_cycles_no_targets # average number of cycles each access was blocked -system.cpu7.l1c.avg_refs 0.413879 # Average number of references to valid blocks. -system.cpu7.l1c.blocked_no_mshrs 69665 # number of cycles access was blocked +system.cpu7.l1c.avg_refs 0.417293 # Average number of references to valid blocks. +system.cpu7.l1c.blocked_no_mshrs 69407 # number of cycles access was blocked system.cpu7.l1c.blocked_no_targets 0 # number of cycles access was blocked -system.cpu7.l1c.blocked_cycles_no_mshrs 82613252 # number of cycles access was blocked +system.cpu7.l1c.blocked_cycles_no_mshrs 159240422 # number of cycles access was blocked system.cpu7.l1c.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu7.l1c.cache_copies 0 # number of cache copies performed -system.cpu7.l1c.demand_accesses 69073 # number of demand (read+write) accesses -system.cpu7.l1c.demand_avg_miss_latency 13445.124923 # average overall miss latency -system.cpu7.l1c.demand_avg_mshr_miss_latency 12261.503932 # average overall mshr miss latency -system.cpu7.l1c.demand_hits 8668 # number of demand (read+write) hits -system.cpu7.l1c.demand_miss_latency 812152771 # number of demand (read+write) miss cycles -system.cpu7.l1c.demand_miss_rate 0.874510 # miss rate for demand accesses -system.cpu7.l1c.demand_misses 60405 # number of demand (read+write) misses +system.cpu7.l1c.demand_accesses 69049 # number of demand (read+write) accesses +system.cpu7.l1c.demand_avg_miss_latency 23513.088749 # average overall miss latency +system.cpu7.l1c.demand_avg_mshr_miss_latency 23505.412934 # average overall mshr miss latency +system.cpu7.l1c.demand_hits 8451 # number of demand (read+write) hits +system.cpu7.l1c.demand_miss_latency 1424846152 # number of demand (read+write) miss cycles +system.cpu7.l1c.demand_miss_rate 0.877609 # miss rate for demand accesses +system.cpu7.l1c.demand_misses 60598 # number of demand (read+write) misses system.cpu7.l1c.demand_mshr_hits 0 # number of demand (read+write) MSHR hits -system.cpu7.l1c.demand_mshr_miss_latency 740656145 # number of demand (read+write) MSHR miss cycles -system.cpu7.l1c.demand_mshr_miss_rate 0.874510 # mshr miss rate for demand accesses -system.cpu7.l1c.demand_mshr_misses 60405 # number of demand (read+write) MSHR misses +system.cpu7.l1c.demand_mshr_miss_latency 1424381013 # number of demand (read+write) MSHR miss cycles +system.cpu7.l1c.demand_mshr_miss_rate 0.877609 # mshr miss rate for demand accesses +system.cpu7.l1c.demand_mshr_misses 60598 # number of demand (read+write) MSHR misses system.cpu7.l1c.fast_writes 0 # number of fast writes performed system.cpu7.l1c.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu7.l1c.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu7.l1c.overall_accesses 69073 # number of overall (read+write) accesses -system.cpu7.l1c.overall_avg_miss_latency 13445.124923 # average overall miss latency -system.cpu7.l1c.overall_avg_mshr_miss_latency 12261.503932 # average overall mshr miss latency -system.cpu7.l1c.overall_avg_mshr_uncacheable_latency 0 # average overall mshr uncacheable latency -system.cpu7.l1c.overall_hits 8668 # number of overall hits -system.cpu7.l1c.overall_miss_latency 812152771 # number of overall miss cycles -system.cpu7.l1c.overall_miss_rate 0.874510 # miss rate for overall accesses -system.cpu7.l1c.overall_misses 60405 # number of overall misses +system.cpu7.l1c.overall_accesses 69049 # number of overall (read+write) accesses +system.cpu7.l1c.overall_avg_miss_latency 23513.088749 # average overall miss latency +system.cpu7.l1c.overall_avg_mshr_miss_latency 23505.412934 # average overall mshr miss latency +system.cpu7.l1c.overall_avg_mshr_uncacheable_latency inf # average overall mshr uncacheable latency +system.cpu7.l1c.overall_hits 8451 # number of overall hits +system.cpu7.l1c.overall_miss_latency 1424846152 # number of overall miss cycles +system.cpu7.l1c.overall_miss_rate 0.877609 # miss rate for overall accesses +system.cpu7.l1c.overall_misses 60598 # number of overall misses system.cpu7.l1c.overall_mshr_hits 0 # number of overall MSHR hits -system.cpu7.l1c.overall_mshr_miss_latency 740656145 # number of overall MSHR miss cycles -system.cpu7.l1c.overall_mshr_miss_rate 0.874510 # mshr miss rate for overall accesses -system.cpu7.l1c.overall_mshr_misses 60405 # number of overall MSHR misses -system.cpu7.l1c.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles -system.cpu7.l1c.overall_mshr_uncacheable_misses 15324 # number of overall MSHR uncacheable misses +system.cpu7.l1c.overall_mshr_miss_latency 1424381013 # number of overall MSHR miss cycles +system.cpu7.l1c.overall_mshr_miss_rate 0.877609 # mshr miss rate for overall accesses +system.cpu7.l1c.overall_mshr_misses 60598 # number of overall MSHR misses +system.cpu7.l1c.overall_mshr_uncacheable_latency 833279105 # number of overall MSHR uncacheable cycles +system.cpu7.l1c.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.cpu7.l1c.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache system.cpu7.l1c.prefetcher.num_hwpf_already_in_mshr 0 # number of hwpf that were already in mshr system.cpu7.l1c.prefetcher.num_hwpf_already_in_prefetcher 0 # number of hwpf that were already in the prefetch queue @@ -825,112 +628,92 @@ system.cpu7.l1c.prefetcher.num_hwpf_issued 0 # system.cpu7.l1c.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated system.cpu7.l1c.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page system.cpu7.l1c.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time -system.cpu7.l1c.protocol.hwpf_invalid 0 # hard prefetch misses to invalid blocks -system.cpu7.l1c.protocol.read_invalid 1929884 # read misses to invalid blocks -system.cpu7.l1c.protocol.snoop_inv_exclusive 0 # Invalidate snoops on exclusive blocks -system.cpu7.l1c.protocol.snoop_inv_invalid 0 # Invalidate snoops on invalid blocks -system.cpu7.l1c.protocol.snoop_inv_modified 0 # Invalidate snoops on modified blocks -system.cpu7.l1c.protocol.snoop_inv_owned 0 # Invalidate snoops on owned blocks -system.cpu7.l1c.protocol.snoop_inv_shared 0 # Invalidate snoops on shared blocks -system.cpu7.l1c.protocol.snoop_read_exclusive 2904 # read snoops on exclusive blocks -system.cpu7.l1c.protocol.snoop_read_modified 12387 # read snoops on modified blocks -system.cpu7.l1c.protocol.snoop_read_owned 7174 # read snoops on owned blocks -system.cpu7.l1c.protocol.snoop_read_shared 1782059 # read snoops on shared blocks -system.cpu7.l1c.protocol.snoop_readex_exclusive 1587 # readEx snoops on exclusive blocks -system.cpu7.l1c.protocol.snoop_readex_modified 6687 # readEx snoops on modified blocks -system.cpu7.l1c.protocol.snoop_readex_owned 3842 # readEx snoops on owned blocks -system.cpu7.l1c.protocol.snoop_readex_shared 12759 # readEx snoops on shared blocks -system.cpu7.l1c.protocol.snoop_upgrade_owned 792 # upgrade snoops on owned blocks -system.cpu7.l1c.protocol.snoop_upgrade_shared 3085 # upgradee snoops on shared blocks -system.cpu7.l1c.protocol.snoop_writeinv_exclusive 0 # WriteInvalidate snoops on exclusive blocks -system.cpu7.l1c.protocol.snoop_writeinv_invalid 0 # WriteInvalidate snoops on invalid blocks -system.cpu7.l1c.protocol.snoop_writeinv_modified 0 # WriteInvalidate snoops on modified blocks -system.cpu7.l1c.protocol.snoop_writeinv_owned 0 # WriteInvalidate snoops on owned blocks -system.cpu7.l1c.protocol.snoop_writeinv_shared 0 # WriteInvalidate snoops on shared blocks -system.cpu7.l1c.protocol.swpf_invalid 0 # soft prefetch misses to invalid blocks -system.cpu7.l1c.protocol.write_invalid 930930 # write misses to invalid blocks -system.cpu7.l1c.protocol.write_owned 1422 # write misses to owned blocks -system.cpu7.l1c.protocol.write_shared 4465 # write misses to shared blocks -system.cpu7.l1c.replacements 27486 # number of replacements -system.cpu7.l1c.sampled_refs 27827 # Sample count of references to valid blocks. +system.cpu7.l1c.replacements 27613 # number of replacements +system.cpu7.l1c.sampled_refs 27942 # Sample count of references to valid blocks. system.cpu7.l1c.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.cpu7.l1c.tagsinuse 344.310963 # Cycle average of tags in use -system.cpu7.l1c.total_refs 11517 # Total number of references to valid blocks. +system.cpu7.l1c.tagsinuse 345.414592 # Cycle average of tags in use +system.cpu7.l1c.total_refs 11660 # Total number of references to valid blocks. system.cpu7.l1c.warmup_cycle 0 # Cycle when the warmup percentage was hit. -system.cpu7.l1c.writebacks 10979 # number of writebacks +system.cpu7.l1c.writebacks 10955 # number of writebacks system.cpu7.num_copies 0 # number of copy accesses completed -system.cpu7.num_reads 99734 # number of read accesses completed -system.cpu7.num_writes 53652 # number of write accesses completed -system.l2c.ReadExReq_accesses 75160 # number of ReadExReq accesses(hits+misses) -system.l2c.ReadExReq_avg_miss_latency 10115.633652 # average ReadExReq miss latency -system.l2c.ReadExReq_avg_mshr_miss_latency 6085.503709 # average ReadExReq mshr miss latency -system.l2c.ReadExReq_hits 39620 # number of ReadExReq hits -system.l2c.ReadExReq_miss_latency 359509620 # number of ReadExReq miss cycles -system.l2c.ReadExReq_miss_rate 0.472858 # miss rate for ReadExReq accesses -system.l2c.ReadExReq_misses 35540 # number of ReadExReq misses -system.l2c.ReadExReq_mshr_hits 220 # number of ReadExReq MSHR hits -system.l2c.ReadExReq_mshr_miss_latency 214939991 # number of ReadExReq MSHR miss cycles -system.l2c.ReadExReq_mshr_miss_rate 0.469931 # mshr miss rate for ReadExReq accesses -system.l2c.ReadExReq_mshr_misses 35320 # number of ReadExReq MSHR misses -system.l2c.ReadReq_accesses 138762 # number of ReadReq accesses(hits+misses) -system.l2c.ReadReq_avg_miss_latency 10150.344064 # average ReadReq miss latency -system.l2c.ReadReq_avg_mshr_miss_latency 6129.500996 # average ReadReq mshr miss latency -system.l2c.ReadReq_hits 72597 # number of ReadReq hits -system.l2c.ReadReq_miss_latency 671597515 # number of ReadReq miss cycles -system.l2c.ReadReq_miss_rate 0.476824 # miss rate for ReadReq accesses -system.l2c.ReadReq_misses 66165 # number of ReadReq misses -system.l2c.ReadReq_mshr_hits 406 # number of ReadReq MSHR hits -system.l2c.ReadReq_mshr_miss_latency 403069856 # number of ReadReq MSHR miss cycles -system.l2c.ReadReq_mshr_miss_rate 0.473898 # mshr miss rate for ReadReq accesses -system.l2c.ReadReq_mshr_misses 65759 # number of ReadReq MSHR misses -system.l2c.ReadReq_mshr_uncacheable 78927 # number of ReadReq MSHR uncacheable -system.l2c.ReadResp_avg_mshr_uncacheable_latency inf # average ReadResp mshr uncacheable latency -system.l2c.ReadResp_mshr_uncacheable_latency 484683934 # number of ReadResp MSHR uncacheable cycles -system.l2c.WriteReq_mshr_uncacheable 42802 # number of WriteReq MSHR uncacheable -system.l2c.WriteResp_avg_mshr_uncacheable_latency inf # average WriteResp mshr uncacheable latency -system.l2c.WriteResp_mshr_uncacheable_latency 248118294 # number of WriteResp MSHR uncacheable cycles -system.l2c.Writeback_accesses 86706 # number of Writeback accesses(hits+misses) -system.l2c.Writeback_hits 18948 # number of Writeback hits -system.l2c.Writeback_miss_rate 0.781468 # miss rate for Writeback accesses -system.l2c.Writeback_misses 67758 # number of Writeback misses -system.l2c.Writeback_mshr_miss_rate 0.781468 # mshr miss rate for Writeback accesses -system.l2c.Writeback_mshr_misses 67758 # number of Writeback MSHR misses +system.cpu7.num_reads 98933 # number of read accesses completed +system.cpu7.num_writes 53679 # number of write accesses completed +system.l2c.ReadExReq_accesses 74732 # number of ReadExReq accesses(hits+misses) +system.l2c.ReadExReq_avg_miss_latency 10058.723893 # average ReadExReq miss latency +system.l2c.ReadExReq_avg_mshr_miss_latency 10012.709549 # average ReadExReq mshr miss latency +system.l2c.ReadExReq_miss_latency 751708554 # number of ReadExReq miss cycles +system.l2c.ReadExReq_miss_rate 1 # miss rate for ReadExReq accesses +system.l2c.ReadExReq_misses 74732 # number of ReadExReq misses +system.l2c.ReadExReq_mshr_hits 486 # number of ReadExReq MSHR hits +system.l2c.ReadExReq_mshr_miss_latency 748269810 # number of ReadExReq MSHR miss cycles +system.l2c.ReadExReq_mshr_miss_rate 1 # mshr miss rate for ReadExReq accesses +system.l2c.ReadExReq_mshr_misses 74732 # number of ReadExReq MSHR misses +system.l2c.ReadReq_accesses 138119 # number of ReadReq accesses(hits+misses) +system.l2c.ReadReq_avg_miss_latency 10093.112454 # average ReadReq miss latency +system.l2c.ReadReq_avg_mshr_miss_latency 10012.902949 # average ReadReq mshr miss latency +system.l2c.ReadReq_avg_mshr_uncacheable_latency inf # average ReadReq mshr uncacheable latency +system.l2c.ReadReq_hits 62746 # number of ReadReq hits +system.l2c.ReadReq_miss_latency 760748165 # number of ReadReq miss cycles +system.l2c.ReadReq_miss_rate 0.545711 # miss rate for ReadReq accesses +system.l2c.ReadReq_misses 75373 # number of ReadReq misses +system.l2c.ReadReq_mshr_hits 858 # number of ReadReq MSHR hits +system.l2c.ReadReq_mshr_miss_latency 754702534 # number of ReadReq MSHR miss cycles +system.l2c.ReadReq_mshr_miss_rate 0.545711 # mshr miss rate for ReadReq accesses +system.l2c.ReadReq_mshr_misses 75373 # number of ReadReq MSHR misses +system.l2c.ReadReq_mshr_uncacheable_latency 792432163 # number of ReadReq MSHR uncacheable cycles +system.l2c.UpgradeReq_accesses 18312 # number of UpgradeReq accesses(hits+misses) +system.l2c.UpgradeReq_avg_miss_latency 5090.815258 # average UpgradeReq miss latency +system.l2c.UpgradeReq_avg_mshr_miss_latency 10012.622433 # average UpgradeReq mshr miss latency +system.l2c.UpgradeReq_miss_latency 93223009 # number of UpgradeReq miss cycles +system.l2c.UpgradeReq_miss_rate 1 # miss rate for UpgradeReq accesses +system.l2c.UpgradeReq_misses 18312 # number of UpgradeReq misses +system.l2c.UpgradeReq_mshr_hits 25 # number of UpgradeReq MSHR hits +system.l2c.UpgradeReq_mshr_miss_latency 183351142 # number of UpgradeReq MSHR miss cycles +system.l2c.UpgradeReq_mshr_miss_rate 1 # mshr miss rate for UpgradeReq accesses +system.l2c.UpgradeReq_mshr_misses 18312 # number of UpgradeReq MSHR misses +system.l2c.WriteReq_avg_mshr_uncacheable_latency inf # average WriteReq mshr uncacheable latency +system.l2c.WriteReq_mshr_uncacheable_latency 430029394 # number of WriteReq MSHR uncacheable cycles +system.l2c.Writeback_accesses 86893 # number of Writeback accesses(hits+misses) +system.l2c.Writeback_miss_rate 1 # miss rate for Writeback accesses +system.l2c.Writeback_misses 86893 # number of Writeback misses +system.l2c.Writeback_mshr_miss_rate 1 # mshr miss rate for Writeback accesses +system.l2c.Writeback_mshr_misses 86893 # number of Writeback MSHR misses system.l2c.avg_blocked_cycles_no_mshrs 3278 # average number of cycles each access was blocked system.l2c.avg_blocked_cycles_no_targets # average number of cycles each access was blocked -system.l2c.avg_refs 1.297661 # Average number of references to valid blocks. +system.l2c.avg_refs 3.318198 # Average number of references to valid blocks. system.l2c.blocked_no_mshrs 3 # number of cycles access was blocked system.l2c.blocked_no_targets 0 # number of cycles access was blocked system.l2c.blocked_cycles_no_mshrs 9834 # number of cycles access was blocked system.l2c.blocked_cycles_no_targets 0 # number of cycles access was blocked system.l2c.cache_copies 0 # number of cache copies performed -system.l2c.demand_accesses 138762 # number of demand (read+write) accesses -system.l2c.demand_avg_miss_latency 10150.344064 # average overall miss latency -system.l2c.demand_avg_mshr_miss_latency 6129.500996 # average overall mshr miss latency -system.l2c.demand_hits 72597 # number of demand (read+write) hits -system.l2c.demand_miss_latency 671597515 # number of demand (read+write) miss cycles -system.l2c.demand_miss_rate 0.476824 # miss rate for demand accesses -system.l2c.demand_misses 66165 # number of demand (read+write) misses -system.l2c.demand_mshr_hits 406 # number of demand (read+write) MSHR hits -system.l2c.demand_mshr_miss_latency 403069856 # number of demand (read+write) MSHR miss cycles -system.l2c.demand_mshr_miss_rate 0.473898 # mshr miss rate for demand accesses -system.l2c.demand_mshr_misses 65759 # number of demand (read+write) MSHR misses +system.l2c.demand_accesses 212851 # number of demand (read+write) accesses +system.l2c.demand_avg_miss_latency 10075.991599 # average overall miss latency +system.l2c.demand_avg_mshr_miss_latency 10012.806662 # average overall mshr miss latency +system.l2c.demand_hits 62746 # number of demand (read+write) hits +system.l2c.demand_miss_latency 1512456719 # number of demand (read+write) miss cycles +system.l2c.demand_miss_rate 0.705212 # miss rate for demand accesses +system.l2c.demand_misses 150105 # number of demand (read+write) misses +system.l2c.demand_mshr_hits 1344 # number of demand (read+write) MSHR hits +system.l2c.demand_mshr_miss_latency 1502972344 # number of demand (read+write) MSHR miss cycles +system.l2c.demand_mshr_miss_rate 0.705212 # mshr miss rate for demand accesses +system.l2c.demand_mshr_misses 150105 # number of demand (read+write) MSHR misses system.l2c.fast_writes 0 # number of fast writes performed system.l2c.mshr_cap_events 0 # number of times MSHR cap was activated system.l2c.no_allocate_misses 0 # Number of misses that were no-allocate -system.l2c.overall_accesses 225468 # number of overall (read+write) accesses -system.l2c.overall_avg_miss_latency 5014.803394 # average overall miss latency -system.l2c.overall_avg_mshr_miss_latency 6129.500996 # average overall mshr miss latency -system.l2c.overall_avg_mshr_uncacheable_latency 0 # average overall mshr uncacheable latency -system.l2c.overall_hits 91545 # number of overall hits -system.l2c.overall_miss_latency 671597515 # number of overall miss cycles -system.l2c.overall_miss_rate 0.593978 # miss rate for overall accesses -system.l2c.overall_misses 133923 # number of overall misses -system.l2c.overall_mshr_hits 406 # number of overall MSHR hits -system.l2c.overall_mshr_miss_latency 403069856 # number of overall MSHR miss cycles -system.l2c.overall_mshr_miss_rate 0.291656 # mshr miss rate for overall accesses -system.l2c.overall_mshr_misses 65759 # number of overall MSHR misses -system.l2c.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles -system.l2c.overall_mshr_uncacheable_misses 121729 # number of overall MSHR uncacheable misses +system.l2c.overall_accesses 212851 # number of overall (read+write) accesses +system.l2c.overall_avg_miss_latency 10075.991599 # average overall miss latency +system.l2c.overall_avg_mshr_miss_latency 10012.806662 # average overall mshr miss latency +system.l2c.overall_avg_mshr_uncacheable_latency inf # average overall mshr uncacheable latency +system.l2c.overall_hits 62746 # number of overall hits +system.l2c.overall_miss_latency 1512456719 # number of overall miss cycles +system.l2c.overall_miss_rate 0.705212 # miss rate for overall accesses +system.l2c.overall_misses 150105 # number of overall misses +system.l2c.overall_mshr_hits 1344 # number of overall MSHR hits +system.l2c.overall_mshr_miss_latency 1502972344 # number of overall MSHR miss cycles +system.l2c.overall_mshr_miss_rate 0.705212 # mshr miss rate for overall accesses +system.l2c.overall_mshr_misses 150105 # number of overall MSHR misses +system.l2c.overall_mshr_uncacheable_latency 1222461557 # number of overall MSHR uncacheable cycles +system.l2c.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses system.l2c.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache system.l2c.prefetcher.num_hwpf_already_in_mshr 0 # number of hwpf that were already in mshr system.l2c.prefetcher.num_hwpf_already_in_prefetcher 0 # number of hwpf that were already in the prefetch queue @@ -940,12 +723,12 @@ system.l2c.prefetcher.num_hwpf_issued 0 # nu system.l2c.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated system.l2c.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page system.l2c.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time -system.l2c.replacements 100054 # number of replacements -system.l2c.sampled_refs 101078 # Sample count of references to valid blocks. +system.l2c.replacements 31000 # number of replacements +system.l2c.sampled_refs 31427 # Sample count of references to valid blocks. system.l2c.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions -system.l2c.tagsinuse 1023.099242 # Cycle average of tags in use -system.l2c.total_refs 131165 # Total number of references to valid blocks. -system.l2c.warmup_cycle 296156 # Cycle when the warmup percentage was hit. -system.l2c.writebacks 16243 # number of writebacks +system.l2c.tagsinuse 461.978673 # Cycle average of tags in use +system.l2c.total_refs 104281 # Total number of references to valid blocks. +system.l2c.warmup_cycle 0 # Cycle when the warmup percentage was hit. +system.l2c.writebacks 0 # number of writebacks ---------- End Simulation Statistics ---------- diff --git a/tests/quick/50.memtest/ref/alpha/linux/memtest/stderr b/tests/quick/50.memtest/ref/alpha/linux/memtest/stderr index d45294bbb..87bef1427 100644 --- a/tests/quick/50.memtest/ref/alpha/linux/memtest/stderr +++ b/tests/quick/50.memtest/ref/alpha/linux/memtest/stderr @@ -1,74 +1,74 @@ warn: Entering event queue @ 0. Starting simulation... -system.cpu7: completed 10000 read accesses @8253930 -system.cpu1: completed 10000 read accesses @8325085 -system.cpu6: completed 10000 read accesses @8427313 -system.cpu4: completed 10000 read accesses @8438233 -system.cpu2: completed 10000 read accesses @8458126 -system.cpu5: completed 10000 read accesses @8549800 -system.cpu3: completed 10000 read accesses @8559995 -system.cpu0: completed 10000 read accesses @8593654 -system.cpu7: completed 20000 read accesses @16744182 -system.cpu1: completed 20000 read accesses @16774744 -system.cpu4: completed 20000 read accesses @16786220 -system.cpu3: completed 20000 read accesses @16787358 -system.cpu5: completed 20000 read accesses @16795808 -system.cpu6: completed 20000 read accesses @16836913 -system.cpu2: completed 20000 read accesses @17031052 -system.cpu0: completed 20000 read accesses @17126654 -system.cpu5: completed 30000 read accesses @24892576 -system.cpu6: completed 30000 read accesses @24903300 -system.cpu3: completed 30000 read accesses @24935860 -system.cpu4: completed 30000 read accesses @25020642 -system.cpu1: completed 30000 read accesses @25031726 -system.cpu7: completed 30000 read accesses @25112091 -system.cpu2: completed 30000 read accesses @25235960 -system.cpu0: completed 30000 read accesses @25505209 -system.cpu5: completed 40000 read accesses @33191203 -system.cpu6: completed 40000 read accesses @33273684 -system.cpu4: completed 40000 read accesses @33345526 -system.cpu3: completed 40000 read accesses @33406412 -system.cpu7: completed 40000 read accesses @33509130 -system.cpu1: completed 40000 read accesses @33509218 -system.cpu2: completed 40000 read accesses @33664822 -system.cpu0: completed 40000 read accesses @33869626 -system.cpu5: completed 50000 read accesses @41488848 -system.cpu4: completed 50000 read accesses @41582702 -system.cpu7: completed 50000 read accesses @41828988 -system.cpu3: completed 50000 read accesses @41829496 -system.cpu1: completed 50000 read accesses @41849534 -system.cpu6: completed 50000 read accesses @41982608 -system.cpu2: completed 50000 read accesses @42197798 -system.cpu0: completed 50000 read accesses @42443468 -system.cpu5: completed 60000 read accesses @49751344 -system.cpu4: completed 60000 read accesses @49783100 -system.cpu1: completed 60000 read accesses @49918062 -system.cpu7: completed 60000 read accesses @49929008 -system.cpu3: completed 60000 read accesses @50173996 -system.cpu6: completed 60000 read accesses @50351766 -system.cpu2: completed 60000 read accesses @50352657 -system.cpu0: completed 60000 read accesses @50789771 -system.cpu4: completed 70000 read accesses @58352386 -system.cpu5: completed 70000 read accesses @58394758 -system.cpu7: completed 70000 read accesses @58570698 -system.cpu1: completed 70000 read accesses @58764169 -system.cpu3: completed 70000 read accesses @58764648 -system.cpu2: completed 70000 read accesses @58921714 -system.cpu6: completed 70000 read accesses @58929984 -system.cpu0: completed 70000 read accesses @59567320 -system.cpu1: completed 80000 read accesses @67092786 -system.cpu5: completed 80000 read accesses @67153667 -system.cpu4: completed 80000 read accesses @67153760 -system.cpu7: completed 80000 read accesses @67207042 -system.cpu3: completed 80000 read accesses @67238507 -system.cpu2: completed 80000 read accesses @67633112 -system.cpu6: completed 80000 read accesses @67664637 -system.cpu0: completed 80000 read accesses @68437288 -system.cpu1: completed 90000 read accesses @75679048 -system.cpu4: completed 90000 read accesses @75680280 -system.cpu7: completed 90000 read accesses @75751053 -system.cpu5: completed 90000 read accesses @75781514 -system.cpu3: completed 90000 read accesses @75844118 -system.cpu2: completed 90000 read accesses @76346671 -system.cpu6: completed 90000 read accesses @76491728 -system.cpu0: completed 90000 read accesses @77376872 -system.cpu1: completed 100000 read accesses @84350509 +system.cpu7: completed 10000 read accesses @15607088 +system.cpu1: completed 10000 read accesses @15686239 +system.cpu5: completed 10000 read accesses @15771479 +system.cpu4: completed 10000 read accesses @15772513 +system.cpu0: completed 10000 read accesses @15778178 +system.cpu6: completed 10000 read accesses @15791633 +system.cpu2: completed 10000 read accesses @15841990 +system.cpu3: completed 10000 read accesses @15878600 +system.cpu2: completed 20000 read accesses @31878727 +system.cpu7: completed 20000 read accesses @32026154 +system.cpu6: completed 20000 read accesses @32057190 +system.cpu1: completed 20000 read accesses @32240417 +system.cpu0: completed 20000 read accesses @32270672 +system.cpu3: completed 20000 read accesses @32335938 +system.cpu5: completed 20000 read accesses @32480722 +system.cpu4: completed 20000 read accesses @32490454 +system.cpu2: completed 30000 read accesses @48060100 +system.cpu6: completed 30000 read accesses @48167196 +system.cpu4: completed 30000 read accesses @48520588 +system.cpu7: completed 30000 read accesses @48646309 +system.cpu0: completed 30000 read accesses @48740616 +system.cpu1: completed 30000 read accesses @48766857 +system.cpu3: completed 30000 read accesses @48959010 +system.cpu5: completed 30000 read accesses @49028132 +system.cpu6: completed 40000 read accesses @64421948 +system.cpu4: completed 40000 read accesses @64637670 +system.cpu2: completed 40000 read accesses @64868400 +system.cpu1: completed 40000 read accesses @64925788 +system.cpu0: completed 40000 read accesses @64956331 +system.cpu3: completed 40000 read accesses @65406565 +system.cpu5: completed 40000 read accesses @65517578 +system.cpu7: completed 40000 read accesses @65556693 +system.cpu6: completed 50000 read accesses @80917227 +system.cpu2: completed 50000 read accesses @80917444 +system.cpu4: completed 50000 read accesses @81159816 +system.cpu1: completed 50000 read accesses @81373401 +system.cpu3: completed 50000 read accesses @81540449 +system.cpu0: completed 50000 read accesses @81577912 +system.cpu5: completed 50000 read accesses @81975441 +system.cpu7: completed 50000 read accesses @82285501 +system.cpu2: completed 60000 read accesses @96985412 +system.cpu4: completed 60000 read accesses @97174738 +system.cpu6: completed 60000 read accesses @97530786 +system.cpu0: completed 60000 read accesses @97671589 +system.cpu3: completed 60000 read accesses @97821937 +system.cpu1: completed 60000 read accesses @97822818 +system.cpu5: completed 60000 read accesses @98044596 +system.cpu7: completed 60000 read accesses @98812006 +system.cpu2: completed 70000 read accesses @113400661 +system.cpu4: completed 70000 read accesses @113949415 +system.cpu1: completed 70000 read accesses @114120869 +system.cpu3: completed 70000 read accesses @114207385 +system.cpu0: completed 70000 read accesses @114307850 +system.cpu6: completed 70000 read accesses @114393410 +system.cpu5: completed 70000 read accesses @114714609 +system.cpu7: completed 70000 read accesses @115286783 +system.cpu2: completed 80000 read accesses @130149084 +system.cpu0: completed 80000 read accesses @130494872 +system.cpu4: completed 80000 read accesses @130604588 +system.cpu6: completed 80000 read accesses @130741327 +system.cpu1: completed 80000 read accesses @130791488 +system.cpu3: completed 80000 read accesses @130805400 +system.cpu5: completed 80000 read accesses @130975948 +system.cpu7: completed 80000 read accesses @131555733 +system.cpu2: completed 90000 read accesses @146468442 +system.cpu6: completed 90000 read accesses @146616353 +system.cpu1: completed 90000 read accesses @146926939 +system.cpu3: completed 90000 read accesses @147059543 +system.cpu0: completed 90000 read accesses @147067458 +system.cpu5: completed 90000 read accesses @147440946 +system.cpu4: completed 90000 read accesses @147560717 +system.cpu7: completed 90000 read accesses @148115904 +system.cpu6: completed 100000 read accesses @163182312 diff --git a/tests/quick/50.memtest/ref/alpha/linux/memtest/stdout b/tests/quick/50.memtest/ref/alpha/linux/memtest/stdout index a77db6fb9..29891e1e8 100644 --- a/tests/quick/50.memtest/ref/alpha/linux/memtest/stdout +++ b/tests/quick/50.memtest/ref/alpha/linux/memtest/stdout @@ -5,9 +5,9 @@ The Regents of The University of Michigan All Rights Reserved -M5 compiled Jun 10 2007 14:06:20 -M5 started Sun Jun 10 14:22:51 2007 -M5 executing on iceaxe -command line: /Users/nate/build/outgoing/build/ALPHA_SE/m5.debug -d /Users/nate/build/outgoing/build/ALPHA_SE/tests/debug/quick/50.memtest/alpha/linux/memtest tests/run.py quick/50.memtest/alpha/linux/memtest +M5 compiled Aug 3 2007 03:56:47 +M5 started Fri Aug 3 04:17:16 2007 +M5 executing on zizzer.eecs.umich.edu +command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/quick/50.memtest/alpha/linux/memtest tests/run.py quick/50.memtest/alpha/linux/memtest Global frequency set at 1000000000000 ticks per second -Exiting @ tick 84350509 because Maximum number of loads reached! +Exiting @ tick 163182312 because maximum number of loads reached diff --git a/tests/quick/80.netperf-stream/ref/alpha/linux/twosys-tsunami-simple-atomic/config.ini b/tests/quick/80.netperf-stream/ref/alpha/linux/twosys-tsunami-simple-atomic/config.ini index c16d67687..4cbaaf71e 100644 --- a/tests/quick/80.netperf-stream/ref/alpha/linux/twosys-tsunami-simple-atomic/config.ini +++ b/tests/quick/80.netperf-stream/ref/alpha/linux/twosys-tsunami-simple-atomic/config.ini @@ -14,7 +14,7 @@ kernel=/dist/m5/system/binaries/vmlinux mem_mode=atomic pal=/dist/m5/system/binaries/ts_osfpal physmem=drivesys.physmem -readfile=/Users/nate/work/m5/outgoing/configs/boot/netperf-server.rcS +readfile=/z/stever/hg/m5.stever/configs/boot/netperf-server.rcS symbolfile= system_rev=1024 system_type=34 @@ -35,7 +35,7 @@ side_b=drivesys.membus.port[0] [drivesys.cpu] type=AtomicSimpleCPU -children=dtb itb +children=dtb itb tracer clock=1 cpu_id=0 defer_registration=false @@ -55,6 +55,7 @@ profile=0 progress_interval=0 simulate_stalls=false system=drivesys +tracer=drivesys.cpu.tracer width=1 dcache_port=drivesys.membus.port[3] icache_port=drivesys.membus.port[2] @@ -67,6 +68,9 @@ size=64 type=AlphaITB size=48 +[drivesys.cpu.tracer] +type=ExeTracer + [drivesys.disk0] type=IdeDisk children=image @@ -647,7 +651,7 @@ pio_addr=8804615847936 pio_latency=1000 platform=drivesys.tsunami system=drivesys -time=2009 1 1 0 0 0 3 1 +time=Thu Jan 1 00:00:00 2009 tsunami=drivesys.tsunami year_is_bcd=false pio=drivesys.iobus.port[23] @@ -704,7 +708,7 @@ kernel=/dist/m5/system/binaries/vmlinux mem_mode=atomic pal=/dist/m5/system/binaries/ts_osfpal physmem=testsys.physmem -readfile=/Users/nate/work/m5/outgoing/configs/boot/netperf-stream-client.rcS +readfile=/z/stever/hg/m5.stever/configs/boot/netperf-stream-client.rcS symbolfile= system_rev=1024 system_type=34 @@ -725,7 +729,7 @@ side_b=testsys.membus.port[0] [testsys.cpu] type=AtomicSimpleCPU -children=dtb itb +children=dtb itb tracer clock=1 cpu_id=0 defer_registration=false @@ -745,6 +749,7 @@ profile=0 progress_interval=0 simulate_stalls=false system=testsys +tracer=testsys.cpu.tracer width=1 dcache_port=testsys.membus.port[3] icache_port=testsys.membus.port[2] @@ -757,6 +762,9 @@ size=64 type=AlphaITB size=48 +[testsys.cpu.tracer] +type=ExeTracer + [testsys.disk0] type=IdeDisk children=image @@ -1337,7 +1345,7 @@ pio_addr=8804615847936 pio_latency=1000 platform=testsys.tsunami system=testsys -time=2009 1 1 0 0 0 3 1 +time=Thu Jan 1 00:00:00 2009 tsunami=testsys.tsunami year_is_bcd=false pio=testsys.iobus.port[23] diff --git a/tests/quick/80.netperf-stream/ref/alpha/linux/twosys-tsunami-simple-atomic/m5stats.txt b/tests/quick/80.netperf-stream/ref/alpha/linux/twosys-tsunami-simple-atomic/m5stats.txt index e6bc6fb19..719430102 100644 --- a/tests/quick/80.netperf-stream/ref/alpha/linux/twosys-tsunami-simple-atomic/m5stats.txt +++ b/tests/quick/80.netperf-stream/ref/alpha/linux/twosys-tsunami-simple-atomic/m5stats.txt @@ -139,9 +139,10 @@ drivesys.tsunami.ethernet.txPPS 25 # Pa drivesys.tsunami.ethernet.txPackets 5 # Number of Packets Transmitted drivesys.tsunami.ethernet.txTcpChecksums 2 # Number of tx TCP Checksums done by device drivesys.tsunami.ethernet.txUdpChecksums 0 # Number of tx UDP Checksums done by device -host_inst_rate 6618724 # Simulator instruction rate (inst/s) -host_seconds 41.30 # Real time elapsed on the host -host_tick_rate 4842704130 # Simulator tick rate (ticks/s) +host_inst_rate 51081325 # Simulator instruction rate (inst/s) +host_mem_usage 406704 # Number of bytes of host memory used +host_seconds 5.35 # Real time elapsed on the host +host_tick_rate 37372483621 # Simulator tick rate (ticks/s) sim_freq 1000000000000 # Frequency of simulated ticks sim_insts 273348482 # Number of instructions simulated sim_seconds 0.200001 # Number of seconds simulated @@ -380,9 +381,10 @@ drivesys.tsunami.ethernet.totalSwi 0 # to drivesys.tsunami.ethernet.totalTxDesc 0 # total number of TxDesc written to ISR drivesys.tsunami.ethernet.totalTxIdle 0 # total number of TxIdle written to ISR drivesys.tsunami.ethernet.totalTxOk 0 # total number of TxOk written to ISR -host_inst_rate 65191624612 # Simulator instruction rate (inst/s) +host_inst_rate 71036507796 # Simulator instruction rate (inst/s) +host_mem_usage 406704 # Number of bytes of host memory used host_seconds 0.00 # Real time elapsed on the host -host_tick_rate 183725573 # Simulator tick rate (ticks/s) +host_tick_rate 191282064 # Simulator tick rate (ticks/s) sim_freq 1000000000000 # Frequency of simulated ticks sim_insts 273348482 # Number of instructions simulated sim_seconds 0.000001 # Number of seconds simulated diff --git a/tests/quick/80.netperf-stream/ref/alpha/linux/twosys-tsunami-simple-atomic/stderr b/tests/quick/80.netperf-stream/ref/alpha/linux/twosys-tsunami-simple-atomic/stderr index 8fb9590c3..4f6a93597 100644 --- a/tests/quick/80.netperf-stream/ref/alpha/linux/twosys-tsunami-simple-atomic/stderr +++ b/tests/quick/80.netperf-stream/ref/alpha/linux/twosys-tsunami-simple-atomic/stderr @@ -1,6 +1,6 @@ -Listening for testsys connection on port 3456 -Listening for drivesys connection on port 3457 -0: testsys.remote_gdb.listener: listening for remote gdb #0 on port 7000 -0: drivesys.remote_gdb.listener: listening for remote gdb #1 on port 7001 +Listening for testsys connection on port 3457 +Listening for drivesys connection on port 3458 +0: testsys.remote_gdb.listener: listening for remote gdb on port 7001 +0: drivesys.remote_gdb.listener: listening for remote gdb on port 7002 warn: Entering event queue @ 0. Starting simulation... warn: Obsolete M5 instruction ivlb encountered. diff --git a/tests/quick/80.netperf-stream/ref/alpha/linux/twosys-tsunami-simple-atomic/stdout b/tests/quick/80.netperf-stream/ref/alpha/linux/twosys-tsunami-simple-atomic/stdout index 08d7271d7..3b074da7f 100644 --- a/tests/quick/80.netperf-stream/ref/alpha/linux/twosys-tsunami-simple-atomic/stdout +++ b/tests/quick/80.netperf-stream/ref/alpha/linux/twosys-tsunami-simple-atomic/stdout @@ -5,11 +5,9 @@ The Regents of The University of Michigan All Rights Reserved -M5 compiled Jun 10 2007 14:10:03 -M5 started Mon Jun 11 01:47:32 2007 -M5 executing on iceaxe -command line: /Users/nate/build/outgoing/build/ALPHA_FS/m5.debug -d /Users/nate/build/outgoing/build/ALPHA_FS/tests/debug/quick/80.netperf-stream/alpha/linux/twosys-tsunami-simple-atomic tests/run.py quick/80.netperf-stream/alpha/linux/twosys-tsunami-simple-atomic +M5 compiled Aug 3 2007 04:02:11 +M5 started Fri Aug 3 04:26:58 2007 +M5 executing on zizzer.eecs.umich.edu +command line: build/ALPHA_FS/m5.fast -d build/ALPHA_FS/tests/fast/quick/80.netperf-stream/alpha/linux/twosys-tsunami-simple-atomic tests/run.py quick/80.netperf-stream/alpha/linux/twosys-tsunami-simple-atomic Global frequency set at 1000000000000 ticks per second - 0: testsys.tsunami.io.rtc: Real-time clock set to Thu Jan 1 00:00:00 2009 - 0: drivesys.tsunami.io.rtc: Real-time clock set to Thu Jan 1 00:00:00 2009 Exiting @ tick 4300235844056 because checkpoint -- cgit v1.2.3 From c2b533cc3b0545b4ffd1d67aa3cc7d94029bd96a Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Fri, 3 Aug 2007 16:27:51 -0700 Subject: Add cscope files to .hgignore. --HG-- extra : convert_revision : 82598579baf50cd258714c7e533b96bc6bd1305a --- .hgignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.hgignore b/.hgignore index cbbb7b492..f536836de 100644 --- a/.hgignore +++ b/.hgignore @@ -2,5 +2,7 @@ syntax: glob build parser.out parsetab.py +cscope.files +cscope.out *.pyc *~ -- cgit v1.2.3 From e8e1ddd5305c4f7d4764f2cd28f70f911a29806f Mon Sep 17 00:00:00 2001 From: Nathan Binkert Date: Sat, 4 Aug 2007 15:56:48 -0700 Subject: SimpleCPU: Add some DPRINTFs --HG-- extra : convert_revision : 5fdd5a9595c3e5d6ce5f9e8c9af0a8e6c857551c --- src/cpu/simple/atomic.cc | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc index 604c48086..704b65f36 100644 --- a/src/cpu/simple/atomic.cc +++ b/src/cpu/simple/atomic.cc @@ -183,6 +183,7 @@ AtomicSimpleCPU::unserialize(Checkpoint *cp, const string §ion) void AtomicSimpleCPU::resume() { + DPRINTF(SimpleCPU, "Resume\n"); if (_status != SwitchedOut && _status != Idle) { assert(system->getMemoryMode() == Enums::atomic); @@ -231,6 +232,8 @@ AtomicSimpleCPU::takeOverFrom(BaseCPU *oldCPU) void AtomicSimpleCPU::activateContext(int thread_num, int delay) { + DPRINTF(SimpleCPU, "ActivateContext %d (%d cycles)\n", thread_num, delay); + assert(thread_num == 0); assert(thread); @@ -248,6 +251,8 @@ AtomicSimpleCPU::activateContext(int thread_num, int delay) void AtomicSimpleCPU::suspendContext(int thread_num) { + DPRINTF(SimpleCPU, "SuspendContext %d\n", thread_num); + assert(thread_num == 0); assert(thread); @@ -483,6 +488,8 @@ AtomicSimpleCPU::write(int32_t data, Addr addr, unsigned flags, uint64_t *res) void AtomicSimpleCPU::tick() { + DPRINTF(SimpleCPU, "Tick\n"); + Tick latency = cycles(1); // instruction takes one cycle by default for (int i = 0; i < width; ++i) { -- cgit v1.2.3 From d8900d8478d86789d1120f11c9918d65a456d96e Mon Sep 17 00:00:00 2001 From: Nathan Binkert Date: Sat, 4 Aug 2007 16:00:36 -0700 Subject: main: return an an exit code of 1 when we exit due to a python exception. This requires us to not use PyRun_SimpleString, but PyRun_String since the latter actually returns a result --HG-- extra : convert_revision : 3e3916ddd7eef9957569d8e72e73ba4c3160ce20 --- src/sim/main.cc | 38 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/src/sim/main.cc b/src/sim/main.cc index 5bf4add4b..62ab9445b 100644 --- a/src/sim/main.cc +++ b/src/sim/main.cc @@ -75,6 +75,39 @@ abortHandler(int sigtype) ccprintf(cerr, "Program aborted at cycle %d\n", curTick); } +int +python_main() +{ + PyObject *module; + PyObject *dict; + PyObject *result; + + module = PyImport_AddModule("__main__"); + if (module == NULL) + fatal("Could not import __main__"); + + dict = PyModule_GetDict(module); + + result = PyRun_String("import m5.main", Py_file_input, dict, dict); + if (!result) { + PyErr_Print(); + return 1; + } + Py_DECREF(result); + + result = PyRun_String("m5.main.main()", Py_file_input, dict, dict); + if (!result) { + PyErr_Print(); + return 1; + } + Py_DECREF(result); + + if (Py_FlushLine()) + PyErr_Clear(); + + return 0; +} + int main(int argc, char **argv) { @@ -114,9 +147,10 @@ main(int argc, char **argv) // initialize SWIG modules init_swig(); - PyRun_SimpleString("import m5.main"); - PyRun_SimpleString("m5.main.main()"); + int ret = python_main(); // clean up Python intepreter. Py_Finalize(); + + return ret; } -- cgit v1.2.3 From 157bd25802d0775ea5b2b7c217f3e57f51562dee Mon Sep 17 00:00:00 2001 From: Nathan Binkert Date: Sat, 4 Aug 2007 16:02:04 -0700 Subject: python: provide access to stats --HG-- extra : convert_revision : 18a4e9ef21bd77ec73482557e028d535f0c1f273 --- src/python/m5/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/python/m5/__init__.py b/src/python/m5/__init__.py index 96cb2ca13..f21bb362e 100644 --- a/src/python/m5/__init__.py +++ b/src/python/m5/__init__.py @@ -92,6 +92,7 @@ if running_m5: from event import * from simulate import * from main import options + import stats import SimObject import params -- cgit v1.2.3 From 300712c0d118646d2d2ea206f8d27fd43dbd9040 Mon Sep 17 00:00:00 2001 From: Nathan Binkert Date: Sat, 4 Aug 2007 16:05:18 -0700 Subject: swig: %include all of the enums to get all of the definitions. (instead of %import) --HG-- extra : convert_revision : bc4a39d7be3aad59b34d55aa8dd2c28285f09db9 --- src/python/generate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/generate.py b/src/python/generate.py index 99d0fb68c..6b167552e 100644 --- a/src/python/generate.py +++ b/src/python/generate.py @@ -270,7 +270,7 @@ class Generate(object): enums = list(enums) enums.sort() for enum in enums: - print >>out, '%%import "enums/%s.hh"' % enum.__name__ + print >>out, '%%include "enums/%s.hh"' % enum.__name__ print >>out for obj in ordered_objs: -- cgit v1.2.3 From 5a27431b969ed0557d2a079066d082153f97af9d Mon Sep 17 00:00:00 2001 From: Nathan Binkert Date: Sat, 4 Aug 2007 16:06:19 -0700 Subject: python: use the enum values in the memory mode changing code --HG-- extra : convert_revision : 2e399b2b407922ad076f93d33af73e3ba4c05218 --- src/python/m5/simulate.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/python/m5/simulate.py b/src/python/m5/simulate.py index c703664d4..6db25f0ed 100644 --- a/src/python/m5/simulate.py +++ b/src/python/m5/simulate.py @@ -148,20 +148,20 @@ def changeToAtomic(system): if not isinstance(system, (objects.Root, objects.System)): raise TypeError, "Parameter of type '%s'. Must be type %s or %s." % \ (type(system), objects.Root, objects.System) - if system.getMemoryMode() != objects.params.SimObject.Atomic: + if system.getMemoryMode() != objects.params.atomic: doDrain(system) print "Changing memory mode to atomic" - system.changeTiming(objects.params.SimObject.Atomic) + system.changeTiming(objects.params.atomic) def changeToTiming(system): if not isinstance(system, (objects.Root, objects.System)): raise TypeError, "Parameter of type '%s'. Must be type %s or %s." % \ (type(system), objects.Root, objects.System) - if system.getMemoryMode() != objects.params.SimObject.Timing: + if system.getMemoryMode() != objects.params.timing: doDrain(system) print "Changing memory mode to timing" - system.changeTiming(objects.params.SimObject.Timing) + system.changeTiming(objects.params.timing) def switchCpus(cpuList): print "switching cpus" -- cgit v1.2.3 From 7a996ccc98421b361f6dfd8fe6e949299152935b Mon Sep 17 00:00:00 2001 From: Nathan Binkert Date: Sat, 4 Aug 2007 16:09:24 -0700 Subject: switching: Remove the drain and resume code from the switching code. This allows us to change memory modes as well. Clean up the code while we're at it. --HG-- extra : convert_revision : fc5fee9ffd08b791f0607ee2688f32aa65d15354 --- src/python/m5/simulate.py | 37 +++++++++++-------------------------- 1 file changed, 11 insertions(+), 26 deletions(-) diff --git a/src/python/m5/simulate.py b/src/python/m5/simulate.py index 6db25f0ed..6c70d8fbd 100644 --- a/src/python/m5/simulate.py +++ b/src/python/m5/simulate.py @@ -167,34 +167,19 @@ def switchCpus(cpuList): print "switching cpus" if not isinstance(cpuList, list): raise RuntimeError, "Must pass a list to this function" - for i in cpuList: - if not isinstance(i, tuple): + for item in cpuList: + if not isinstance(item, tuple) or len(item) != 2: raise RuntimeError, "List must have tuples of (oldCPU,newCPU)" - [old_cpus, new_cpus] = zip(*cpuList) + for old_cpu, new_cpu in cpuList: + if not isinstance(old_cpu, objects.BaseCPU): + raise TypeError, "%s is not of type BaseCPU" % old_cpu + if not isinstance(new_cpu, objects.BaseCPU): + raise TypeError, "%s is not of type BaseCPU" % new_cpu - for cpu in old_cpus: - if not isinstance(cpu, objects.BaseCPU): - raise TypeError, "%s is not of type BaseCPU" % cpu - for cpu in new_cpus: - if not isinstance(cpu, objects.BaseCPU): - raise TypeError, "%s is not of type BaseCPU" % cpu - - # Drain all of the individual CPUs - drain_event = internal.event.createCountedDrain() - unready_cpus = 0 - for old_cpu in old_cpus: - unready_cpus += old_cpu.startDrain(drain_event, False) - # If we've got some objects that can't drain immediately, then simulate - if unready_cpus > 0: - drain_event.setCount(unready_cpus) - simulate() - internal.event.cleanupCountedDrain(drain_event) # Now all of the CPUs are ready to be switched out - for old_cpu in old_cpus: + for old_cpu, new_cpu in cpuList: old_cpu._ccObject.switchOut() - index = 0 - for new_cpu in new_cpus: - new_cpu.takeOverFrom(old_cpus[index]) - new_cpu._ccObject.resume() - index += 1 + + for old_cpu, new_cpu in cpuList: + new_cpu.takeOverFrom(old_cpu) -- cgit v1.2.3 From df015f17a45b18302565c43d3790d787e1b54c42 Mon Sep 17 00:00:00 2001 From: Nathan Binkert Date: Sat, 4 Aug 2007 16:11:11 -0700 Subject: switching: turn on profiling after a switch if there's an event --HG-- extra : convert_revision : 689e5b85c47bb2aaceb7eb38c2a24a2e5b69376c --- src/cpu/base.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/cpu/base.cc b/src/cpu/base.cc index ee409048b..a54ed9349 100644 --- a/src/cpu/base.cc +++ b/src/cpu/base.cc @@ -343,9 +343,8 @@ BaseCPU::takeOverFrom(BaseCPU *oldCPU, Port *ic, Port *dc) for (int i = 0; i < threadContexts.size(); ++i) threadContexts[i]->profileClear(); - // The Sampler must take care of this! -// if (profileEvent) -// profileEvent->schedule(curTick); + if (profileEvent) + profileEvent->schedule(curTick); #endif // Connect new CPU to old CPU's memory only if new CPU isn't -- cgit v1.2.3