diff options
Diffstat (limited to 'src/mem')
-rw-r--r-- | src/mem/bus.cc | 172 | ||||
-rw-r--r-- | src/mem/bus.hh | 67 | ||||
-rw-r--r-- | src/mem/cache/base_cache.cc | 26 | ||||
-rw-r--r-- | src/mem/cache/base_cache.hh | 41 | ||||
-rw-r--r-- | src/mem/cache/cache.hh | 2 | ||||
-rw-r--r-- | src/mem/cache/cache_impl.hh | 75 | ||||
-rw-r--r-- | src/mem/cache/coherence/coherence_protocol.cc | 2 | ||||
-rw-r--r-- | src/mem/cache/miss/blocking_buffer.cc | 6 | ||||
-rw-r--r-- | src/mem/cache/miss/blocking_buffer.hh | 2 | ||||
-rw-r--r-- | src/mem/cache/miss/miss_queue.cc | 13 | ||||
-rw-r--r-- | src/mem/cache/miss/miss_queue.hh | 2 | ||||
-rw-r--r-- | src/mem/packet.hh | 36 | ||||
-rw-r--r-- | src/mem/physical.cc | 112 | ||||
-rw-r--r-- | src/mem/physical.hh | 62 | ||||
-rw-r--r-- | src/mem/port.hh | 3 | ||||
-rw-r--r-- | src/mem/request.hh | 6 | ||||
-rw-r--r-- | src/mem/tport.cc | 8 |
17 files changed, 463 insertions, 172 deletions
diff --git a/src/mem/bus.cc b/src/mem/bus.cc index 3c5283a77..7584ffffd 100644 --- a/src/mem/bus.cc +++ b/src/mem/bus.cc @@ -67,6 +67,47 @@ Bus::init() (*intIter)->sendStatusChange(Port::RangeChange); } +Bus::BusFreeEvent::BusFreeEvent(Bus *_bus) : Event(&mainEventQueue), bus(_bus) +{ + assert(!scheduled()); +} + +void Bus::BusFreeEvent::process() +{ + bus->recvRetry(0); +} + +const char * Bus::BusFreeEvent::description() +{ + return "bus became available"; +} + +void +Bus::occupyBus(int numCycles) +{ + //Move up when the bus will next be free + //We avoid the use of divide by adding repeatedly + //This should be faster if the value is updated frequently, but should + //be may be slower otherwise. + + //Bring tickNextIdle up to the present tick + //There is some potential ambiguity where a cycle starts, which might make + //a difference when devices are acting right around a cycle boundary. Using + //a < allows things which happen exactly on a cycle boundary to take up only + //the following cycle. Anthing that happens later will have to "wait" for the + //end of that cycle, and then start using the bus after that. + while (tickNextIdle < curTick) + tickNextIdle += clock; + //Advance it numCycles bus cycles. + //XXX Should this use the repeating add trick as well? + tickNextIdle += (numCycles * clock); + if (!busIdle.scheduled()) { + busIdle.schedule(tickNextIdle); + } else { + busIdle.reschedule(tickNextIdle); + } + DPRINTF(Bus, "The bus is now occupied from tick %d to %d\n", curTick, tickNextIdle); +} /** Function called by the port when the bus is receiving a Timing * transaction.*/ @@ -77,17 +118,26 @@ Bus::recvTiming(Packet *pkt) DPRINTF(Bus, "recvTiming: packet src %d dest %d addr 0x%x cmd %s\n", pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString()); + Port *pktPort = interfaces[pkt->getSrc()]; + short dest = pkt->getDest(); if (dest == Packet::Broadcast) { - if ( timingSnoopPhase1(pkt) ) - { - timingSnoopPhase2(pkt); + if (timingSnoop(pkt)) { + pkt->flags |= SNOOP_COMMIT; + bool success = timingSnoop(pkt); + assert(success); + if (pkt->flags & SATISFIED) { + //Cache-Cache transfer occuring + if (retryingPort) { + retryList.pop_front(); + retryingPort = NULL; + } + return true; + } port = findPort(pkt->getAddr(), pkt->getSrc()); - } - else - { + } else { //Snoop didn't succeed - retryList.push_back(interfaces[pkt->getSrc()]); + addToRetryList(pktPort); return false; } } else { @@ -95,35 +145,61 @@ Bus::recvTiming(Packet *pkt) assert(dest != pkt->getSrc()); // catch infinite loops port = interfaces[dest]; } + + // The packet will be sent. Figure out how long it occupies the bus. + int numCycles = 0; + // Requests need one cycle to send an address + if (pkt->isRequest()) + numCycles++; + else if (pkt->isResponse() || pkt->hasData()) { + // If a packet has data, it needs ceil(size/width) cycles to send it + // We're using the "adding instead of dividing" trick again here + if (pkt->hasData()) { + int dataSize = pkt->getSize(); + for (int transmitted = 0; transmitted < dataSize; + transmitted += width) { + numCycles++; + } + } else { + // If the packet didn't have data, it must have been a response. + // Those use the bus for one cycle to send their data. + numCycles++; + } + } + + occupyBus(numCycles); + if (port->sendTiming(pkt)) { - // packet was successfully sent, just return true. + // Packet was successfully sent. Return true. + // Also take care of retries + if (retryingPort) { + retryList.pop_front(); + retryingPort = NULL; + } return true; } - // packet not successfully sent - retryList.push_back(interfaces[pkt->getSrc()]); + // Packet not successfully sent. Leave or put it on the retry list. + addToRetryList(pktPort); return false; } void Bus::recvRetry(int id) { - // Go through all the elements on the list calling sendRetry on each - // This is not very efficient at all but it works. Ultimately we should end - // up with something that is more intelligent. - int initialSize = retryList.size(); - int i; - Port *p; - - for (i = 0; i < initialSize; i++) { - assert(retryList.size() > 0); - p = retryList.front(); - retryList.pop_front(); - p->sendRetry(); + // If there's anything waiting... + if (retryList.size()) { + retryingPort = retryList.front(); + retryingPort->sendRetry(); + // If the retryingPort pointer isn't null, sendTiming wasn't called + if (retryingPort) { + warn("sendRetry didn't call sendTiming\n"); + retryList.pop_front(); + retryingPort = NULL; + } } } - Port * Bus::findPort(Addr addr, int id) { @@ -194,44 +270,34 @@ Bus::atomicSnoop(Packet *pkt) } } -bool -Bus::timingSnoopPhase1(Packet *pkt) +void +Bus::functionalSnoop(Packet *pkt) { std::vector<int> ports = findSnoopPorts(pkt->getAddr(), pkt->getSrc()); - bool success = true; - while (!ports.empty() && success) + while (!ports.empty()) { - snoopCallbacks.push_back(ports.back()); - success = interfaces[ports.back()]->sendTiming(pkt); + interfaces[ports.back()]->sendFunctional(pkt); ports.pop_back(); } - if (!success) - { - while (!snoopCallbacks.empty()) - { - interfaces[snoopCallbacks.back()]->sendStatusChange(Port::SnoopSquash); - snoopCallbacks.pop_back(); - } - return false; - } - return true; } -void -Bus::timingSnoopPhase2(Packet *pkt) +bool +Bus::timingSnoop(Packet *pkt) { - bool success; - pkt->flags |= SNOOP_COMMIT; - while (!snoopCallbacks.empty()) + std::vector<int> ports = findSnoopPorts(pkt->getAddr(), pkt->getSrc()); + bool success = true; + + while (!ports.empty() && success) { - success = interfaces[snoopCallbacks.back()]->sendTiming(pkt); - //We should not fail on snoop callbacks - assert(success); - snoopCallbacks.pop_back(); + success = interfaces[ports.back()]->sendTiming(pkt); + ports.pop_back(); } + + return success; } + /** Function called by the port when the bus is receiving a Atomic * transaction.*/ Tick @@ -252,7 +318,7 @@ Bus::recvFunctional(Packet *pkt) DPRINTF(Bus, "recvFunctional: packet src %d dest %d addr 0x%x cmd %s\n", pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString()); assert(pkt->getDest() == Packet::Broadcast); - atomicSnoop(pkt); + functionalSnoop(pkt); findPort(pkt->getAddr(), pkt->getSrc())->sendFunctional(pkt); } @@ -381,16 +447,20 @@ Bus::addressRanges(AddrRangeList &resp, AddrRangeList &snoop, int id) BEGIN_DECLARE_SIM_OBJECT_PARAMS(Bus) Param<int> bus_id; + Param<int> clock; + Param<int> width; END_DECLARE_SIM_OBJECT_PARAMS(Bus) BEGIN_INIT_SIM_OBJECT_PARAMS(Bus) - INIT_PARAM(bus_id, "a globally unique bus id") + INIT_PARAM(bus_id, "a globally unique bus id"), + INIT_PARAM(clock, "bus clock speed"), + INIT_PARAM(width, "width of the bus (bits)") END_INIT_SIM_OBJECT_PARAMS(Bus) CREATE_SIM_OBJECT(Bus) { - return new Bus(getInstanceName(), bus_id); + return new Bus(getInstanceName(), bus_id, clock, width); } REGISTER_SIM_OBJECT("Bus", Bus) diff --git a/src/mem/bus.hh b/src/mem/bus.hh index 941389296..a89738775 100644 --- a/src/mem/bus.hh +++ b/src/mem/bus.hh @@ -46,11 +46,18 @@ #include "mem/packet.hh" #include "mem/port.hh" #include "mem/request.hh" +#include "sim/eventq.hh" class Bus : public MemObject { /** a globally unique id for this bus. */ int busId; + /** the clock speed for the bus */ + int clock; + /** the width of the bus in bytes */ + int width; + /** the next tick at which the bus will be idle */ + Tick tickNextIdle; static const int defaultId = -1; @@ -62,9 +69,6 @@ class Bus : public MemObject AddrRangeList defaultRange; std::vector<DevMap> portSnoopList; - std::vector<int> snoopCallbacks; - - /** Function called by the port when the bus is recieving a Timing transaction.*/ bool recvTiming(Packet *pkt); @@ -105,16 +109,14 @@ class Bus : public MemObject /** Snoop all relevant ports atomicly. */ void atomicSnoop(Packet *pkt); - /** Snoop for NACK and Blocked in phase 1 - * @return True if succeds. - */ - bool timingSnoopPhase1(Packet *pkt); + /** Snoop all relevant ports functionally. */ + void functionalSnoop(Packet *pkt); - /** @todo Don't need to commit all snoops just those that need it - *(register somehow). */ - /** Commit all snoops now that we know if any of them would have blocked. + /** Call snoop on caches, be sure to set SNOOP_COMMIT bit if you want + * the snoop to happen + * @return True if succeds. */ - void timingSnoopPhase2(Packet *pkt); + bool timingSnoop(Packet *pkt); /** Process address range request. * @param resp addresses that we can respond to @@ -181,6 +183,22 @@ class Bus : public MemObject }; + class BusFreeEvent : public Event + { + Bus * bus; + + public: + BusFreeEvent(Bus * _bus); + void process(); + const char *description(); + }; + + BusFreeEvent busIdle; + + void occupyBus(int numCycles); + + Port * retryingPort; + /** An array of pointers to the peer port interfaces connected to this bus.*/ std::vector<Port*> interfaces; @@ -189,6 +207,23 @@ class Bus : public MemObject * original send failed for whatever reason.*/ std::list<Port*> retryList; + void addToRetryList(Port * port) + { + if (!retryingPort) { + // The device wasn't retrying a packet, or wasn't at an appropriate + // time. + retryList.push_back(port); + } else { + // The device was retrying a packet. It didn't work, so we'll leave + // it at the head of the retry list. + retryingPort = NULL; + + // We shouldn't be receiving a packet from one port when a different + // one is retrying. + assert(port == retryingPort); + } + } + /** Port that handles requests that don't match any of the interfaces.*/ Port *defaultPort; @@ -199,8 +234,14 @@ class Bus : public MemObject virtual void init(); - Bus(const std::string &n, int bus_id) - : MemObject(n), busId(bus_id), defaultPort(NULL) {} + Bus(const std::string &n, int bus_id, int _clock, int _width) + : MemObject(n), busId(bus_id), clock(_clock), width(_width), + tickNextIdle(0), busIdle(this), retryingPort(NULL), defaultPort(NULL) + { + //Both the width and clock period must be positive + assert(width); + assert(clock); + } }; diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc index d7ccca8c0..c56f48eeb 100644 --- a/src/mem/cache/base_cache.cc +++ b/src/mem/cache/base_cache.cc @@ -71,7 +71,7 @@ BaseCache::CachePort::deviceBlockSize() bool BaseCache::CachePort::recvTiming(Packet *pkt) { - if (blocked) + if (pkt->isRequest() && blocked) { DPRINTF(Cache,"Scheduling a retry while blocked\n"); mustSendRetry = true; @@ -105,14 +105,14 @@ BaseCache::CachePort::recvRetry() drainList.pop_front(); } } - - if (!isCpuSide) + else if (!isCpuSide) { pkt = cache->getPacket(); + MSHR* mshr = (MSHR*)pkt->senderState; bool success = sendTiming(pkt); DPRINTF(Cache, "Address %x was %s in sending the timing request\n", pkt->getAddr(), success ? "succesful" : "unsuccesful"); - cache->sendResult(pkt, success); + cache->sendResult(pkt, mshr, success); if (success && cache->doMasterRequest()) { //Still more to issue, rerequest in 1 cycle @@ -123,7 +123,9 @@ BaseCache::CachePort::recvRetry() } else { - pkt = cache->getCoherencePacket(); + //pkt = cache->getCoherencePacket(); + //We save the packet, no reordering on CSHRS + pkt = cshrRetry; bool success = sendTiming(pkt); if (success && cache->doSlaveRequest()) { @@ -182,10 +184,11 @@ BaseCache::CacheEvent::process() { //MSHR pkt = cachePort->cache->getPacket(); + MSHR* mshr = (MSHR*) pkt->senderState; bool success = cachePort->sendTiming(pkt); DPRINTF(Cache, "Address %x was %s in sending the timing request\n", pkt->getAddr(), success ? "succesful" : "unsuccesful"); - cachePort->cache->sendResult(pkt, success); + cachePort->cache->sendResult(pkt, mshr, success); if (success && cachePort->cache->doMasterRequest()) { //Still more to issue, rerequest in 1 cycle @@ -198,7 +201,11 @@ BaseCache::CacheEvent::process() //CSHR pkt = cachePort->cache->getCoherencePacket(); bool success = cachePort->sendTiming(pkt); - if (success && cachePort->cache->doSlaveRequest()) + if (!success) { + //Need to send on a retry + cachePort->cshrRetry = pkt; + } + else if (cachePort->cache->doSlaveRequest()) { //Still more to issue, rerequest in 1 cycle pkt = NULL; @@ -209,7 +216,10 @@ BaseCache::CacheEvent::process() } //Response //Know the packet to send - pkt->result = Packet::Success; + if (pkt->flags & NACKED_LINE) + pkt->result = Packet::Nacked; + else + pkt->result = Packet::Success; pkt->makeTimingResponse(); if (!cachePort->drainList.empty()) { //Already blocked waiting for bus, just append diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh index c69fb7fd5..c45f3b71b 100644 --- a/src/mem/cache/base_cache.hh +++ b/src/mem/cache/base_cache.hh @@ -72,6 +72,7 @@ enum RequestCause{ Request_PF }; +class MSHR; /** * A basic cache interface. Implements some common functions for speed. */ @@ -112,6 +113,8 @@ class BaseCache : public MemObject bool isCpuSide; std::list<Packet *> drainList; + + Packet *cshrRetry; }; struct CacheEvent : public Event @@ -156,7 +159,7 @@ class BaseCache : public MemObject if (status == Port::RangeChange){ if (!isCpuSide) { cpuSidePort->sendStatusChange(Port::RangeChange); - if (topLevelCache && !snoopRangesSent) { + if (!snoopRangesSent) { snoopRangesSent = true; memSidePort->sendStatusChange(Port::RangeChange); } @@ -165,10 +168,6 @@ class BaseCache : public MemObject memSidePort->sendStatusChange(Port::RangeChange); } } - else if (status == Port::SnoopSquash) { - assert(snoopPhase2); - snoopPhase2 = false; - } } virtual Packet *getPacket() @@ -181,7 +180,7 @@ class BaseCache : public MemObject fatal("No implementation"); } - virtual void sendResult(Packet* &pkt, bool success) + virtual void sendResult(Packet* &pkt, MSHR* mshr, bool success) { fatal("No implementation"); @@ -215,9 +214,6 @@ class BaseCache : public MemObject bool topLevelCache; - /** True if we are now in phase 2 of the snoop process. */ - bool snoopPhase2; - /** Stores time the cache blocked for statistics. */ Tick blockedCycle; @@ -523,8 +519,10 @@ class BaseCache : public MemObject */ void respond(Packet *pkt, Tick time) { - CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt); - reqCpu->schedule(time); + if (pkt->needsResponse()) { + CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt); + reqCpu->schedule(time); + } } /** @@ -537,8 +535,10 @@ class BaseCache : public MemObject if (!pkt->req->isUncacheable()) { missLatency[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] += time - pkt->time; } - CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt); - reqCpu->schedule(time); + if (pkt->needsResponse()) { + CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt); + reqCpu->schedule(time); + } } /** @@ -549,6 +549,7 @@ class BaseCache : public MemObject { // assert("Implement\n" && 0); // mi->respond(pkt,curTick + hitLatency); + assert (pkt->needsResponse()); CacheEvent *reqMem = new CacheEvent(memSidePort, pkt); reqMem->schedule(time); } @@ -570,14 +571,14 @@ class BaseCache : public MemObject { //This is where snoops get updated AddrRangeList dummy; - if (!topLevelCache) - { +// if (!topLevelCache) +// { cpuSidePort->getPeerAddressRanges(dummy, snoop); - } - else - { - snoop.push_back(RangeSize(0,-1)); - } +// } +// else +// { +// snoop.push_back(RangeSize(0,-1)); +// } return; } diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh index 4b8870c95..923bf8255 100644 --- a/src/mem/cache/cache.hh +++ b/src/mem/cache/cache.hh @@ -175,7 +175,7 @@ class Cache : public BaseCache * @param pkt The request. * @param success True if the request was sent successfully. */ - virtual void sendResult(Packet * &pkt, bool success); + virtual void sendResult(Packet * &pkt, MSHR* mshr, bool success); /** * Handles a response (cache line fill/write ack) from the bus. diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index 46f4b0ebe..c3c1c0881 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -60,28 +60,20 @@ doTimingAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide) { if (isCpuSide) { - if (pkt->isWrite() && (pkt->req->getFlags() & LOCKED)) { + if (pkt->isWrite() && (pkt->req->isLocked())) { pkt->req->setScResult(1); } - if (!(pkt->flags & SATISFIED)) { - access(pkt); - } + access(pkt); + } else { if (pkt->isResponse()) handleResponse(pkt); else { - //Check if we are in phase1 - if (!snoopPhase2) { - snoopPhase2 = true; - } - else { - //Check if we should do the snoop - if (pkt->flags && SNOOP_COMMIT) - snoop(pkt); - snoopPhase2 = false; - } + //Check if we should do the snoop + if (pkt->flags & SNOOP_COMMIT) + snoop(pkt); } } return true; @@ -95,7 +87,7 @@ doAtomicAccess(Packet *pkt, bool isCpuSide) if (isCpuSide) { //Temporary solution to LL/SC - if (pkt->isWrite() && (pkt->req->getFlags() & LOCKED)) { + if (pkt->isWrite() && (pkt->req->isLocked())) { pkt->req->setScResult(1); } @@ -125,7 +117,7 @@ doFunctionalAccess(Packet *pkt, bool isCpuSide) pkt->req->setThreadContext(0,0); //Temporary solution to LL/SC - if (pkt->isWrite() && (pkt->req->getFlags() & LOCKED)) { + if (pkt->isWrite() && (pkt->req->isLocked())) { assert("Can't handle LL/SC on functional path\n"); } @@ -211,9 +203,8 @@ Cache<TagStore,Buffering,Coherence>::access(PacketPtr &pkt) pkt->getAddr() & (((ULL(1))<<48)-1), pkt->getAddr() & ~((Addr)blkSize - 1)); - //@todo Should this return latency have the hit latency in it? -// respond(pkt,curTick+lat); pkt->flags |= SATISFIED; + //Invalidates/Upgrades need no response if they get the bus // return MA_HIT; //@todo, return values return true; } @@ -243,9 +234,9 @@ Cache<TagStore,Buffering,Coherence>::access(PacketPtr &pkt) missQueue->doWriteback(writebacks.front()); writebacks.pop_front(); } - DPRINTF(Cache, "%s %x %s blk_addr: %x pc %x\n", pkt->cmdString(), + DPRINTF(Cache, "%s %x %s blk_addr: %x\n", pkt->cmdString(), pkt->getAddr() & (((ULL(1))<<48)-1), (blk) ? "hit" : "miss", - pkt->getAddr() & ~((Addr)blkSize - 1), pkt->req->getPC()); + pkt->getAddr() & ~((Addr)blkSize - 1)); if (blk) { // Hit hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++; @@ -294,10 +285,10 @@ Cache<TagStore,Buffering,Coherence>::getPacket() template<class TagStore, class Buffering, class Coherence> void -Cache<TagStore,Buffering,Coherence>::sendResult(PacketPtr &pkt, bool success) +Cache<TagStore,Buffering,Coherence>::sendResult(PacketPtr &pkt, MSHR* mshr, bool success) { if (success) { - missQueue->markInService(pkt); + missQueue->markInService(pkt, mshr); //Temp Hack for UPGRADES if (pkt->cmd == Packet::UpgradeReq) { handleResponse(pkt); @@ -313,6 +304,13 @@ Cache<TagStore,Buffering,Coherence>::handleResponse(Packet * &pkt) { BlkType *blk = NULL; if (pkt->senderState) { + if (pkt->result == Packet::Nacked) { + pkt->reinitFromRequest(); + panic("Unimplemented NACK of packet\n"); + } + if (pkt->result == Packet::BadAddress) { + //Make the response a Bad address and send it + } // MemDebug::cacheResponse(pkt); DPRINTF(Cache, "Handling reponse to %x, blk addr: %x\n",pkt->getAddr(), pkt->getAddr() & (((ULL(1))<<48)-1)); @@ -321,9 +319,11 @@ Cache<TagStore,Buffering,Coherence>::handleResponse(Packet * &pkt) blk = tags->findBlock(pkt); CacheBlk::State old_state = (blk) ? blk->status : 0; PacketList writebacks; + CacheBlk::State new_state = coherence->getNewState(pkt,old_state); + DPRINTF(Cache, "Block for blk addr %x moving from state %i to %i\n", + pkt->getAddr() & (((ULL(1))<<48)-1), old_state, new_state); blk = tags->handleFill(blk, (MSHR*)pkt->senderState, - coherence->getNewState(pkt,old_state), - writebacks, pkt); + new_state, writebacks, pkt); while (!writebacks.empty()) { missQueue->doWriteback(writebacks.front()); writebacks.pop_front(); @@ -394,9 +394,9 @@ Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt) //If the outstanding request was an invalidate (upgrade,readex,..) //Then we need to ACK the request until we get the data //Also NACK if the outstanding request is not a cachefill (writeback) + assert(!(pkt->flags & SATISFIED)); pkt->flags |= SATISFIED; pkt->flags |= NACKED_LINE; - assert("Don't detect these on the other side yet\n"); respondToSnoop(pkt, curTick + hitLatency); return; } @@ -410,7 +410,7 @@ Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt) //@todo Make it so that a read to a pending read can't be exclusive now. //Set the address so find match works - assert("Don't have invalidates yet\n"); + panic("Don't have invalidates yet\n"); invalidatePkt->addrOverride(pkt->getAddr()); //Append the invalidate on @@ -433,6 +433,7 @@ Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt) if (pkt->isRead()) { //Only Upgrades don't get here //Supply the data + assert(!(pkt->flags & SATISFIED)); pkt->flags |= SATISFIED; //If we are in an exclusive protocol, make it ask again @@ -451,7 +452,7 @@ Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt) if (pkt->isInvalidate()) { //This must be an upgrade or other cache will take ownership - missQueue->markInService(mshr->pkt); + missQueue->markInService(mshr->pkt, mshr); } return; } @@ -461,10 +462,16 @@ Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt) CacheBlk::State new_state; bool satisfy = coherence->handleBusRequest(pkt,blk,mshr, new_state); if (satisfy) { + DPRINTF(Cache, "Cache snooped a %s request and now supplying data," + "new state is %i\n", + pkt->cmdString(), new_state); + tags->handleSnoop(blk, new_state, pkt); respondToSnoop(pkt, curTick + hitLatency); return; } + if (blk) DPRINTF(Cache, "Cache snooped a %s request, new state is %i\n", + pkt->cmdString(), new_state); tags->handleSnoop(blk, new_state); } @@ -610,7 +617,7 @@ Cache<TagStore,Buffering,Coherence>::probe(Packet * &pkt, bool update, CachePort // update the cache state and statistics if (mshr || !writes.empty()){ // Can't handle it, return pktuest unsatisfied. - return 0; + panic("Atomic access ran into outstanding MSHR's or WB's!"); } if (!pkt->req->isUncacheable()) { // Fetch the cache block to fill @@ -627,7 +634,9 @@ Cache<TagStore,Buffering,Coherence>::probe(Packet * &pkt, bool update, CachePort lat = memSidePort->sendAtomic(busPkt); //Be sure to flip the response to a request for coherence - busPkt->makeAtomicResponse(); + if (busPkt->needsResponse()) { + busPkt->makeAtomicResponse(); + } /* if (!(busPkt->flags & SATISFIED)) { // blocked at a higher level, just return @@ -662,7 +671,7 @@ Cache<TagStore,Buffering,Coherence>::probe(Packet * &pkt, bool update, CachePort hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++; } else if (pkt->isWrite()) { // Still need to change data in all locations. - return otherSidePort->sendAtomic(pkt); + otherSidePort->sendFunctional(pkt); } return curTick + lat; } @@ -680,9 +689,15 @@ Cache<TagStore,Buffering,Coherence>::snoopProbe(PacketPtr &pkt) CacheBlk::State new_state = 0; bool satisfy = coherence->handleBusRequest(pkt,blk,mshr, new_state); if (satisfy) { + DPRINTF(Cache, "Cache snooped a %s request and now supplying data," + "new state is %i\n", + pkt->cmdString(), new_state); + tags->handleSnoop(blk, new_state, pkt); return hitLatency; } + if (blk) DPRINTF(Cache, "Cache snooped a %s request, new state is %i\n", + pkt->cmdString(), new_state); tags->handleSnoop(blk, new_state); return 0; } diff --git a/src/mem/cache/coherence/coherence_protocol.cc b/src/mem/cache/coherence/coherence_protocol.cc index bcf3ce9c5..e28dda3dc 100644 --- a/src/mem/cache/coherence/coherence_protocol.cc +++ b/src/mem/cache/coherence/coherence_protocol.cc @@ -271,7 +271,7 @@ CoherenceProtocol::CoherenceProtocol(const string &name, } Packet::Command writeToSharedCmd = doUpgrades ? Packet::UpgradeReq : Packet::ReadExReq; - Packet::Command writeToSharedResp = doUpgrades ? Packet::UpgradeResp : Packet::ReadExResp; + Packet::Command writeToSharedResp = doUpgrades ? Packet::UpgradeReq : Packet::ReadExResp; //@todo add in hardware prefetch to this list if (protocol == "msi") { diff --git a/src/mem/cache/miss/blocking_buffer.cc b/src/mem/cache/miss/blocking_buffer.cc index 7a6ea9133..f7aacff89 100644 --- a/src/mem/cache/miss/blocking_buffer.cc +++ b/src/mem/cache/miss/blocking_buffer.cc @@ -123,12 +123,12 @@ BlockingBuffer::restoreOrigCmd(Packet * &pkt) } void -BlockingBuffer::markInService(Packet * &pkt) +BlockingBuffer::markInService(Packet * &pkt, MSHR* mshr) { if (!pkt->isCacheFill() && pkt->isWrite()) { // Forwarding a write/ writeback, don't need to change // the command - assert((MSHR*)pkt->senderState == &wb); + assert(mshr == &wb); cache->clearMasterRequest(Request_WB); if (!pkt->needsResponse()) { assert(wb.getNumTargets() == 0); @@ -138,7 +138,7 @@ BlockingBuffer::markInService(Packet * &pkt) wb.inService = true; } } else { - assert((MSHR*)pkt->senderState == &miss); + assert(mshr == &miss); cache->clearMasterRequest(Request_MSHR); if (!pkt->needsResponse()) { assert(miss.getNumTargets() == 0); diff --git a/src/mem/cache/miss/blocking_buffer.hh b/src/mem/cache/miss/blocking_buffer.hh index 641d5a798..f7069696c 100644 --- a/src/mem/cache/miss/blocking_buffer.hh +++ b/src/mem/cache/miss/blocking_buffer.hh @@ -152,7 +152,7 @@ public: * are successfully sent. * @param pkt The request that was sent on the bus. */ - void markInService(Packet * &pkt); + void markInService(Packet * &pkt, MSHR* mshr); /** * Frees the resources of the pktuest and unblock the cache. diff --git a/src/mem/cache/miss/miss_queue.cc b/src/mem/cache/miss/miss_queue.cc index 273b6587f..bdb7a39c8 100644 --- a/src/mem/cache/miss/miss_queue.cc +++ b/src/mem/cache/miss/miss_queue.cc @@ -372,7 +372,7 @@ MissQueue::allocateMiss(Packet * &pkt, int size, Tick time) MSHR* MissQueue::allocateWrite(Packet * &pkt, int size, Tick time) { - MSHR* mshr = wb.allocate(pkt,blkSize); + MSHR* mshr = wb.allocate(pkt,size); mshr->order = order++; //REMOVING COMPRESSION FOR NOW @@ -446,7 +446,7 @@ MissQueue::handleMiss(Packet * &pkt, int blkSize, Tick time) /** * @todo Add write merging here. */ - mshr = allocateWrite(pkt, blkSize, time); + mshr = allocateWrite(pkt, pkt->getSize(), time); return; } @@ -526,9 +526,8 @@ MissQueue::restoreOrigCmd(Packet * &pkt) } void -MissQueue::markInService(Packet * &pkt) +MissQueue::markInService(Packet * &pkt, MSHR* mshr) { - assert(pkt->senderState != 0); bool unblock = false; BlockedCause cause = NUM_BLOCKED_CAUSES; @@ -540,7 +539,7 @@ MissQueue::markInService(Packet * &pkt) // Forwarding a write/ writeback, don't need to change // the command unblock = wb.isFull(); - wb.markInService((MSHR*)pkt->senderState); + wb.markInService(mshr); if (!wb.havePending()){ cache->clearMasterRequest(Request_WB); } @@ -551,11 +550,11 @@ MissQueue::markInService(Packet * &pkt) } } else { unblock = mq.isFull(); - mq.markInService((MSHR*)pkt->senderState); + mq.markInService(mshr); if (!mq.havePending()){ cache->clearMasterRequest(Request_MSHR); } - if (((MSHR*)(pkt->senderState))->originalCmd == Packet::HardPFReq) { + if (mshr->originalCmd == Packet::HardPFReq) { DPRINTF(HWPrefetch, "%s:Marking a HW_PF in service\n", cache->name()); //Also clear pending if need be diff --git a/src/mem/cache/miss/miss_queue.hh b/src/mem/cache/miss/miss_queue.hh index 505d1f90c..179638d2b 100644 --- a/src/mem/cache/miss/miss_queue.hh +++ b/src/mem/cache/miss/miss_queue.hh @@ -256,7 +256,7 @@ class MissQueue * are successfully sent. * @param pkt The request that was sent on the bus. */ - void markInService(Packet * &pkt); + void markInService(Packet * &pkt, MSHR* mshr); /** * Collect statistics and free resources of a satisfied pktuest. diff --git a/src/mem/packet.hh b/src/mem/packet.hh index be9bf5f57..4d57aee75 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -58,10 +58,6 @@ typedef std::list<PacketPtr> PacketList; #define NO_ALLOCATE 1 << 5 #define SNOOP_COMMIT 1 << 6 -//For statistics we need max number of commands, hard code it at -//20 for now. @todo fix later -#define NUM_MEM_CMDS 1 << 9 - /** * A Packet is used to encapsulate a transfer between two objects in * the memory system (e.g., the L1 and L2 cache). (In contrast, a @@ -164,6 +160,8 @@ class Packet private: /** List of command attributes. */ + // If you add a new CommandAttribute, make sure to increase NUM_MEM_CMDS + // as well. enum CommandAttribute { IsRead = 1 << 0, @@ -174,30 +172,37 @@ class Packet IsResponse = 1 << 5, NeedsResponse = 1 << 6, IsSWPrefetch = 1 << 7, - IsHWPrefetch = 1 << 8 + IsHWPrefetch = 1 << 8, + HasData = 1 << 9 }; +//For statistics we need max number of commands, hard code it at +//20 for now. @todo fix later +#define NUM_MEM_CMDS 1 << 10 + public: /** List of all commands associated with a packet. */ enum Command { InvalidCmd = 0, ReadReq = IsRead | IsRequest | NeedsResponse, - WriteReq = IsWrite | IsRequest | NeedsResponse, - WriteReqNoAck = IsWrite | IsRequest, - ReadResp = IsRead | IsResponse | NeedsResponse, + WriteReq = IsWrite | IsRequest | NeedsResponse | HasData, + WriteReqNoAck = IsWrite | IsRequest | HasData, + ReadResp = IsRead | IsResponse | NeedsResponse | HasData, WriteResp = IsWrite | IsResponse | NeedsResponse, - Writeback = IsWrite | IsRequest, + Writeback = IsWrite | IsRequest | HasData, SoftPFReq = IsRead | IsRequest | IsSWPrefetch | NeedsResponse, HardPFReq = IsRead | IsRequest | IsHWPrefetch | NeedsResponse, - SoftPFResp = IsRead | IsResponse | IsSWPrefetch | NeedsResponse, - HardPFResp = IsRead | IsResponse | IsHWPrefetch | NeedsResponse, + SoftPFResp = IsRead | IsResponse | IsSWPrefetch + | NeedsResponse | HasData, + HardPFResp = IsRead | IsResponse | IsHWPrefetch + | NeedsResponse | HasData, InvalidateReq = IsInvalidate | IsRequest, - WriteInvalidateReq = IsWrite | IsInvalidate | IsRequest, - UpgradeReq = IsInvalidate | IsRequest | NeedsResponse, - UpgradeResp = IsInvalidate | IsResponse | NeedsResponse, + WriteInvalidateReq = IsWrite | IsInvalidate | IsRequest | HasData, + UpgradeReq = IsInvalidate | IsRequest, ReadExReq = IsRead | IsInvalidate | IsRequest | NeedsResponse, - ReadExResp = IsRead | IsInvalidate | IsResponse | NeedsResponse + ReadExResp = IsRead | IsInvalidate | IsResponse + | NeedsResponse | HasData }; /** Return the string name of the cmd field (for debugging and @@ -219,6 +224,7 @@ class Packet bool isResponse() { return (cmd & IsResponse) != 0; } bool needsResponse() { return (cmd & NeedsResponse) != 0; } bool isInvalidate() { return (cmd & IsInvalidate) != 0; } + bool hasData() { return (cmd & HasData) != 0; } bool isCacheFill() { return (flags & CACHE_LINE_FILL) != 0; } bool isNoAllocate() { return (flags & NO_ALLOCATE) != 0; } diff --git a/src/mem/physical.cc b/src/mem/physical.cc index 0580954de..7303f278e 100644 --- a/src/mem/physical.cc +++ b/src/mem/physical.cc @@ -110,28 +110,112 @@ PhysicalMemory::calculateLatency(Packet *pkt) return lat; } + + +// Add load-locked to tracking list. Should only be called if the +// operation is a load and the LOCKED flag is set. +void +PhysicalMemory::trackLoadLocked(Request *req) +{ + Addr paddr = LockedAddr::mask(req->getPaddr()); + + // first we check if we already have a locked addr for this + // xc. Since each xc only gets one, we just update the + // existing record with the new address. + list<LockedAddr>::iterator i; + + for (i = lockedAddrList.begin(); i != lockedAddrList.end(); ++i) { + if (i->matchesContext(req)) { + DPRINTF(LLSC, "Modifying lock record: cpu %d thread %d addr %#x\n", + req->getCpuNum(), req->getThreadNum(), paddr); + i->addr = paddr; + return; + } + } + + // no record for this xc: need to allocate a new one + DPRINTF(LLSC, "Adding lock record: cpu %d thread %d addr %#x\n", + req->getCpuNum(), req->getThreadNum(), paddr); + lockedAddrList.push_front(LockedAddr(req)); +} + + +// Called on *writes* only... both regular stores and +// store-conditional operations. Check for conventional stores which +// conflict with locked addresses, and for success/failure of store +// conditionals. +bool +PhysicalMemory::checkLockedAddrList(Request *req) +{ + Addr paddr = LockedAddr::mask(req->getPaddr()); + bool isLocked = req->isLocked(); + + // Initialize return value. Non-conditional stores always + // succeed. Assume conditional stores will fail until proven + // otherwise. + bool success = !isLocked; + + // Iterate over list. Note that there could be multiple matching + // records, as more than one context could have done a load locked + // to this location. + list<LockedAddr>::iterator i = lockedAddrList.begin(); + + while (i != lockedAddrList.end()) { + + if (i->addr == paddr) { + // we have a matching address + + if (isLocked && i->matchesContext(req)) { + // it's a store conditional, and as far as the memory + // system can tell, the requesting context's lock is + // still valid. + DPRINTF(LLSC, "StCond success: cpu %d thread %d addr %#x\n", + req->getCpuNum(), req->getThreadNum(), paddr); + success = true; + } + + // Get rid of our record of this lock and advance to next + DPRINTF(LLSC, "Erasing lock record: cpu %d thread %d addr %#x\n", + i->cpuNum, i->threadNum, paddr); + i = lockedAddrList.erase(i); + } + else { + // no match: advance to next record + ++i; + } + } + + if (isLocked) { + req->setScResult(success ? 1 : 0); + } + + return success; +} + void PhysicalMemory::doFunctionalAccess(Packet *pkt) { assert(pkt->getAddr() + pkt->getSize() <= params()->addrRange.size()); - switch (pkt->cmd) { - case Packet::ReadReq: + if (pkt->isRead()) { + if (pkt->req->isLocked()) { + trackLoadLocked(pkt->req); + } memcpy(pkt->getPtr<uint8_t>(), pmemAddr + pkt->getAddr() - params()->addrRange.start, pkt->getSize()); - break; - case Packet::WriteReq: - memcpy(pmemAddr + pkt->getAddr() - params()->addrRange.start, - pkt->getPtr<uint8_t>(), - pkt->getSize()); - // temporary hack: will need to add real LL/SC implementation - // for cacheless systems later. - if (pkt->req->getFlags() & LOCKED) { - pkt->req->setScResult(1); + } + else if (pkt->isWrite()) { + if (writeOK(pkt->req)) { + memcpy(pmemAddr + pkt->getAddr() - params()->addrRange.start, + pkt->getPtr<uint8_t>(), pkt->getSize()); } - break; - default: + } + else if (pkt->isInvalidate()) { + //upgrade or invalidate + pkt->flags |= SATISFIED; + } + else { panic("unimplemented"); } @@ -147,7 +231,7 @@ PhysicalMemory::getPort(const std::string &if_name, int idx) port = new MemoryPort(name() + "-port", this); return port; } else if (if_name == "functional") { - /* special port for functional writes at startup. */ + /* special port for functional writes at startup. And for memtester */ return new MemoryPort(name() + "-funcport", this); } else { panic("PhysicalMemory::getPort: unknown port %s requested", if_name); diff --git a/src/mem/physical.hh b/src/mem/physical.hh index 02308b2ef..97bea2ec4 100644 --- a/src/mem/physical.hh +++ b/src/mem/physical.hh @@ -78,6 +78,68 @@ class PhysicalMemory : public MemObject const PhysicalMemory &operator=(const PhysicalMemory &specmem); protected: + + class LockedAddr { + public: + // on alpha, minimum LL/SC granularity is 16 bytes, so lower + // bits need to masked off. + static const Addr Addr_Mask = 0xf; + + static Addr mask(Addr paddr) { return (paddr & ~Addr_Mask); } + + Addr addr; // locked address + int cpuNum; // locking CPU + int threadNum; // locking thread ID within CPU + + // check for matching execution context + bool matchesContext(Request *req) + { + return (cpuNum == req->getCpuNum() && + threadNum == req->getThreadNum()); + } + + LockedAddr(Request *req) + : addr(mask(req->getPaddr())), + cpuNum(req->getCpuNum()), + threadNum(req->getThreadNum()) + { + } + }; + + std::list<LockedAddr> lockedAddrList; + + // helper function for checkLockedAddrs(): we really want to + // inline a quick check for an empty locked addr list (hopefully + // the common case), and do the full list search (if necessary) in + // this out-of-line function + bool checkLockedAddrList(Request *req); + + // Record the address of a load-locked operation so that we can + // clear the execution context's lock flag if a matching store is + // performed + void trackLoadLocked(Request *req); + + // Compare a store address with any locked addresses so we can + // clear the lock flag appropriately. Return value set to 'false' + // if store operation should be suppressed (because it was a + // conditional store and the address was no longer locked by the + // requesting execution context), 'true' otherwise. Note that + // this method must be called on *all* stores since even + // non-conditional stores must clear any matching lock addresses. + bool writeOK(Request *req) { + if (lockedAddrList.empty()) { + // no locked addrs: nothing to check, store_conditional fails + bool isLocked = req->isLocked(); + if (isLocked) { + req->setScResult(0); + } + return !isLocked; // only do write if not an sc + } else { + // iterate over list... + return checkLockedAddrList(req); + } + } + uint8_t *pmemAddr; MemoryPort *port; int pagePtr; diff --git a/src/mem/port.hh b/src/mem/port.hh index 6b4184043..bb3bc1b1b 100644 --- a/src/mem/port.hh +++ b/src/mem/port.hh @@ -106,8 +106,7 @@ class Port /** Holds the ports status. Currently just that a range recomputation needs * to be done. */ enum Status { - RangeChange, - SnoopSquash + RangeChange }; void setName(const std::string &name) diff --git a/src/mem/request.hh b/src/mem/request.hh index 6acd7526c..e54984fcd 100644 --- a/src/mem/request.hh +++ b/src/mem/request.hh @@ -232,9 +232,11 @@ class Request Addr getPC() { assert(validPC); return pc; } /** Accessor Function to Check Cacheability. */ - bool isUncacheable() { return getFlags() & UNCACHEABLE; } + bool isUncacheable() { return (getFlags() & UNCACHEABLE) != 0; } - bool isInstRead() { return getFlags() & INST_READ; } + bool isInstRead() { return (getFlags() & INST_READ) != 0; } + + bool isLocked() { return (getFlags() & LOCKED) != 0; } friend class Packet; }; diff --git a/src/mem/tport.cc b/src/mem/tport.cc index 55c301c87..cef7a2a5b 100644 --- a/src/mem/tport.cc +++ b/src/mem/tport.cc @@ -47,9 +47,11 @@ SimpleTimingPort::recvTiming(Packet *pkt) // if we ever added it back. assert(pkt->result != Packet::Nacked); Tick latency = recvAtomic(pkt); - // turn packet around to go back to requester - pkt->makeTimingResponse(); - sendTimingLater(pkt, latency); + // turn packet around to go back to requester if response expected + if (pkt->needsResponse()) { + pkt->makeTimingResponse(); + sendTimingLater(pkt, latency); + } return true; } |