From 462c288a757bc4fd50324ebe2f1fd697d53838ab Mon Sep 17 00:00:00 2001 From: Andreas Hansson Date: Fri, 25 Sep 2015 07:13:54 -0400 Subject: mem: Make the coherent crossbar account for timing snoops This patch introduces the concept of a snoop latency. Given the requirement to snoop and forward packets in zero time (due to the coherency mechanism), the latency is accounted for later. On a snoop, we establish the latency, and later add it to the header delay of the packet. To allow multiple caches to contribute to the snoop latency, we use a separate variable in the packet, and then take the maximum before adding it to the header delay. --- src/mem/cache/cache.cc | 36 +++++++++++++++++++++++++++++------- src/mem/cache/cache.hh | 17 ++++++++++++----- src/mem/coherent_xbar.cc | 16 ++++++++++++++++ src/mem/packet.hh | 13 +++++++++++-- 4 files changed, 68 insertions(+), 14 deletions(-) (limited to 'src/mem') diff --git a/src/mem/cache/cache.cc b/src/mem/cache/cache.cc index 3060c3e77..19d8dc800 100644 --- a/src/mem/cache/cache.cc +++ b/src/mem/cache/cache.cc @@ -1730,7 +1730,7 @@ Cache::doTimingSupplyResponse(PacketPtr req_pkt, const uint8_t *blk_data, memSidePort->schedTimingSnoopResp(pkt, forward_time, true); } -void +uint32_t Cache::handleSnoop(PacketPtr pkt, CacheBlk *blk, bool is_timing, bool is_deferred, bool pending_inval) { @@ -1748,6 +1748,8 @@ Cache::handleSnoop(PacketPtr pkt, CacheBlk *blk, bool is_timing, bool invalidate = pkt->isInvalidate(); bool M5_VAR_USED needs_exclusive = pkt->needsExclusive(); + uint32_t snoop_delay = 0; + if (forwardSnoops) { // first propagate snoop upward to see if anyone above us wants to // handle it. save & restore packet src since it will get @@ -1765,6 +1767,12 @@ Cache::handleSnoop(PacketPtr pkt, CacheBlk *blk, bool is_timing, // time snoopPkt.headerDelay = snoopPkt.payloadDelay = 0; cpuSidePort->sendTimingSnoopReq(&snoopPkt); + + // add the header delay (including crossbar and snoop + // delays) of the upward snoop to the snoop delay for this + // cache + snoop_delay += snoopPkt.headerDelay; + if (snoopPkt.memInhibitAsserted()) { // cache-to-cache response from some upper cache assert(!alreadyResponded); @@ -1796,7 +1804,7 @@ Cache::handleSnoop(PacketPtr pkt, CacheBlk *blk, bool is_timing, if (!blk || !blk->isValid()) { DPRINTF(Cache, "%s snoop miss for %s addr %#llx size %d\n", __func__, pkt->cmdString(), pkt->getAddr(), pkt->getSize()); - return; + return snoop_delay; } else { DPRINTF(Cache, "%s snoop hit for %s for addr %#llx size %d, " "old state is %s\n", __func__, pkt->cmdString(), @@ -1824,7 +1832,7 @@ Cache::handleSnoop(PacketPtr pkt, CacheBlk *blk, bool is_timing, DPRINTF(Cache, "Found addr %#llx in upper level cache for snoop %s from" " lower cache\n", pkt->getAddr(), pkt->cmdString()); pkt->setBlockCached(); - return; + return snoop_delay; } if (!pkt->req->isUncacheable() && pkt->isRead() && !invalidate) { @@ -1884,6 +1892,8 @@ Cache::handleSnoop(PacketPtr pkt, CacheBlk *blk, bool is_timing, } DPRINTF(Cache, "new state is %s\n", blk->print()); + + return snoop_delay; } @@ -1907,6 +1917,14 @@ Cache::recvTimingSnoopReq(PacketPtr pkt) Addr blk_addr = blockAlign(pkt->getAddr()); MSHR *mshr = mshrQueue.findMatch(blk_addr, is_secure); + // Update the latency cost of the snoop so that the crossbar can + // account for it. Do not overwrite what other neighbouring caches + // have already done, rather take the maximum. The update is + // tentative, for cases where we return before an upward snoop + // happens below. + pkt->snoopDelay = std::max(pkt->snoopDelay, + lookupLatency * clockPeriod()); + // Inform request(Prefetch, CleanEvict or Writeback) from below of // MSHR hit, set setBlockCached. if (mshr && pkt->mustCheckAbove()) { @@ -2004,7 +2022,12 @@ Cache::recvTimingSnoopReq(PacketPtr pkt) // We could be more selective and return here if the // request is non-exclusive or if the writeback is // exclusive. - handleSnoop(pkt, blk, true, false, false); + uint32_t snoop_delay = handleSnoop(pkt, blk, true, false, false); + + // Override what we did when we first saw the snoop, as we now + // also have the cost of the upwards snoops to account for + pkt->snoopDelay = std::max(pkt->snoopDelay, snoop_delay + + lookupLatency * clockPeriod()); } bool @@ -2030,9 +2053,8 @@ Cache::recvAtomicSnoop(PacketPtr pkt) } CacheBlk *blk = tags->findBlock(pkt->getAddr(), pkt->isSecure()); - handleSnoop(pkt, blk, false, false, false); - // We consider forwardLatency here because a snoop occurs in atomic mode - return forwardLatency * clockPeriod(); + uint32_t snoop_delay = handleSnoop(pkt, blk, false, false, false); + return snoop_delay + lookupLatency * clockPeriod(); } diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh index 3d439caa7..577eab664 100644 --- a/src/mem/cache/cache.hh +++ b/src/mem/cache/cache.hh @@ -300,12 +300,19 @@ class Cache : public BaseCache bool already_copied, bool pending_inval); /** - * Sets the blk to the new state. - * @param blk The cache block being snooped. - * @param new_state The new coherence state for the block. + * Perform an upward snoop if needed, and update the block state + * (possibly invalidating the block). Also create a response if required. + * + * @param pkt Snoop packet + * @param blk Cache block being snooped + * @param is_timing Timing or atomic for the response + * @param is_deferred Is this a deferred snoop or not? + * @param pending_inval Do we have a pending invalidation? + * + * @return The snoop delay incurred by the upwards snoop */ - void handleSnoop(PacketPtr ptk, CacheBlk *blk, - bool is_timing, bool is_deferred, bool pending_inval); + uint32_t handleSnoop(PacketPtr pkt, CacheBlk *blk, + bool is_timing, bool is_deferred, bool pending_inval); /** * Create a writeback request for the given block. diff --git a/src/mem/coherent_xbar.cc b/src/mem/coherent_xbar.cc index 52f69e0c9..32ca6d02d 100644 --- a/src/mem/coherent_xbar.cc +++ b/src/mem/coherent_xbar.cc @@ -187,6 +187,8 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id) Tick packetFinishTime = clockEdge(Cycles(1)) + pkt->payloadDelay; if (!system->bypassCaches()) { + assert(pkt->snoopDelay == 0); + // the packet is a memory-mapped request and should be // broadcasted to our snoopers but the source if (snoopFilter) { @@ -204,6 +206,10 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id) } else { forwardTiming(pkt, slave_port_id); } + + // add the snoop delay to our header delay, and then reset it + pkt->headerDelay += pkt->snoopDelay; + pkt->snoopDelay = 0; } // forwardTiming snooped into peer caches of the sender, and if @@ -377,9 +383,15 @@ CoherentXBar::recvTimingSnoopReq(PacketPtr pkt, PortID master_port_id) // we should only see express snoops from caches assert(pkt->isExpressSnoop()); + // set the packet header and payload delay, for now use forward latency + // @todo Assess the choice of latency further + calcPacketTiming(pkt, forwardLatency * clockPeriod()); + // remeber if the packet is inhibited so we can see if it changes const bool is_inhibited = pkt->memInhibitAsserted(); + assert(pkt->snoopDelay == 0); + if (snoopFilter) { // let the Snoop Filter work its magic and guide probing auto sf_res = snoopFilter->lookupSnoop(pkt); @@ -398,6 +410,10 @@ CoherentXBar::recvTimingSnoopReq(PacketPtr pkt, PortID master_port_id) forwardTiming(pkt, InvalidPortID); } + // add the snoop delay to our header delay, and then reset it + pkt->headerDelay += pkt->snoopDelay; + pkt->snoopDelay = 0; + // if we can expect a response, remember how to route it if (!is_inhibited && pkt->memInhibitAsserted()) { assert(routeTo.find(pkt->req) == routeTo.end()); diff --git a/src/mem/packet.hh b/src/mem/packet.hh index 2b667d26d..f33ee120d 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -323,6 +323,14 @@ class Packet : public Printable */ uint32_t headerDelay; + /** + * Keep track of the extra delay incurred by snooping upwards + * before sending a request down the memory system. This is used + * by the coherent crossbar to account for the additional request + * delay. + */ + uint32_t snoopDelay; + /** * The extra pipelining delay from seeing the packet until the end of * payload is transmitted by the component that provided it (if @@ -582,7 +590,7 @@ class Packet : public Printable */ Packet(const RequestPtr _req, MemCmd _cmd) : cmd(_cmd), req(_req), data(nullptr), addr(0), _isSecure(false), - size(0), headerDelay(0), payloadDelay(0), + size(0), headerDelay(0), snoopDelay(0), payloadDelay(0), senderState(NULL) { if (req->hasPaddr()) { @@ -603,7 +611,7 @@ class Packet : public Printable */ Packet(const RequestPtr _req, MemCmd _cmd, int _blkSize) : cmd(_cmd), req(_req), data(nullptr), addr(0), _isSecure(false), - headerDelay(0), payloadDelay(0), + headerDelay(0), snoopDelay(0), payloadDelay(0), senderState(NULL) { if (req->hasPaddr()) { @@ -628,6 +636,7 @@ class Packet : public Printable addr(pkt->addr), _isSecure(pkt->_isSecure), size(pkt->size), bytesValid(pkt->bytesValid), headerDelay(pkt->headerDelay), + snoopDelay(0), payloadDelay(pkt->payloadDelay), senderState(pkt->senderState) { -- cgit v1.2.3