-rw-r--r--  configs/common/O3_ARM_v7a.py |   1
-rw-r--r--  src/mem/cache/Cache.py       |  16
-rw-r--r--  src/mem/cache/base.hh        |  12
-rw-r--r--  src/mem/cache/cache.cc       | 114
-rw-r--r--  src/mem/cache/cache.hh       |  71
-rw-r--r--  src/mem/cache/mshr.cc        |  14
-rw-r--r--  src/mem/cache/mshr.hh        |   9
-rw-r--r--  src/mem/cache/mshr_queue.cc  |   4
-rw-r--r--  src/mem/cache/mshr_queue.hh  |   3

9 files changed, 207 insertions, 37 deletions
diff --git a/configs/common/O3_ARM_v7a.py b/configs/common/O3_ARM_v7a.py
index 9f250f57d..02beb11d1 100644
--- a/configs/common/O3_ARM_v7a.py
+++ b/configs/common/O3_ARM_v7a.py
@@ -185,6 +185,7 @@ class O3_ARM_v7aL2(Cache):
     assoc = 16
     write_buffers = 8
     prefetch_on_access = True
+    clusivity = 'mostly_excl'
     # Simple stride prefetcher
     prefetcher = StridePrefetcher(degree=8, latency = 1)
     tags = RandomRepl()
diff --git a/src/mem/cache/Cache.py b/src/mem/cache/Cache.py
index 8ad1177e7..48e52a8d5 100644
--- a/src/mem/cache/Cache.py
+++ b/src/mem/cache/Cache.py
@@ -84,6 +84,22 @@ class BaseCache(MemObject):
     system = Param.System(Parent.any, "System we belong to")
 
+# Enum for cache clusivity, currently mostly inclusive or mostly
+# exclusive.
+class Clusivity(Enum): vals = ['mostly_incl', 'mostly_excl']
+
 class Cache(BaseCache):
     type = 'Cache'
     cxx_header = 'mem/cache/cache.hh'
 
+    # Control whether this cache should be mostly inclusive or mostly
+    # exclusive with respect to upstream caches. The behaviour on a
+    # fill is determined accordingly. For a mostly inclusive cache,
+    # blocks are allocated on all fill operations. Thus, L1 caches
+    # should be set as mostly inclusive even if they have no upstream
+    # caches. In the case of a mostly exclusive cache, fills are not
+    # allocating unless they came directly from a non-caching source,
+    # e.g. a table walker. Additionally, on a hit from an upstream
+    # cache a line is dropped for a mostly exclusive cache.
+    clusivity = Param.Clusivity('mostly_incl',
+                                "Clusivity with upstream cache")
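As the O3_ARM_v7a.py hunk above shows, a configuration script opts in per cache level. A minimal sketch of how the new parameter would be used in a two-level setup (class names, sizes, and latencies here are illustrative, not part of this patch):

    from m5.objects import Cache

    class L1DCache(Cache):
        # L1 caches stay mostly inclusive, even with no cache above them
        size = '32kB'
        assoc = 2
        hit_latency = 2
        response_latency = 2
        mshrs = 4
        tgts_per_mshr = 20
        clusivity = 'mostly_incl'

    class L2Cache(Cache):
        # a mostly exclusive L2 behaves much like a victim cache for
        # the L1s: it only allocates fills that did not come from a
        # cache, and drops a line when a cache above hits on it
        size = '1MB'
        assoc = 16
        hit_latency = 12
        response_latency = 12
        mshrs = 16
        tgts_per_mshr = 8
        clusivity = 'mostly_excl'
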
diff --git a/src/mem/cache/base.hh b/src/mem/cache/base.hh
index a992583fe..cb1baa3f4 100644
--- a/src/mem/cache/base.hh
+++ b/src/mem/cache/base.hh
@@ -210,7 +210,8 @@ class BaseCache : public MemObject
         // overlap
         assert(addr == blockAlign(addr));
 
-        MSHR *mshr = mq->allocate(addr, size, pkt, time, order++);
+        MSHR *mshr = mq->allocate(addr, size, pkt, time, order++,
+                                  allocOnFill(pkt->cmd));
 
         if (mq->isFull()) {
             setBlocked((BlockedCause)mq->index);
@@ -234,6 +235,15 @@ class BaseCache : public MemObject
     }
 
     /**
+     * Determine if we should allocate on a fill or not.
+     *
+     * @param cmd Packet command being added as an MSHR target
+     *
+     * @return Whether we should allocate on a fill or not
+     */
+    virtual bool allocOnFill(MemCmd cmd) const = 0;
+
+    /**
      * Write back dirty blocks in the cache using functional accesses.
      */
     virtual void memWriteback() = 0;
diff --git a/src/mem/cache/cache.cc b/src/mem/cache/cache.cc
index a03790abc..58afdc79a 100644
--- a/src/mem/cache/cache.cc
+++ b/src/mem/cache/cache.cc
@@ -68,7 +68,11 @@ Cache::Cache(const CacheParams *p)
       tags(p->tags),
       prefetcher(p->prefetcher),
       doFastWrites(true),
-      prefetchOnAccess(p->prefetch_on_access)
+      prefetchOnAccess(p->prefetch_on_access),
+      clusivity(p->clusivity),
+      tempBlockWriteback(nullptr),
+      writebackTempBlockAtomicEvent(this, false,
+                                    EventBase::Delayed_Writeback_Pri)
 {
     tempBlock = new CacheBlk();
     tempBlock->data = new uint8_t[blkSize];
@@ -198,10 +202,10 @@ Cache::satisfyCpuSideRequest(PacketPtr pkt, CacheBlk *blk,
             if (blk->isDirty()) {
                 pkt->assertMemInhibit();
             }
-            // on ReadExReq we give up our copy unconditionally
-            if (blk != tempBlock)
-                tags->invalidate(blk);
-            blk->invalidate();
+            // on ReadExReq we give up our copy unconditionally,
+            // even if this cache is mostly inclusive, we may want
+            // to revisit this
+            invalidateBlock(blk);
         } else if (blk->isWritable() && !pending_downgrade &&
                    !pkt->sharedAsserted() &&
                    pkt->cmd != MemCmd::ReadCleanReq) {
@@ -220,9 +224,30 @@ Cache::satisfyCpuSideRequest(PacketPtr pkt, CacheBlk *blk,
                 if (!deferred_response) {
                     // if we are responding immediately and can
                     // signal that we're transferring ownership
-                    // along with exclusivity, do so
+                    // (inhibit set) along with exclusivity
+                    // (shared not set), do so
                     pkt->assertMemInhibit();
+
+                    // if this cache is mostly inclusive, we keep
+                    // the block as writable (exclusive), and pass
+                    // it upwards as writable and dirty
+                    // (modified), hence we have multiple caches
+                    // considering the same block writable,
+                    // something that we get away with due to the
+                    // fact that: 1) this cache has been
+                    // considered the ordering point and
+                    // responded to all snoops up till now, and 2)
+                    // we always snoop upwards before consulting
+                    // the local cache, both on a normal request
+                    // (snooping done by the crossbar), and on a
+                    // snoop
                     blk->status &= ~BlkDirty;
+
+                    // if this cache is mostly exclusive with
+                    // respect to the cache above, drop the block
+                    if (clusivity == Enums::mostly_excl) {
+                        invalidateBlock(blk);
+                    }
                 } else {
                     // if we're responding after our own miss,
                     // there's a window where the recipient didn't
@@ -241,9 +266,10 @@ Cache::satisfyCpuSideRequest(PacketPtr pkt, CacheBlk *blk,
         // Upgrade or Invalidate, since we have it Exclusively (E or
         // M), we ack then invalidate.
         assert(pkt->isUpgrade() || pkt->isInvalidate());
-        assert(blk != tempBlock);
-        tags->invalidate(blk);
-        blk->invalidate();
+
+        // for invalidations we could be looking at the temp block
+        // (for upgrades we always allocate)
+        invalidateBlock(blk);
         DPRINTF(Cache, "%s for %s addr %#llx size %d (invalidation)\n",
                 __func__, pkt->cmdString(), pkt->getAddr(), pkt->getSize());
     }
@@ -761,7 +787,8 @@ Cache::recvTimingReq(PacketPtr pkt)
                 // buffer and to schedule an event to the queued
                 // port and also takes into account the additional
                 // delay of the xbar.
-                mshr->allocateTarget(pkt, forward_time, order++);
+                mshr->allocateTarget(pkt, forward_time, order++,
+                                     allocOnFill(pkt->cmd));
                 if (mshr->getNumTargets() == numTarget) {
                     noTargetMSHR = mshr;
                     setBlocked(Blocked_NoTargets);
@@ -1027,13 +1054,15 @@ Cache::recvAtomic(PacketPtr pkt)
                 // write-line request to the cache that promoted
                 // the write to a whole line
 
-                blk = handleFill(pkt, blk, writebacks);
+                blk = handleFill(pkt, blk, writebacks,
+                                 allocOnFill(pkt->cmd));
                 satisfyCpuSideRequest(pkt, blk);
             } else if (bus_pkt->isRead() ||
                        bus_pkt->cmd == MemCmd::UpgradeResp) {
                 // we're updating cache state to allow us to
                 // satisfy the upstream request from the cache
-                blk = handleFill(bus_pkt, blk, writebacks);
+                blk = handleFill(bus_pkt, blk, writebacks,
+                                 allocOnFill(pkt->cmd));
                 satisfyCpuSideRequest(pkt, blk);
             } else {
                 // we're satisfying the upstream request without
@@ -1056,9 +1085,34 @@ Cache::recvAtomic(PacketPtr pkt)
    // immediately rather than calling requestMemSideBus() as we do
    // there).
 
-    // Handle writebacks (from the response handling) if needed
+    // do any writebacks resulting from the response handling
     doWritebacksAtomic(writebacks);
 
+    // if we used the temp block, check to see if it's valid and if so
+    // clear it out, but only do so after the call to recvAtomic is
+    // finished so that any downstream observers (such as a snoop
+    // filter) first see the fill, and only then see the eviction
+    if (blk == tempBlock && tempBlock->isValid()) {
+        // the atomic CPU calls recvAtomic for fetch and load/store
+        // sequentially, and we may already have a tempBlock
+        // writeback from the fetch that we have not yet sent
+        if (tempBlockWriteback) {
+            // if that is the case, write the previous one back, and
+            // do not schedule any new event
+            writebackTempBlockAtomic();
+        } else {
+            // the writeback/clean eviction happens after the call to
+            // recvAtomic has finished (but before any successive
+            // calls), so that the response handling from the fill is
+            // allowed to happen first
+            schedule(writebackTempBlockAtomicEvent, curTick());
+        }
+
+        tempBlockWriteback = blk->isDirty() ? writebackBlk(blk) :
+            cleanEvictBlk(blk);
+        blk->invalidate();
+    }
+
     if (pkt->needsResponse()) {
         pkt->makeAtomicResponse();
     }
@@ -1214,7 +1268,7 @@ Cache::recvTimingResp(PacketPtr pkt)
         DPRINTF(Cache, "Block for addr %#llx being updated in Cache\n",
                 pkt->getAddr());
 
-        blk = handleFill(pkt, blk, writebacks);
+        blk = handleFill(pkt, blk, writebacks, mshr->allocOnFill);
         assert(blk != NULL);
     }
@@ -1258,7 +1312,7 @@ Cache::recvTimingResp(PacketPtr pkt)
             // deferred targets if possible
             mshr->promoteExclusive();
             // NB: we use the original packet here and not the response!
-            blk = handleFill(tgt_pkt, blk, writebacks);
+            blk = handleFill(tgt_pkt, blk, writebacks, mshr->allocOnFill);
             assert(blk != NULL);
 
             // treat as a fill, and discard the invalidation
@@ -1362,9 +1416,7 @@ Cache::recvTimingResp(PacketPtr pkt)
         // should not invalidate the block, so check if the
         // invalidation should be discarded
         if (is_invalidate || mshr->hasPostInvalidate()) {
-            assert(blk != tempBlock);
-            tags->invalidate(blk);
-            blk->invalidate();
+            invalidateBlock(blk);
         } else if (mshr->hasPostDowngrade()) {
             blk->status &= ~BlkWritable;
         }
@@ -1588,6 +1640,13 @@ Cache::allocateBlock(Addr addr, bool is_secure, PacketList &writebacks)
     return blk;
 }
 
+void
+Cache::invalidateBlock(CacheBlk *blk)
+{
+    if (blk != tempBlock)
+        tags->invalidate(blk);
+    blk->invalidate();
+}
 
 // Note that the reason we return a list of writebacks rather than
 // inserting them directly in the write buffer is that this function
@@ -1595,7 +1654,8 @@ Cache::allocateBlock(Addr addr, bool is_secure, PacketList &writebacks)
 // mode we don't mess with the write buffer (we just perform the
 // writebacks atomically once the original request is complete).
 CacheBlk*
-Cache::handleFill(PacketPtr pkt, CacheBlk *blk, PacketList &writebacks)
+Cache::handleFill(PacketPtr pkt, CacheBlk *blk, PacketList &writebacks,
+                  bool allocate)
 {
     assert(pkt->isResponse() || pkt->cmd == MemCmd::WriteLineReq);
     Addr addr = pkt->getAddr();
@@ -1619,11 +1679,14 @@ Cache::handleFill(PacketPtr pkt, CacheBlk *blk, PacketList &writebacks)
         // happens in the subsequent satisfyCpuSideRequest.
         assert(pkt->isRead() || pkt->cmd == MemCmd::WriteLineReq);
 
-        // need to do a replacement
-        blk = allocateBlock(addr, is_secure, writebacks);
+        // need to do a replacement if allocating, otherwise we stick
+        // with the temporary storage
+        blk = allocate ? allocateBlock(addr, is_secure, writebacks) : NULL;
+
         if (blk == NULL) {
-            // No replaceable block... just use temporary storage to
-            // complete the current request and then get rid of it
+            // No replaceable block or a mostly exclusive
+            // cache... just use temporary storage to complete the
+            // current request and then get rid of it
             assert(!tempBlock->isValid());
             blk = tempBlock;
             tempBlock->set = tags->extractSet(addr);
@@ -1877,6 +1940,7 @@ Cache::handleSnoop(PacketPtr pkt, CacheBlk *blk, bool is_timing,
         // applies both to reads and writes and that for writes it
         // works thanks to the fact that we still have dirty data and
         // will write it back at a later point
+        assert(!pkt->memInhibitAsserted());
         pkt->assertMemInhibit();
         if (have_exclusive) {
             // in the case of an uncacheable request there is no point
@@ -1911,9 +1975,7 @@ Cache::handleSnoop(PacketPtr pkt, CacheBlk *blk, bool is_timing,
     // Do this last in case it deallocates block data or something
     // like that
     if (invalidate) {
-        if (blk != tempBlock)
-            tags->invalidate(blk);
-        blk->invalidate();
+        invalidateBlock(blk);
     }
 
     DPRINTF(Cache, "new state is %s\n", blk->print());
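Taken together, the cache.cc changes make a mostly exclusive cache behave much like a victim cache: it only allocates fills that did not originate in a cache above, and it drops a line once an upstream cache hits on it. The toy Python model below (not gem5 code; all names are invented for illustration) makes that externally visible difference concrete:

    MOSTLY_INCL, MOSTLY_EXCL = 'mostly_incl', 'mostly_excl'

    class ToyCache:
        def __init__(self, clusivity):
            self.clusivity = clusivity
            self.blocks = set()          # addresses currently cached

        def fill(self, addr, from_cache):
            # mostly inclusive: allocate on every fill; mostly
            # exclusive: allocate only if the requester is a
            # non-caching source (e.g. a table walker), otherwise the
            # data merely passes through via the temporary block
            if self.clusivity == MOSTLY_INCL or not from_cache:
                self.blocks.add(addr)

        def hit_from_upstream(self, addr):
            # on a hit that supplies data to an upstream cache, a
            # mostly exclusive cache drops its own copy
            if self.clusivity == MOSTLY_EXCL:
                self.blocks.discard(addr)

    l2_incl = ToyCache(MOSTLY_INCL)
    l2_excl = ToyCache(MOSTLY_EXCL)

    for l2 in (l2_incl, l2_excl):
        l2.fill(0x80, from_cache=True)   # fill on behalf of an L1 miss
        l2.fill(0xc0, from_cache=False)  # fill for a table-walker request

    print(sorted(l2_incl.blocks))  # [128, 192]: inclusive allocates both
    print(sorted(l2_excl.blocks))  # [192]: exclusive skips the L1-bound fill

    l2_incl.hit_from_upstream(0x80)
    l2_excl.hit_from_upstream(0xc0)
    print(0x80 in l2_incl.blocks)  # True: inclusive keeps its copy
    print(0xc0 in l2_excl.blocks)  # False: exclusive drops the line
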
diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh
index ae9e7e694..6da837003 100644
--- a/src/mem/cache/cache.hh
+++ b/src/mem/cache/cache.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012-2014 ARM Limited
+ * Copyright (c) 2012-2015 ARM Limited
  * All rights reserved.
  *
  * The license below extends only to copyright in the software and shall
@@ -53,6 +53,7 @@
 #define __MEM_CACHE_CACHE_HH__
 
 #include "base/misc.hh" // fatal, panic, and warn
+#include "enums/Clusivity.hh"
 #include "mem/cache/base.hh"
 #include "mem/cache/blk.hh"
 #include "mem/cache/mshr.hh"
@@ -194,6 +195,13 @@ class Cache : public BaseCache
      */
     const bool prefetchOnAccess;
 
+    /**
+     * Clusivity with respect to the upstream cache, determining if we
+     * fill into both this cache and the cache above on a miss. Note
+     * that we currently do not support strict clusivity policies.
+     */
+    const Enums::Clusivity clusivity;
+
     /**
      * Upstream caches need this packet until true is returned, so
      * hold it for deletion until a subsequent call
@@ -201,7 +209,36 @@ class Cache : public BaseCache
      */
     std::unique_ptr<Packet> pendingDelete;
 
     /**
+     * Writebacks from the tempBlock, resulting on the response path
+     * in atomic mode, must happen after the call to recvAtomic has
+     * finished (for the right ordering of the packets). We therefore
+     * need to hold on to the packets, and have a method and an event
+     * to send them.
+     */
+    PacketPtr tempBlockWriteback;
+
+    /**
+     * Send the outstanding tempBlock writeback. To be called after
+     * recvAtomic finishes in cases where the block we filled is in
+     * fact the tempBlock, and now needs to be written back.
+     */
+    void writebackTempBlockAtomic() {
+        assert(tempBlockWriteback != nullptr);
+        PacketList writebacks{tempBlockWriteback};
+        doWritebacksAtomic(writebacks);
+        tempBlockWriteback = nullptr;
+    }
+
+    /**
+     * An event to writeback the tempBlock after recvAtomic
+     * finishes. To avoid other calls to recvAtomic getting in
+     * between, we create this event with a higher priority.
+     */
+    EventWrapper<Cache, &Cache::writebackTempBlockAtomic> \
+        writebackTempBlockAtomicEvent;
+
+    /**
      * Does all the processing necessary to perform the provided request.
      * @param pkt The memory request to perform.
      * @param blk The cache block to be updated.
@@ -226,17 +263,47 @@ class Cache : public BaseCache
     CacheBlk *allocateBlock(Addr addr, bool is_secure, PacketList &writebacks);
 
     /**
+     * Invalidate a cache block.
+     *
+     * @param blk Block to invalidate
+     */
+    void invalidateBlock(CacheBlk *blk);
+
+    /**
      * Populates a cache block and handles all outstanding requests for the
     * satisfied fill request. This version takes two memory requests. One
     * contains the fill data, the other is an optional target to satisfy.
     * @param pkt The memory request with the fill data.
     * @param blk The cache block if it already exists.
     * @param writebacks List for any writebacks that need to be performed.
+     * @param allocate Whether to allocate a block or use the temp block
     * @return Pointer to the new cache block.
     */
     CacheBlk *handleFill(PacketPtr pkt, CacheBlk *blk,
-                         PacketList &writebacks);
+                         PacketList &writebacks, bool allocate);
 
+    /**
+     * Determine whether we should allocate on a fill or not. If this
+     * cache is mostly inclusive with regard to the upstream cache(s)
+     * we always allocate (for any non-forwarded and cacheable
+     * requests). In the case of a mostly exclusive cache, we allocate
+     * on fill if the packet did not come from a cache, thus if we
+     * are dealing with a whole-line write (which behaves much like a
+     * writeback), the original target packet came from a non-caching
+     * source, or if we are performing a prefetch or LLSC.
+     *
+     * @param cmd Command of the incoming requesting packet
+     * @return Whether we should allocate on the fill
+     */
+    inline bool allocOnFill(MemCmd cmd) const
+    {
+        return clusivity == Enums::mostly_incl ||
+            cmd == MemCmd::WriteLineReq ||
+            cmd == MemCmd::ReadReq ||
+            cmd == MemCmd::WriteReq ||
+            cmd.isPrefetch() ||
+            cmd.isLLSC();
+    }
 
     /**
      * Performs the access specified by the request.
diff --git a/src/mem/cache/mshr.cc b/src/mem/cache/mshr.cc
index f71ff6524..b58c256cd 100644
--- a/src/mem/cache/mshr.cc
+++ b/src/mem/cache/mshr.cc
@@ -66,7 +66,8 @@ MSHR::MSHR() : readyTime(0), _isUncacheable(false), downstreamPending(false),
                postInvalidate(false), postDowngrade(false),
                queue(NULL), order(0), blkAddr(0),
                blkSize(0), isSecure(false), inService(false),
-               isForward(false), threadNum(InvalidThreadID), data(NULL)
+               isForward(false), allocOnFill(false),
+               threadNum(InvalidThreadID), data(NULL)
 {
 }
 
@@ -202,7 +203,7 @@ MSHR::TargetList::print(std::ostream &os, int verbosity,
 
 void
 MSHR::allocate(Addr blk_addr, unsigned blk_size, PacketPtr target,
-               Tick when_ready, Counter _order)
+               Tick when_ready, Counter _order, bool alloc_on_fill)
 {
     blkAddr = blk_addr;
     blkSize = blk_size;
@@ -211,6 +212,7 @@ MSHR::allocate(Addr blk_addr, unsigned blk_size, PacketPtr target,
     order = _order;
     assert(target);
     isForward = false;
+    allocOnFill = alloc_on_fill;
     _isUncacheable = target->req->isUncacheable();
     inService = false;
     downstreamPending = false;
@@ -274,7 +276,8 @@ MSHR::deallocate()
  * Adds a target to an MSHR
  */
 void
-MSHR::allocateTarget(PacketPtr pkt, Tick whenReady, Counter _order)
+MSHR::allocateTarget(PacketPtr pkt, Tick whenReady, Counter _order,
+                     bool alloc_on_fill)
 {
     // assume we'd never issue a prefetch when we've got an
     // outstanding miss
@@ -285,6 +288,10 @@ MSHR::allocateTarget(PacketPtr pkt, Tick whenReady, Counter _order)
     // have targets added if originally allocated uncacheable
     assert(!_isUncacheable);
 
+    // potentially re-evaluate whether we should allocate on a fill or
+    // not
+    allocOnFill = allocOnFill || alloc_on_fill;
+
     // if there's a request already in service for this MSHR, we will
     // have to defer the new target until after the response if any of
     // the following are true:
@@ -478,6 +485,7 @@ MSHR::print(std::ostream &os, int verbosity, const std::string &prefix) const
             prefix, blkAddr, blkAddr + blkSize - 1,
             isSecure ? "s" : "ns",
             isForward ? "Forward" : "",
+            allocOnFill ? "AllocOnFill" : "",
             isForwardNoResponse() ? "ForwNoResp" : "",
             needsExclusive() ? "Excl" : "",
             _isUncacheable ? "Unc" : "",
diff --git a/src/mem/cache/mshr.hh b/src/mem/cache/mshr.hh
index 11ca4db40..45d7628fd 100644
--- a/src/mem/cache/mshr.hh
+++ b/src/mem/cache/mshr.hh
@@ -161,6 +161,9 @@ class MSHR : public Packet::SenderState, public Printable
     /** True if the request is just a simple forward from an upper level */
     bool isForward;
 
+    /** Keep track of whether we should allocate on fill or not */
+    bool allocOnFill;
+
     /** The pending* and post* flags are only valid if inService is
      *  true. Using the accessor functions lets us detect if these
      *  flags are accessed improperly.
@@ -218,9 +221,10 @@ class MSHR : public Packet::SenderState, public Printable
      * @param pkt The original miss.
      * @param when_ready When should the MSHR be ready to act upon.
      * @param _order The logical order of this MSHR
+     * @param alloc_on_fill Should the cache allocate a block on fill
      */
     void allocate(Addr blk_addr, unsigned blk_size, PacketPtr pkt,
-                  Tick when_ready, Counter _order);
+                  Tick when_ready, Counter _order, bool alloc_on_fill);
 
     bool markInService(bool pending_dirty_resp);
@@ -235,7 +239,8 @@ class MSHR : public Packet::SenderState, public Printable
      * Add a request to the list of targets.
      * @param target The target.
      */
-    void allocateTarget(PacketPtr target, Tick when, Counter order);
+    void allocateTarget(PacketPtr target, Tick when, Counter order,
+                        bool alloc_on_fill);
     bool handleSnoop(PacketPtr target, Counter order);
 
     /** A simple constructor. */
diff --git a/src/mem/cache/mshr_queue.cc b/src/mem/cache/mshr_queue.cc
index 47f044d63..3aa5d85be 100644
--- a/src/mem/cache/mshr_queue.cc
+++ b/src/mem/cache/mshr_queue.cc
@@ -146,14 +146,14 @@ MSHRQueue::addToReadyList(MSHR *mshr)
 
 MSHR *
 MSHRQueue::allocate(Addr blk_addr, unsigned blk_size, PacketPtr pkt,
-                    Tick when_ready, Counter order)
+                    Tick when_ready, Counter order, bool alloc_on_fill)
 {
     assert(!freeList.empty());
     MSHR *mshr = freeList.front();
     assert(mshr->getNumTargets() == 0);
     freeList.pop_front();
 
-    mshr->allocate(blk_addr, blk_size, pkt, when_ready, order);
+    mshr->allocate(blk_addr, blk_size, pkt, when_ready, order, alloc_on_fill);
     mshr->allocIter = allocatedList.insert(allocatedList.end(), mshr);
     mshr->readyIter = addToReadyList(mshr);
 
diff --git a/src/mem/cache/mshr_queue.hh b/src/mem/cache/mshr_queue.hh
index eebfed827..29191a358 100644
--- a/src/mem/cache/mshr_queue.hh
+++ b/src/mem/cache/mshr_queue.hh
@@ -152,13 +152,14 @@ class MSHRQueue : public Drainable
      * @param pkt The original miss.
      * @param when_ready When should the MSHR be ready to act upon.
      * @param order The logical order of this MSHR
+     * @param alloc_on_fill Should the cache allocate a block on fill
      *
      * @return A pointer to the allocated MSHR.
      *
      * @pre There are free entries.
      */
     MSHR *allocate(Addr blk_addr, unsigned blk_size, PacketPtr pkt,
-                   Tick when_ready, Counter order);
+                   Tick when_ready, Counter order, bool alloc_on_fill);
 
     /**
      * Removes the given MSHR from the queue. This places the MSHR on the