mem: Add write coalescing and write-no-allocate to the caches

Enable the cache to detect contiguous writes and hold on to the MSHR long enough to allow the entire line to be written. If the whole line is written, the MSHR will be sent out as an invalidation request, as it is part of a whole-line write, i.e. no-fetch-on-write. The cache is also able to switch to a write-no-allocate policy on the actual completion of the writes, and instead use the tempBlock and turn the write operation into a writeback. These policies are all well-known, and described in works such as Jouppi, Cache Write Policies and Performance, vol 21, no 2, ACM, 1993. Change-Id: I19792f2970b3c6798c9b2b493acdd156897284ae Reviewed-on: https://gem5-review.googlesource.com/c/12907 Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br> Maintainer: Nikos Nikoleris <nikos.nikoleris@arm.com>
author: Nikos Nikoleris <nikos.nikoleris@arm.com> 2016-10-10 14:06:00 +0100
committer: Nikos Nikoleris <nikos.nikoleris@arm.com> 2018-10-18 09:40:41 +0000
commit: 313c015bbc61da0b8acedc84e4d136835a9f9805 (patch)
tree: e9043ea307e6c9027738e53377a23fbec3e906b8 /src/mem/cache/base.hh
parent: bf305c14c47c4bd228c0910ea3bb30ab4bc935e1 (diff)
download: gem5-313c015bbc61da0b8acedc84e4d136835a9f9805.tar.xz
1 files changed, 149 insertions, 0 deletions
diff --git a/src/mem/cache/base.hh b/src/mem/cache/base.hh
index 47218f828..b9fd7f943 100644
--- a/src/mem/cache/base.hh
+++ b/src/mem/cache/base.hh
@@ -73,6 +73,7 @@
 #include "mem/packet_queue.hh"
 #include "mem/qport.hh"
 #include "mem/request.hh"
+#include "params/WriteAllocator.hh"
 #include "sim/eventq.hh"
 #include "sim/serialize.hh"
 #include "sim/sim_exit.hh"
@@ -329,6 +330,22 @@ class BaseCache : public MemObject
     const bool prefetchOnAccess;
 
     /**
+     * The writeAllocator drives optimizations for streaming writes.
+     * It first determines whether a WriteReq MSHR should be delayed,
+     * thus ensuring that we wait longer in cases when we are write
+     * coalescing and allowing all the bytes of the line to be written
+     * before the MSHR packet is sent downstream. This works in unison
+     * with the tracking in the MSHR to check if the entire line is
+     * written. The write mode also affects the behaviour on filling
+     * any whole-line writes. Normally the cache allocates the line
+     * when receiving the InvalidateResp, but after seeing enough
+     * consecutive lines we switch to using the tempBlock, and thus
+     * end up not allocating the line, and instead turning the
+     * whole-line write into a writeback straight away.
+     */
+    WriteAllocator * const writeAllocator;
+
+    /**
      * Temporary cache block for occasional transitory use.  We use
      * the tempBlock to fill when allocation fails (e.g., when there
      * is an outstanding request that accesses the victim block) or
@@ -1161,4 +1178,136 @@ class BaseCache : public MemObject
 
 };
 
+/**
+ * The write allocator inspects write packets and detects streaming
+ * patterns. The write allocator supports a single stream where writes
+ * are expected to access consecutive locations and keeps track of
+ * the size of the area covered by the consecutive writes in byteCount.
+ *
+ * 1) When byteCount has surpassed the coalesceLimit the mode
+ * switches from ALLOCATE to COALESCE where writes should be delayed
+ * until the whole block is written at which point a single packet
+ * (whole line write) can service them.
+ *
+ * 2) When byteCount has also exceeded the noAllocateLimit (whole
+ * line) we switch to NO_ALLOCATE when writes should not allocate in
+ * the cache but rather send a whole line write to the memory below.
+ */
+class WriteAllocator : public SimObject {
+  public:
+    WriteAllocator(const WriteAllocatorParams *p) :
+        SimObject(p),
+        coalesceLimit(p->coalesce_limit * p->block_size),
+        noAllocateLimit(p->no_allocate_limit * p->block_size),
+        delayThreshold(p->delay_threshold)
+    {
+        reset();
+    }
+
+    /**
+     * Should writes be coalesced? This is true in every mode except
+     * ALLOCATE, i.e. in both COALESCE and NO_ALLOCATE.
+     *
+     * @return true if the cache should coalesce writes.
+     */
+    bool coalesce() const {
+        return mode != WriteMode::ALLOCATE;
+    }
+
+    /**
+     * Should writes allocate? True unless the mode is NO_ALLOCATE.
+     *
+     * @return true if the cache should allocate for writes.
+     */
+    bool allocate() const {
+        return mode != WriteMode::NO_ALLOCATE;
+    }
+
+    /**
+     * Reset the write allocator state, meaning that it allocates for
+     * writes and has not recorded any information about qualifying
+     * writes that might trigger a switch to coalescing and later no
+     * allocation.
+     */
+    void reset() {
+        mode = WriteMode::ALLOCATE;
+        byteCount = 0;
+        nextAddr = 0;
+    }
+
+    /**
+     * Assess whether we need to delay the current write. Each delay
+     * granted uses up one of the block's remaining delays.
+     * @param blk_addr The block address the packet writes to
+     * @return true if the current packet should be delayed
+     */
+    bool delay(Addr blk_addr) {
+        // Look up with find() so untracked blocks are not inserted
+        auto it = delayCtr.find(blk_addr);
+        if (it == delayCtr.end() || it->second <= 0)
+            return false;
+        --it->second;
+        return true;
+    }
+
+    /**
+     * Clear delay counter for the input block
+     *
+     * @param blk_addr The accessed cache block
+     */
+    void resetDelay(Addr blk_addr) {
+        delayCtr.erase(blk_addr);
+    }
+
+    /**
+     * Update the write mode based on the current write
+     * packet. This method compares the packet's address with any
+     * current stream, and updates the tracking and the mode
+     * accordingly.
+     *
+     * @param write_addr Start address of the write request
+     * @param write_size Size of the write request
+     * @param blk_addr The block address that this packet writes to
+     */
+    void updateMode(Addr write_addr, unsigned write_size, Addr blk_addr);
+
+  private:
+    /**
+     * The current mode for write coalescing and allocation, either
+     * normal operation (ALLOCATE), write coalescing (COALESCE), or
+     * write coalescing without allocation (NO_ALLOCATE).
+     */
+    enum class WriteMode : char {
+        ALLOCATE,
+        COALESCE,
+        NO_ALLOCATE,
+    };
+    WriteMode mode;
+
+    /** Address to match writes against to detect streams. */
+    Addr nextAddr;
+
+    /**
+     * Bytes written contiguously. Saturating once we no longer
+     * allocate.
+     */
+    uint32_t byteCount;
+
+    /**
+     * Limits for when to switch between the different write modes.
+     */
+    const uint32_t coalesceLimit;
+    const uint32_t noAllocateLimit;
+    /**
+     * The number of times the allocator will delay a WriteReq MSHR.
+     */
+    const uint32_t delayThreshold;
+
+    /**
+     * Remaining number of times the allocator may still delay the
+     * WriteReq MSHR of each tracked block, keyed by block address.
+     */
+    std::unordered_map<Addr, Counter> delayCtr;
+};
+
 #endif //__MEM_CACHE_BASE_HH__
author	Nikos Nikoleris <nikos.nikoleris@arm.com>	2016-10-10 14:06:00 +0100
committer	Nikos Nikoleris <nikos.nikoleris@arm.com>	2018-10-18 09:40:41 +0000
commit	313c015bbc61da0b8acedc84e4d136835a9f9805 (patch)
tree	e9043ea307e6c9027738e53377a23fbec3e906b8 /src/mem/cache/base.hh
parent	bf305c14c47c4bd228c0910ea3bb30ab4bc935e1 (diff)
download	gem5-313c015bbc61da0b8acedc84e4d136835a9f9805.tar.xz