diff options
24 files changed, 54 insertions, 4 deletions
diff --git a/configs/common/Caches.py b/configs/common/Caches.py index 3adc7e5c9..ffcd63c49 100644 --- a/configs/common/Caches.py +++ b/configs/common/Caches.py @@ -34,6 +34,7 @@ class L1Cache(BaseCache): latency = '1ns' mshrs = 10 tgts_per_mshr = 5 + is_top_level = True class L2Cache(BaseCache): assoc = 8 @@ -49,6 +50,7 @@ class PageTableWalkerCache(BaseCache): mshrs = 10 size = '1kB' tgts_per_mshr = 12 + is_top_level = True class IOCache(BaseCache): assoc = 8 @@ -58,3 +60,4 @@ class IOCache(BaseCache): size = '1kB' tgts_per_mshr = 12 forward_snoops = False + is_top_level = True diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index a2f2b4f8a..3092bd937 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -112,6 +112,9 @@ DefaultFetch<Impl>::IcachePort::recvTiming(PacketPtr pkt) { DPRINTF(Fetch, "Received timing\n"); if (pkt->isResponse()) { + // We shouldn't ever get a block in ownership state + assert(!(pkt->memInhibitAsserted() && !pkt->sharedAsserted())); + fetch->processCacheCompletion(pkt); } //else Snooped a coherence request, just return diff --git a/src/dev/io_device.cc b/src/dev/io_device.cc index be97bc4ad..ffe8fdf06 100644 --- a/src/dev/io_device.cc +++ b/src/dev/io_device.cc @@ -139,6 +139,9 @@ DmaPort::recvTiming(PacketPtr pkt) assert(pendingCount >= 0); assert(state); + // We shouldn't ever get a block in ownership state + assert(!(pkt->memInhibitAsserted() && !pkt->sharedAsserted())); + state->numBytes += pkt->req->getSize(); assert(state->totBytes >= state->numBytes); if (state->totBytes == state->numBytes) { diff --git a/src/mem/cache/BaseCache.py b/src/mem/cache/BaseCache.py index dffac2234..5c7ae5274 100644 --- a/src/mem/cache/BaseCache.py +++ b/src/mem/cache/BaseCache.py @@ -48,6 +48,7 @@ class BaseCache(MemObject): size = Param.MemorySize("capacity in bytes") forward_snoops = Param.Bool(True, "forward snoops from mem side to cpu side") + is_top_level = Param.Bool(False, "Is this cache at the top level (e.g. L1)") subblock_size = Param.Int(0, "Size of subblock in IIC used for compression") tgts_per_mshr = Param.Int("max number of accesses per MSHR") diff --git a/src/mem/cache/base.cc b/src/mem/cache/base.cc index 9166e1a09..b7e331d54 100644 --- a/src/mem/cache/base.cc +++ b/src/mem/cache/base.cc @@ -58,6 +58,7 @@ BaseCache::BaseCache(const Params *p) hitLatency(p->latency), numTarget(p->tgts_per_mshr), forwardSnoops(p->forward_snoops), + isTopLevel(p->is_top_level), blocked(0), noTargetMSHR(NULL), missCount(p->max_miss_count), diff --git a/src/mem/cache/base.hh b/src/mem/cache/base.hh index e8a644296..28ddf5054 100644 --- a/src/mem/cache/base.hh +++ b/src/mem/cache/base.hh @@ -194,6 +194,11 @@ class BaseCache : public MemObject /** Do we forward snoops from mem side port through to cpu side port? */ bool forwardSnoops; + /** Is this cache a toplevel cache (e.g. L1, I/O cache). If so we should + * never try to forward ownership and similar optimizations to the cpu + * side */ + bool isTopLevel; + /** * Bit vector of the blocking reasons for the access path. * @sa #BlockedCause diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index e4e4a3c92..0b2b273f9 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -216,7 +216,7 @@ Cache<TagStore>::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk, if (blk->isDirty()) { // special considerations if we're owner: - if (!deferred_response) { + if (!deferred_response && !isTopLevel) { // if we are responding immediately and can // signal that we're transferring ownership // along with exclusivity, do so diff --git a/tests/configs/inorder-timing.py b/tests/configs/inorder-timing.py index af58cafa5..ddf37b5ec 100644 --- a/tests/configs/inorder-timing.py +++ b/tests/configs/inorder-timing.py @@ -37,8 +37,12 @@ class MyCache(BaseCache): mshrs = 10 tgts_per_mshr = 5 +class MyL1Cache(MyCache): + is_top_level = True + cpu = InOrderCPU(cpu_id=0) -cpu.addTwoLevelCacheHierarchy(MyCache(size = '128kB'), MyCache(size = '256kB'), +cpu.addTwoLevelCacheHierarchy(MyL1Cache(size = '128kB'), + MyL1Cache(size = '256kB'), MyCache(size = '2MB', latency='10ns')) cpu.clock = '2GHz' diff --git a/tests/configs/memtest.py b/tests/configs/memtest.py index d75bd3d8c..f62381473 100644 --- a/tests/configs/memtest.py +++ b/tests/configs/memtest.py @@ -38,6 +38,7 @@ class L1(BaseCache): block_size = 64 mshrs = 12 tgts_per_mshr = 8 + is_top_level = True # ---------------------- # Base L2 Cache diff --git a/tests/configs/o3-timing-mp.py b/tests/configs/o3-timing-mp.py index 5c770cdbc..35811282c 100644 --- a/tests/configs/o3-timing-mp.py +++ b/tests/configs/o3-timing-mp.py @@ -39,6 +39,7 @@ class L1(BaseCache): block_size = 64 mshrs = 4 tgts_per_mshr = 8 + is_top_level = True # ---------------------- # Base L2 Cache diff --git a/tests/configs/o3-timing.py b/tests/configs/o3-timing.py index a4c054122..d4a69d94a 100644 --- a/tests/configs/o3-timing.py +++ b/tests/configs/o3-timing.py @@ -37,8 +37,12 @@ class MyCache(BaseCache): mshrs = 10 tgts_per_mshr = 5 +class MyL1Cache(MyCache): + is_top_level = True + cpu = DerivO3CPU(cpu_id=0) -cpu.addTwoLevelCacheHierarchy(MyCache(size = '128kB'), MyCache(size = '256kB'), +cpu.addTwoLevelCacheHierarchy(MyL1Cache(size = '128kB'), + MyL1Cache(size = '256kB'), MyCache(size = '2MB')) cpu.clock = '2GHz' diff --git a/tests/configs/pc-simple-atomic.py b/tests/configs/pc-simple-atomic.py index 382899eb5..1c35ff2d9 100644 --- a/tests/configs/pc-simple-atomic.py +++ b/tests/configs/pc-simple-atomic.py @@ -43,6 +43,7 @@ class L1(BaseCache): block_size = 64 mshrs = 4 tgts_per_mshr = 8 + is_top_level = True # ---------------------- # Base L2 Cache @@ -65,6 +66,7 @@ class PageTableWalkerCache(BaseCache): mshrs = 10 size = '1kB' tgts_per_mshr = 12 + is_top_level = True # --------------------- # I/O Cache @@ -78,6 +80,7 @@ class IOCache(BaseCache): tgts_per_mshr = 12 addr_range = AddrRange(0, size=mem_size) forward_snoops = False + is_top_level = True #cpu cpu = AtomicSimpleCPU(cpu_id=0) diff --git a/tests/configs/pc-simple-timing.py b/tests/configs/pc-simple-timing.py index 7452e2542..9c9f4aeca 100644 --- a/tests/configs/pc-simple-timing.py +++ b/tests/configs/pc-simple-timing.py @@ -44,6 +44,7 @@ class L1(BaseCache): block_size = 64 mshrs = 4 tgts_per_mshr = 8 + is_top_level = True # ---------------------- # Base L2 Cache diff --git a/tests/configs/realview-simple-atomic.py b/tests/configs/realview-simple-atomic.py index ab6d612d4..7340be7a4 100644 --- a/tests/configs/realview-simple-atomic.py +++ b/tests/configs/realview-simple-atomic.py @@ -40,6 +40,7 @@ class L1(BaseCache): block_size = 64 mshrs = 4 tgts_per_mshr = 8 + is_top_level = True # ---------------------- # Base L2 Cache diff --git a/tests/configs/realview-simple-timing.py b/tests/configs/realview-simple-timing.py index 53b6ab2b2..83b643c52 100644 --- a/tests/configs/realview-simple-timing.py +++ b/tests/configs/realview-simple-timing.py @@ -41,6 +41,7 @@ class L1(BaseCache): block_size = 64 mshrs = 4 tgts_per_mshr = 8 + is_top_level = True # ---------------------- # Base L2 Cache diff --git a/tests/configs/simple-atomic-mp.py b/tests/configs/simple-atomic-mp.py index d88a9b395..4db741b8a 100644 --- a/tests/configs/simple-atomic-mp.py +++ b/tests/configs/simple-atomic-mp.py @@ -38,6 +38,7 @@ class L1(BaseCache): block_size = 64 mshrs = 4 tgts_per_mshr = 8 + is_top_level = True # ---------------------- # Base L2 Cache diff --git a/tests/configs/simple-timing-mp.py b/tests/configs/simple-timing-mp.py index f5793b282..6f4090ec2 100644 --- a/tests/configs/simple-timing-mp.py +++ b/tests/configs/simple-timing-mp.py @@ -38,6 +38,7 @@ class L1(BaseCache): block_size = 64 mshrs = 4 tgts_per_mshr = 8 + is_top_level = True # ---------------------- # Base L2 Cache diff --git a/tests/configs/simple-timing.py b/tests/configs/simple-timing.py index 739e11e55..bc9d016c5 100644 --- a/tests/configs/simple-timing.py +++ b/tests/configs/simple-timing.py @@ -36,8 +36,12 @@ class MyCache(BaseCache): mshrs = 10 tgts_per_mshr = 5 +class MyL1Cache(MyCache): + is_top_level = True + cpu = TimingSimpleCPU(cpu_id=0) -cpu.addTwoLevelCacheHierarchy(MyCache(size = '128kB'), MyCache(size = '256kB'), +cpu.addTwoLevelCacheHierarchy(MyL1Cache(size = '128kB'), + MyL1Cache(size = '256kB'), MyCache(size = '2MB', latency='10ns')) system = System(cpu = cpu, physmem = PhysicalMemory(), diff --git a/tests/configs/tsunami-o3-dual.py b/tests/configs/tsunami-o3-dual.py index 7744560f9..125e228a7 100644 --- a/tests/configs/tsunami-o3-dual.py +++ b/tests/configs/tsunami-o3-dual.py @@ -41,6 +41,7 @@ class L1(BaseCache): block_size = 64 mshrs = 4 tgts_per_mshr = 8 + is_top_level = True # ---------------------- # Base L2 Cache @@ -65,6 +66,7 @@ class IOCache(BaseCache): tgts_per_mshr = 12 addr_range=AddrRange(0, size='8GB') forward_snoops = False + is_top_level = True #cpu cpus = [ DerivO3CPU(cpu_id=i) for i in xrange(2) ] diff --git a/tests/configs/tsunami-o3.py b/tests/configs/tsunami-o3.py index fd2d66431..13212d5d9 100644 --- a/tests/configs/tsunami-o3.py +++ b/tests/configs/tsunami-o3.py @@ -41,6 +41,7 @@ class L1(BaseCache): block_size = 64 mshrs = 4 tgts_per_mshr = 8 + is_top_level = True # ---------------------- # Base L2 Cache @@ -65,6 +66,7 @@ class IOCache(BaseCache): tgts_per_mshr = 12 addr_range=AddrRange(0, size='8GB') forward_snoops = False + is_top_level = True #cpu cpu = DerivO3CPU(cpu_id=0) diff --git a/tests/configs/tsunami-simple-atomic-dual.py b/tests/configs/tsunami-simple-atomic-dual.py index 9d3dbaa91..2e56ce851 100644 --- a/tests/configs/tsunami-simple-atomic-dual.py +++ b/tests/configs/tsunami-simple-atomic-dual.py @@ -40,6 +40,7 @@ class L1(BaseCache): block_size = 64 mshrs = 4 tgts_per_mshr = 8 + is_top_level = True # ---------------------- # Base L2 Cache @@ -64,6 +65,7 @@ class IOCache(BaseCache): tgts_per_mshr = 12 addr_range=AddrRange(0, size='8GB') forward_snoops = False + is_top_level = True #cpu cpus = [ AtomicSimpleCPU(cpu_id=i) for i in xrange(2) ] diff --git a/tests/configs/tsunami-simple-atomic.py b/tests/configs/tsunami-simple-atomic.py index cbacf1995..3c1981464 100644 --- a/tests/configs/tsunami-simple-atomic.py +++ b/tests/configs/tsunami-simple-atomic.py @@ -40,6 +40,7 @@ class L1(BaseCache): block_size = 64 mshrs = 4 tgts_per_mshr = 8 + is_top_level = True # ---------------------- # Base L2 Cache @@ -64,6 +65,7 @@ class IOCache(BaseCache): tgts_per_mshr = 12 addr_range=AddrRange(0, size='8GB') forward_snoops = False + is_top_level = True #cpu cpu = AtomicSimpleCPU(cpu_id=0) diff --git a/tests/configs/tsunami-simple-timing-dual.py b/tests/configs/tsunami-simple-timing-dual.py index f0105461d..747cdac18 100644 --- a/tests/configs/tsunami-simple-timing-dual.py +++ b/tests/configs/tsunami-simple-timing-dual.py @@ -40,6 +40,7 @@ class L1(BaseCache): block_size = 64 mshrs = 4 tgts_per_mshr = 8 + is_top_level = True # ---------------------- # Base L2 Cache @@ -64,6 +65,7 @@ class IOCache(BaseCache): tgts_per_mshr = 12 addr_range=AddrRange(0, size='8GB') forward_snoops = False + is_top_level = True #cpu cpus = [ TimingSimpleCPU(cpu_id=i) for i in xrange(2) ] diff --git a/tests/configs/tsunami-simple-timing.py b/tests/configs/tsunami-simple-timing.py index 9a262b3b2..110e6ee74 100644 --- a/tests/configs/tsunami-simple-timing.py +++ b/tests/configs/tsunami-simple-timing.py @@ -41,6 +41,7 @@ class L1(BaseCache): block_size = 64 mshrs = 4 tgts_per_mshr = 8 + is_top_level = True # ---------------------- # Base L2 Cache @@ -65,6 +66,7 @@ class IOCache(BaseCache): tgts_per_mshr = 12 addr_range=AddrRange(0, size='8GB') forward_snoops = False + is_top_level = True #cpu cpu = TimingSimpleCPU(cpu_id=0) |