Mem: Use cycles to express cache-related latencies

This patch changes the cache-related latencies from an absolute time expressed in Ticks, to a number of cycles that can be scaled with the clock period of the caches. Ultimately this patch serves to enable future work that involves dynamic frequency scaling. As an immediate benefit it also makes it more convenient to specify cache performance without implicitly assuming a specific CPU core operating frequency. The stat blocked_cycles, which actually counted in ticks, is now updated to count in cycles. As the timing is now rounded to the clock edges of the cache, some regression results change. Plenty of them have very minor changes, whereas some regressions with a short run-time are perturbed quite significantly. A follow-on patch updates all the statistics for the regressions.
author: Andreas Hansson <andreas.hansson@arm.com> 2012-10-15 08:10:54 -0400
committer: Andreas Hansson <andreas.hansson@arm.com> 2012-10-15 08:10:54 -0400
commit: 88554790c34f6fef4ba6285927fb9742b90ab258 (patch)
tree: 402fe474613aea36065f773f410d431637592955 /configs
parent: d17f5084ed93efd6bdb3ed46b2f81b9d1240af8c (diff)
download: gem5-88554790c34f6fef4ba6285927fb9742b90ab258.tar.xz
3 files changed, 31 insertions, 21 deletions
diff --git a/configs/common/Caches.py b/configs/common/Caches.py
index f16a83559..0b5f9e182 100644
--- a/configs/common/Caches.py
+++ b/configs/common/Caches.py
@@ -1,3 +1,15 @@
+# Copyright (c) 2012 ARM Limited
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder.  You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
 # Copyright (c) 2006-2007 The Regents of The University of Michigan
 # All rights reserved.
 #
@@ -31,8 +43,8 @@ from m5.objects import *
 class L1Cache(BaseCache):
     assoc = 2
     block_size = 64
-    hit_latency = '1ns'
-    response_latency = '1ns'
+    hit_latency = 2
+    response_latency = 2
     mshrs = 10
     tgts_per_mshr = 20
     is_top_level = True
@@ -40,16 +52,16 @@ class L1Cache(BaseCache):
 class L2Cache(BaseCache):
     assoc = 8
     block_size = 64
-    hit_latency = '10ns'
-    response_latency = '10ns'
+    hit_latency = 20
+    response_latency = 20
     mshrs = 20
     tgts_per_mshr = 12
 
 class PageTableWalkerCache(BaseCache):
     assoc = 2
     block_size = 64
-    hit_latency = '1ns'
-    response_latency = '1ns'
+    hit_latency = 2
+    response_latency = 2
     mshrs = 10
     size = '1kB'
     tgts_per_mshr = 12
@@ -58,8 +70,8 @@ class PageTableWalkerCache(BaseCache):
 class IOCache(BaseCache):
     assoc = 8
     block_size = 64
-    hit_latency = '10ns'
-    response_latency = '10ns'
+    hit_latency = 50
+    response_latency = 50
     mshrs = 20
     size = '1kB'
     tgts_per_mshr = 12
diff --git a/configs/common/O3_ARM_v7a.py b/configs/common/O3_ARM_v7a.py
index 20ef10ebc..c971df7fb 100644
--- a/configs/common/O3_ARM_v7a.py
+++ b/configs/common/O3_ARM_v7a.py
@@ -145,10 +145,9 @@ class O3_ARM_v7a_3(DerivO3CPU):
     defer_registration= False
 
 # Instruction Cache
-# All latencys assume a 1GHz clock rate, with a faster clock they would be faster
 class O3_ARM_v7a_ICache(BaseCache):
-    hit_latency = '1ns'
-    response_latency = '1ns'
+    hit_latency = 1
+    response_latency = 1
     block_size = 64
     mshrs = 2
     tgts_per_mshr = 8
@@ -157,10 +156,9 @@ class O3_ARM_v7a_ICache(BaseCache):
     is_top_level = 'true'
 
 # Data Cache
-# All latencys assume a 1GHz clock rate, with a faster clock they would be faster
 class O3_ARM_v7a_DCache(BaseCache):
-    hit_latency = '2ns'
-    response_latency = '2ns'
+    hit_latency = 2
+    response_latency = 2
     block_size = 64
     mshrs = 6
     tgts_per_mshr = 8
@@ -172,8 +170,8 @@ class O3_ARM_v7a_DCache(BaseCache):
 # TLB Cache 
 # Use a cache as a L2 TLB
 class O3_ARM_v7aWalkCache(BaseCache):
-    hit_latency = '4ns'
-    response_latency = '4ns'
+    hit_latency = 4
+    response_latency = 4
     block_size = 64
     mshrs = 6
     tgts_per_mshr = 8
@@ -184,10 +182,9 @@ class O3_ARM_v7aWalkCache(BaseCache):
 
 
 # L2 Cache
-# All latencys assume a 1GHz clock rate, with a faster clock they would be faster
 class O3_ARM_v7aL2(BaseCache):
-    hit_latency = '12ns'
-    response_latency = '12ns'
+    hit_latency = 12
+    response_latency = 12
     block_size = 64
     mshrs = 16
     tgts_per_mshr = 8
@@ -196,5 +193,5 @@ class O3_ARM_v7aL2(BaseCache):
     write_buffers = 8
     prefetch_on_access = 'true'
     # Simple stride prefetcher
-    prefetcher = StridePrefetcher(degree=8, latency='1.0ns')
+    prefetcher = StridePrefetcher(degree=8, latency = 1)
 
diff --git a/configs/example/fs.py b/configs/example/fs.py
index 724e32813..ddba8554a 100644
--- a/configs/example/fs.py
+++ b/configs/example/fs.py
@@ -122,7 +122,8 @@ if bm[0]:
 else:
     mem_size = SysConfig().mem()
 if options.caches or options.l2cache:
-    test_sys.iocache = IOCache(addr_ranges=[test_sys.physmem.range])
+    test_sys.iocache = IOCache(clock = '1GHz',
+                               addr_ranges=[test_sys.physmem.range])
     test_sys.iocache.cpu_side = test_sys.iobus.master
     test_sys.iocache.mem_side = test_sys.membus.slave
 else:
author	Andreas Hansson <andreas.hansson@arm.com>	2012-10-15 08:10:54 -0400
committer	Andreas Hansson <andreas.hansson@arm.com>	2012-10-15 08:10:54 -0400
commit	88554790c34f6fef4ba6285927fb9742b90ab258 (patch)
tree	402fe474613aea36065f773f410d431637592955 /configs
parent	d17f5084ed93efd6bdb3ed46b2f81b9d1240af8c (diff)
download	gem5-88554790c34f6fef4ba6285927fb9742b90ab258.tar.xz