summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/mem/cache/BaseCache.py4
-rw-r--r--src/mem/cache/base.cc218
-rw-r--r--src/mem/cache/base.hh48
-rw-r--r--src/mem/cache/blk.hh7
-rw-r--r--src/mem/cache/cache_impl.hh33
-rw-r--r--src/mem/cache/tags/base.cc12
-rw-r--r--src/mem/cache/tags/base.hh4
-rw-r--r--src/mem/cache/tags/lru.cc55
-rw-r--r--src/mem/request.hh5
-rw-r--r--tests/configs/memtest.py1
-rw-r--r--tests/configs/o3-timing-mp.py1
-rw-r--r--tests/configs/realview-o3-dual.py1
-rw-r--r--tests/configs/realview-simple-timing-dual.py1
-rw-r--r--tests/configs/simple-atomic-mp.py1
-rw-r--r--tests/configs/simple-timing-mp.py1
-rw-r--r--tests/configs/tsunami-o3-dual.py1
-rw-r--r--tests/configs/tsunami-simple-atomic-dual.py1
-rw-r--r--tests/configs/tsunami-simple-timing-dual.py1
18 files changed, 251 insertions, 144 deletions
diff --git a/src/mem/cache/BaseCache.py b/src/mem/cache/BaseCache.py
index e9e60859f..4389eb356 100644
--- a/src/mem/cache/BaseCache.py
+++ b/src/mem/cache/BaseCache.py
@@ -27,7 +27,7 @@
# Authors: Nathan Binkert
from m5.params import *
-from m5.proxy import Self
+from m5.proxy import *
from MemObject import MemObject
from Prefetcher import BasePrefetcher
@@ -44,7 +44,6 @@ class BaseCache(MemObject):
prioritizeRequests = Param.Bool(False,
"always service demand misses first")
repl = Param.Repl(NULL, "replacement policy")
- num_cpus = Param.Int(1, "number of cpus sharing this cache")
size = Param.MemorySize("capacity in bytes")
forward_snoops = Param.Bool(True,
"forward snoops from mem side to cpu side")
@@ -62,3 +61,4 @@ class BaseCache(MemObject):
cpu_side = Port("Port on side closer to CPU")
mem_side = Port("Port on side closer to MEM")
addr_range = Param.AddrRange(AllMemory, "The address range for the CPU-side port")
+ system = Param.System(Parent.any, "System we belong to")
diff --git a/src/mem/cache/base.cc b/src/mem/cache/base.cc
index b0fb3bc6c..27ff6961b 100644
--- a/src/mem/cache/base.cc
+++ b/src/mem/cache/base.cc
@@ -65,7 +65,7 @@ BaseCache::BaseCache(const Params *p)
missCount(p->max_miss_count),
drainEvent(NULL),
addrRange(p->addr_range),
- _numCpus(p->num_cpus)
+ system(p->system)
{
}
@@ -143,11 +143,14 @@ BaseCache::regStats()
const string &cstr = cmd.toString();
hits[access_idx]
- .init(FullSystem ? (_numCpus + 1) : _numCpus)
+ .init(system->maxMasters())
.name(name() + "." + cstr + "_hits")
.desc("number of " + cstr + " hits")
.flags(total | nozero | nonan)
;
+ for (int i = 0; i < system->maxMasters(); i++) {
+ hits[access_idx].subname(i, system->getMasterName(i));
+ }
}
// These macros make it easier to sum the right subset of commands and
@@ -163,16 +166,22 @@ BaseCache::regStats()
demandHits
.name(name() + ".demand_hits")
.desc("number of demand (read+write) hits")
- .flags(total)
+ .flags(total | nozero | nonan)
;
demandHits = SUM_DEMAND(hits);
+ for (int i = 0; i < system->maxMasters(); i++) {
+ demandHits.subname(i, system->getMasterName(i));
+ }
overallHits
.name(name() + ".overall_hits")
.desc("number of overall hits")
- .flags(total)
+ .flags(total | nozero | nonan)
;
overallHits = demandHits + SUM_NON_DEMAND(hits);
+ for (int i = 0; i < system->maxMasters(); i++) {
+ overallHits.subname(i, system->getMasterName(i));
+ }
// Miss statistics
for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
@@ -180,26 +189,35 @@ BaseCache::regStats()
const string &cstr = cmd.toString();
misses[access_idx]
- .init(FullSystem ? (_numCpus + 1) : _numCpus)
+ .init(system->maxMasters())
.name(name() + "." + cstr + "_misses")
.desc("number of " + cstr + " misses")
.flags(total | nozero | nonan)
;
+ for (int i = 0; i < system->maxMasters(); i++) {
+ misses[access_idx].subname(i, system->getMasterName(i));
+ }
}
demandMisses
.name(name() + ".demand_misses")
.desc("number of demand (read+write) misses")
- .flags(total)
+ .flags(total | nozero | nonan)
;
demandMisses = SUM_DEMAND(misses);
+ for (int i = 0; i < system->maxMasters(); i++) {
+ demandMisses.subname(i, system->getMasterName(i));
+ }
overallMisses
.name(name() + ".overall_misses")
.desc("number of overall misses")
- .flags(total)
+ .flags(total | nozero | nonan)
;
overallMisses = demandMisses + SUM_NON_DEMAND(misses);
+ for (int i = 0; i < system->maxMasters(); i++) {
+ overallMisses.subname(i, system->getMasterName(i));
+ }
// Miss latency statistics
for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
@@ -207,26 +225,35 @@ BaseCache::regStats()
const string &cstr = cmd.toString();
missLatency[access_idx]
- .init(maxThreadsPerCPU)
+ .init(system->maxMasters())
.name(name() + "." + cstr + "_miss_latency")
.desc("number of " + cstr + " miss cycles")
.flags(total | nozero | nonan)
;
+ for (int i = 0; i < system->maxMasters(); i++) {
+ missLatency[access_idx].subname(i, system->getMasterName(i));
+ }
}
demandMissLatency
.name(name() + ".demand_miss_latency")
.desc("number of demand (read+write) miss cycles")
- .flags(total)
+ .flags(total | nozero | nonan)
;
demandMissLatency = SUM_DEMAND(missLatency);
+ for (int i = 0; i < system->maxMasters(); i++) {
+ demandMissLatency.subname(i, system->getMasterName(i));
+ }
overallMissLatency
.name(name() + ".overall_miss_latency")
.desc("number of overall miss cycles")
- .flags(total)
+ .flags(total | nozero | nonan)
;
overallMissLatency = demandMissLatency + SUM_NON_DEMAND(missLatency);
+ for (int i = 0; i < system->maxMasters(); i++) {
+ overallMissLatency.subname(i, system->getMasterName(i));
+ }
// access formulas
for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
@@ -238,23 +265,32 @@ BaseCache::regStats()
.desc("number of " + cstr + " accesses(hits+misses)")
.flags(total | nozero | nonan)
;
-
accesses[access_idx] = hits[access_idx] + misses[access_idx];
+
+ for (int i = 0; i < system->maxMasters(); i++) {
+ accesses[access_idx].subname(i, system->getMasterName(i));
+ }
}
demandAccesses
.name(name() + ".demand_accesses")
.desc("number of demand (read+write) accesses")
- .flags(total)
+ .flags(total | nozero | nonan)
;
demandAccesses = demandHits + demandMisses;
+ for (int i = 0; i < system->maxMasters(); i++) {
+ demandAccesses.subname(i, system->getMasterName(i));
+ }
overallAccesses
.name(name() + ".overall_accesses")
.desc("number of overall (read+write) accesses")
- .flags(total)
+ .flags(total | nozero | nonan)
;
overallAccesses = overallHits + overallMisses;
+ for (int i = 0; i < system->maxMasters(); i++) {
+ overallAccesses.subname(i, system->getMasterName(i));
+ }
// miss rate formulas
for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
@@ -266,23 +302,32 @@ BaseCache::regStats()
.desc("miss rate for " + cstr + " accesses")
.flags(total | nozero | nonan)
;
-
missRate[access_idx] = misses[access_idx] / accesses[access_idx];
+
+ for (int i = 0; i < system->maxMasters(); i++) {
+ missRate[access_idx].subname(i, system->getMasterName(i));
+ }
}
demandMissRate
.name(name() + ".demand_miss_rate")
.desc("miss rate for demand accesses")
- .flags(total)
+ .flags(total | nozero | nonan)
;
demandMissRate = demandMisses / demandAccesses;
+ for (int i = 0; i < system->maxMasters(); i++) {
+ demandMissRate.subname(i, system->getMasterName(i));
+ }
overallMissRate
.name(name() + ".overall_miss_rate")
.desc("miss rate for overall accesses")
- .flags(total)
+ .flags(total | nozero | nonan)
;
overallMissRate = overallMisses / overallAccesses;
+ for (int i = 0; i < system->maxMasters(); i++) {
+ overallMissRate.subname(i, system->getMasterName(i));
+ }
// miss latency formulas
for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
@@ -294,24 +339,33 @@ BaseCache::regStats()
.desc("average " + cstr + " miss latency")
.flags(total | nozero | nonan)
;
-
avgMissLatency[access_idx] =
missLatency[access_idx] / misses[access_idx];
+
+ for (int i = 0; i < system->maxMasters(); i++) {
+ avgMissLatency[access_idx].subname(i, system->getMasterName(i));
+ }
}
demandAvgMissLatency
.name(name() + ".demand_avg_miss_latency")
.desc("average overall miss latency")
- .flags(total)
+ .flags(total | nozero | nonan)
;
demandAvgMissLatency = demandMissLatency / demandMisses;
+ for (int i = 0; i < system->maxMasters(); i++) {
+ demandAvgMissLatency.subname(i, system->getMasterName(i));
+ }
overallAvgMissLatency
.name(name() + ".overall_avg_miss_latency")
.desc("average overall miss latency")
- .flags(total)
+ .flags(total | nozero | nonan)
;
overallAvgMissLatency = overallMissLatency / overallMisses;
+ for (int i = 0; i < system->maxMasters(); i++) {
+ overallAvgMissLatency.subname(i, system->getMasterName(i));
+ }
blocked_cycles.init(NUM_BLOCKED_CAUSES);
blocked_cycles
@@ -350,11 +404,14 @@ BaseCache::regStats()
;
writebacks
- .init(maxThreadsPerCPU)
+ .init(system->maxMasters())
.name(name() + ".writebacks")
.desc("number of writebacks")
- .flags(total)
+ .flags(total | nozero | nonan)
;
+ for (int i = 0; i < system->maxMasters(); i++) {
+ writebacks.subname(i, system->getMasterName(i));
+ }
// MSHR statistics
// MSHR hit statistics
@@ -363,26 +420,35 @@ BaseCache::regStats()
const string &cstr = cmd.toString();
mshr_hits[access_idx]
- .init(maxThreadsPerCPU)
+ .init(system->maxMasters())
.name(name() + "." + cstr + "_mshr_hits")
.desc("number of " + cstr + " MSHR hits")
.flags(total | nozero | nonan)
;
+ for (int i = 0; i < system->maxMasters(); i++) {
+ mshr_hits[access_idx].subname(i, system->getMasterName(i));
+ }
}
demandMshrHits
.name(name() + ".demand_mshr_hits")
.desc("number of demand (read+write) MSHR hits")
- .flags(total)
+ .flags(total | nozero | nonan)
;
demandMshrHits = SUM_DEMAND(mshr_hits);
+ for (int i = 0; i < system->maxMasters(); i++) {
+ demandMshrHits.subname(i, system->getMasterName(i));
+ }
overallMshrHits
.name(name() + ".overall_mshr_hits")
.desc("number of overall MSHR hits")
- .flags(total)
+ .flags(total | nozero | nonan)
;
overallMshrHits = demandMshrHits + SUM_NON_DEMAND(mshr_hits);
+ for (int i = 0; i < system->maxMasters(); i++) {
+ overallMshrHits.subname(i, system->getMasterName(i));
+ }
// MSHR miss statistics
for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
@@ -390,26 +456,35 @@ BaseCache::regStats()
const string &cstr = cmd.toString();
mshr_misses[access_idx]
- .init(maxThreadsPerCPU)
+ .init(system->maxMasters())
.name(name() + "." + cstr + "_mshr_misses")
.desc("number of " + cstr + " MSHR misses")
.flags(total | nozero | nonan)
;
+ for (int i = 0; i < system->maxMasters(); i++) {
+ mshr_misses[access_idx].subname(i, system->getMasterName(i));
+ }
}
demandMshrMisses
.name(name() + ".demand_mshr_misses")
.desc("number of demand (read+write) MSHR misses")
- .flags(total)
+ .flags(total | nozero | nonan)
;
demandMshrMisses = SUM_DEMAND(mshr_misses);
+ for (int i = 0; i < system->maxMasters(); i++) {
+ demandMshrMisses.subname(i, system->getMasterName(i));
+ }
overallMshrMisses
.name(name() + ".overall_mshr_misses")
.desc("number of overall MSHR misses")
- .flags(total)
+ .flags(total | nozero | nonan)
;
overallMshrMisses = demandMshrMisses + SUM_NON_DEMAND(mshr_misses);
+ for (int i = 0; i < system->maxMasters(); i++) {
+ overallMshrMisses.subname(i, system->getMasterName(i));
+ }
// MSHR miss latency statistics
for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
@@ -417,27 +492,36 @@ BaseCache::regStats()
const string &cstr = cmd.toString();
mshr_miss_latency[access_idx]
- .init(maxThreadsPerCPU)
+ .init(system->maxMasters())
.name(name() + "." + cstr + "_mshr_miss_latency")
.desc("number of " + cstr + " MSHR miss cycles")
.flags(total | nozero | nonan)
;
+ for (int i = 0; i < system->maxMasters(); i++) {
+ mshr_miss_latency[access_idx].subname(i, system->getMasterName(i));
+ }
}
demandMshrMissLatency
.name(name() + ".demand_mshr_miss_latency")
.desc("number of demand (read+write) MSHR miss cycles")
- .flags(total)
+ .flags(total | nozero | nonan)
;
demandMshrMissLatency = SUM_DEMAND(mshr_miss_latency);
+ for (int i = 0; i < system->maxMasters(); i++) {
+ demandMshrMissLatency.subname(i, system->getMasterName(i));
+ }
overallMshrMissLatency
.name(name() + ".overall_mshr_miss_latency")
.desc("number of overall MSHR miss cycles")
- .flags(total)
+ .flags(total | nozero | nonan)
;
overallMshrMissLatency =
demandMshrMissLatency + SUM_NON_DEMAND(mshr_miss_latency);
+ for (int i = 0; i < system->maxMasters(); i++) {
+ overallMshrMissLatency.subname(i, system->getMasterName(i));
+ }
// MSHR uncacheable statistics
for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
@@ -445,20 +529,26 @@ BaseCache::regStats()
const string &cstr = cmd.toString();
mshr_uncacheable[access_idx]
- .init(maxThreadsPerCPU)
+ .init(system->maxMasters())
.name(name() + "." + cstr + "_mshr_uncacheable")
.desc("number of " + cstr + " MSHR uncacheable")
.flags(total | nozero | nonan)
;
+ for (int i = 0; i < system->maxMasters(); i++) {
+ mshr_uncacheable[access_idx].subname(i, system->getMasterName(i));
+ }
}
overallMshrUncacheable
.name(name() + ".overall_mshr_uncacheable_misses")
.desc("number of overall MSHR uncacheable misses")
- .flags(total)
+ .flags(total | nozero | nonan)
;
overallMshrUncacheable =
SUM_DEMAND(mshr_uncacheable) + SUM_NON_DEMAND(mshr_uncacheable);
+ for (int i = 0; i < system->maxMasters(); i++) {
+ overallMshrUncacheable.subname(i, system->getMasterName(i));
+ }
// MSHR miss latency statistics
for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
@@ -466,21 +556,27 @@ BaseCache::regStats()
const string &cstr = cmd.toString();
mshr_uncacheable_lat[access_idx]
- .init(maxThreadsPerCPU)
+ .init(system->maxMasters())
.name(name() + "." + cstr + "_mshr_uncacheable_latency")
.desc("number of " + cstr + " MSHR uncacheable cycles")
.flags(total | nozero | nonan)
;
+ for (int i = 0; i < system->maxMasters(); i++) {
+ mshr_uncacheable_lat[access_idx].subname(i, system->getMasterName(i));
+ }
}
overallMshrUncacheableLatency
.name(name() + ".overall_mshr_uncacheable_latency")
.desc("number of overall MSHR uncacheable cycles")
- .flags(total)
+ .flags(total | nozero | nonan)
;
overallMshrUncacheableLatency =
SUM_DEMAND(mshr_uncacheable_lat) +
SUM_NON_DEMAND(mshr_uncacheable_lat);
+ for (int i = 0; i < system->maxMasters(); i++) {
+ overallMshrUncacheableLatency.subname(i, system->getMasterName(i));
+ }
#if 0
// MSHR access formulas
@@ -524,24 +620,33 @@ BaseCache::regStats()
.desc("mshr miss rate for " + cstr + " accesses")
.flags(total | nozero | nonan)
;
-
mshrMissRate[access_idx] =
mshr_misses[access_idx] / accesses[access_idx];
+
+ for (int i = 0; i < system->maxMasters(); i++) {
+ mshrMissRate[access_idx].subname(i, system->getMasterName(i));
+ }
}
demandMshrMissRate
.name(name() + ".demand_mshr_miss_rate")
.desc("mshr miss rate for demand accesses")
- .flags(total)
+ .flags(total | nozero | nonan)
;
demandMshrMissRate = demandMshrMisses / demandAccesses;
+ for (int i = 0; i < system->maxMasters(); i++) {
+ demandMshrMissRate.subname(i, system->getMasterName(i));
+ }
overallMshrMissRate
.name(name() + ".overall_mshr_miss_rate")
.desc("mshr miss rate for overall accesses")
- .flags(total)
+ .flags(total | nozero | nonan)
;
overallMshrMissRate = overallMshrMisses / overallAccesses;
+ for (int i = 0; i < system->maxMasters(); i++) {
+ overallMshrMissRate.subname(i, system->getMasterName(i));
+ }
// mshrMiss latency formulas
for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
@@ -553,24 +658,33 @@ BaseCache::regStats()
.desc("average " + cstr + " mshr miss latency")
.flags(total | nozero | nonan)
;
-
avgMshrMissLatency[access_idx] =
mshr_miss_latency[access_idx] / mshr_misses[access_idx];
+
+ for (int i = 0; i < system->maxMasters(); i++) {
+ avgMshrMissLatency[access_idx].subname(i, system->getMasterName(i));
+ }
}
demandAvgMshrMissLatency
.name(name() + ".demand_avg_mshr_miss_latency")
.desc("average overall mshr miss latency")
- .flags(total)
+ .flags(total | nozero | nonan)
;
demandAvgMshrMissLatency = demandMshrMissLatency / demandMshrMisses;
+ for (int i = 0; i < system->maxMasters(); i++) {
+ demandAvgMshrMissLatency.subname(i, system->getMasterName(i));
+ }
overallAvgMshrMissLatency
.name(name() + ".overall_avg_mshr_miss_latency")
.desc("average overall mshr miss latency")
- .flags(total)
+ .flags(total | nozero | nonan)
;
overallAvgMshrMissLatency = overallMshrMissLatency / overallMshrMisses;
+ for (int i = 0; i < system->maxMasters(); i++) {
+ overallAvgMshrMissLatency.subname(i, system->getMasterName(i));
+ }
// mshrUncacheable latency formulas
for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
@@ -582,32 +696,44 @@ BaseCache::regStats()
.desc("average " + cstr + " mshr uncacheable latency")
.flags(total | nozero | nonan)
;
-
avgMshrUncacheableLatency[access_idx] =
mshr_uncacheable_lat[access_idx] / mshr_uncacheable[access_idx];
+
+ for (int i = 0; i < system->maxMasters(); i++) {
+ avgMshrUncacheableLatency[access_idx].subname(i, system->getMasterName(i));
+ }
}
overallAvgMshrUncacheableLatency
.name(name() + ".overall_avg_mshr_uncacheable_latency")
.desc("average overall mshr uncacheable latency")
- .flags(total)
+ .flags(total | nozero | nonan)
;
overallAvgMshrUncacheableLatency = overallMshrUncacheableLatency / overallMshrUncacheable;
+ for (int i = 0; i < system->maxMasters(); i++) {
+ overallAvgMshrUncacheableLatency.subname(i, system->getMasterName(i));
+ }
mshr_cap_events
- .init(maxThreadsPerCPU)
+ .init(system->maxMasters())
.name(name() + ".mshr_cap_events")
.desc("number of times MSHR cap was activated")
- .flags(total)
+ .flags(total | nozero | nonan)
;
+ for (int i = 0; i < system->maxMasters(); i++) {
+ mshr_cap_events.subname(i, system->getMasterName(i));
+ }
//software prefetching stats
soft_prefetch_mshr_full
- .init(maxThreadsPerCPU)
+ .init(system->maxMasters())
.name(name() + ".soft_prefetch_mshr_full")
.desc("number of mshr full events for SW prefetching instrutions")
- .flags(total)
+ .flags(total | nozero | nonan)
;
+ for (int i = 0; i < system->maxMasters(); i++) {
+ soft_prefetch_mshr_full.subname(i, system->getMasterName(i));
+ }
mshr_no_allocate_misses
.name(name() +".no_allocate_misses")
diff --git a/src/mem/cache/base.hh b/src/mem/cache/base.hh
index 3aaed4455..cff8813cd 100644
--- a/src/mem/cache/base.hh
+++ b/src/mem/cache/base.hh
@@ -58,6 +58,7 @@
#include "sim/eventq.hh"
#include "sim/full_system.hh"
#include "sim/sim_exit.hh"
+#include "sim/system.hh"
class MSHR;
/**
@@ -220,11 +221,10 @@ class BaseCache : public MemObject
* Normally this is all possible memory addresses. */
Range<Addr> addrRange;
- /** number of cpus sharing this cache - from config file */
- int _numCpus;
-
public:
- int numCpus() { return _numCpus; }
+ /** System we are currently operating in. */
+ System *system;
+
// Statistics
/**
* @addtogroup CacheStatistics
@@ -488,23 +488,10 @@ class BaseCache : public MemObject
virtual bool inMissQueue(Addr addr) = 0;
- void incMissCount(PacketPtr pkt, int id)
+ void incMissCount(PacketPtr pkt)
{
-
- if (pkt->cmd == MemCmd::Writeback) {
- assert(id == -1);
- misses[pkt->cmdToIndex()][0]++;
- /* same thing for writeback hits as misses - no context id
- * available, meanwhile writeback hit/miss stats are not used
- * in any aggregate hit/miss calculations, so just lump them all
- * in bucket 0 */
- } else if (FullSystem && id == -1) {
- // Device accesses have id -1
- // lump device accesses into their own bucket
- misses[pkt->cmdToIndex()][_numCpus]++;
- } else {
- misses[pkt->cmdToIndex()][id % _numCpus]++;
- }
+ assert(pkt->req->masterId() < system->maxMasters());
+ misses[pkt->cmdToIndex()][pkt->req->masterId()]++;
if (missCount) {
--missCount;
@@ -512,26 +499,11 @@ class BaseCache : public MemObject
exitSimLoop("A cache reached the maximum miss count");
}
}
- void incHitCount(PacketPtr pkt, int id)
+ void incHitCount(PacketPtr pkt)
{
+ assert(pkt->req->masterId() < system->maxMasters());
+ hits[pkt->cmdToIndex()][pkt->req->masterId()]++;
- /* Writeback requests don't have a context id associated with
- * them, so attributing a hit to a -1 context id is obviously a
- * problem. I've noticed in the stats that hits are split into
- * demand and non-demand hits - neither of which include writeback
- * hits, so here, I'll just put the writeback hits into bucket 0
- * since it won't mess with any other stats -hsul */
- if (pkt->cmd == MemCmd::Writeback) {
- assert(id == -1);
- hits[pkt->cmdToIndex()][0]++;
- } else if (FullSystem && id == -1) {
- // Device accesses have id -1
- // lump device accesses into their own bucket
- hits[pkt->cmdToIndex()][_numCpus]++;
- } else {
- /* the % is necessary in case there are switch cpus */
- hits[pkt->cmdToIndex()][id % _numCpus]++;
- }
}
};
diff --git a/src/mem/cache/blk.hh b/src/mem/cache/blk.hh
index e70760edd..91970e09b 100644
--- a/src/mem/cache/blk.hh
+++ b/src/mem/cache/blk.hh
@@ -103,8 +103,8 @@ class CacheBlk
/** Number of references to this block since it was brought in. */
int refCount;
- /** holds the context source ID of the requestor for this block. */
- int contextSrc;
+ /** holds the source requestor ID for this block. */
+ int srcMasterId;
protected:
/**
@@ -135,7 +135,8 @@ class CacheBlk
CacheBlk()
: asid(-1), tag(0), data(0) ,size(0), status(0), whenReady(0),
- set(-1), isTouched(false), refCount(0), contextSrc(-1)
+ set(-1), isTouched(false), refCount(0),
+ srcMasterId(Request::invldMasterId)
{}
/**
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index fbab8465e..87b688617 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -312,7 +312,7 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk,
if (pkt->needsExclusive() ? blk->isWritable() : blk->isReadable()) {
// OK to satisfy access
- incHitCount(pkt, id);
+ incHitCount(pkt);
satisfyCpuSideRequest(pkt, blk);
return true;
}
@@ -332,10 +332,10 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk,
if (blk == NULL) {
// no replaceable block available, give up.
// writeback will be forwarded to next level.
- incMissCount(pkt, id);
+ incMissCount(pkt);
return false;
}
- int id = pkt->req->hasContextId() ? pkt->req->contextId() : -1;
+ int id = pkt->req->masterId();
tags->insertBlock(pkt->getAddr(), blk, id);
blk->status = BlkValid | BlkReadable;
}
@@ -346,11 +346,11 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk,
}
// nothing else to do; writeback doesn't expect response
assert(!pkt->needsResponse());
- incHitCount(pkt, id);
+ incHitCount(pkt);
return true;
}
- incMissCount(pkt, id);
+ incMissCount(pkt);
if (blk == NULL && pkt->isLLSC() && pkt->isWrite()) {
// complete miss on store conditional... just give up now
@@ -514,7 +514,8 @@ Cache<TagStore>::timingAccess(PacketPtr pkt)
if (mshr) {
// MSHR hit
//@todo remove hw_pf here
- mshr_hits[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++;
+ assert(pkt->req->masterId() < system->maxMasters());
+ mshr_hits[pkt->cmdToIndex()][pkt->req->masterId()]++;
if (mshr->threadNum != 0/*pkt->req->threadId()*/) {
mshr->threadNum = -1;
}
@@ -529,7 +530,8 @@ Cache<TagStore>::timingAccess(PacketPtr pkt)
}
} else {
// no MSHR
- mshr_misses[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++;
+ assert(pkt->req->masterId() < system->maxMasters());
+ mshr_misses[pkt->cmdToIndex()][pkt->req->masterId()]++;
// always mark as cache fill for now... if we implement
// no-write-allocate or bypass accesses this will have to
// be changed.
@@ -849,10 +851,12 @@ Cache<TagStore>::handleResponse(PacketPtr pkt)
PacketList writebacks;
if (pkt->req->isUncacheable()) {
- mshr_uncacheable_lat[stats_cmd_idx][0/*pkt->req->threadId()*/] +=
+ assert(pkt->req->masterId() < system->maxMasters());
+ mshr_uncacheable_lat[stats_cmd_idx][pkt->req->masterId()] +=
miss_latency;
} else {
- mshr_miss_latency[stats_cmd_idx][0/*pkt->req->threadId()*/] +=
+ assert(pkt->req->masterId() < system->maxMasters());
+ mshr_miss_latency[stats_cmd_idx][pkt->req->masterId()] +=
miss_latency;
}
@@ -898,7 +902,9 @@ Cache<TagStore>::handleResponse(PacketPtr pkt)
(transfer_offset ? pkt->finishTime : pkt->firstWordTime);
assert(!target->pkt->req->isUncacheable());
- missLatency[target->pkt->cmdToIndex()][0/*pkt->req->threadId()*/] +=
+
+ assert(pkt->req->masterId() < system->maxMasters());
+ missLatency[target->pkt->cmdToIndex()][target->pkt->req->masterId()] +=
completion_time - target->recvTime;
} else if (pkt->cmd == MemCmd::UpgradeFailResp) {
// failed StoreCond upgrade
@@ -1003,7 +1009,7 @@ Cache<TagStore>::writebackBlk(BlkType *blk)
{
assert(blk && blk->isValid() && blk->isDirty());
- writebacks[0/*pkt->req->threadId()*/]++;
+ writebacks[Request::wbMasterId]++;
Request *writebackReq =
new Request(tags->regenerateBlkAddr(blk->tag, blk->set), blkSize, 0,
@@ -1082,7 +1088,7 @@ Cache<TagStore>::handleFill(PacketPtr pkt, BlkType *blk,
tempBlock->tag = tags->extractTag(addr);
DPRINTF(Cache, "using temp block for %x\n", addr);
} else {
- int id = pkt->req->hasContextId() ? pkt->req->contextId() : -1;
+ int id = pkt->req->masterId();
tags->insertBlock(pkt->getAddr(), blk, id);
}
@@ -1427,7 +1433,8 @@ Cache<TagStore>::getNextMSHR()
!writeBuffer.findMatch(pf_addr)) {
// Update statistic on number of prefetches issued
// (hwpf_mshr_misses)
- mshr_misses[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++;
+ assert(pkt->req->masterId() < system->maxMasters());
+ mshr_misses[pkt->cmdToIndex()][pkt->req->masterId()]++;
// Don't request bus, since we already have it
return allocateMissBuffer(pkt, curTick(), false);
} else {
diff --git a/src/mem/cache/tags/base.cc b/src/mem/cache/tags/base.cc
index ea97954f1..0cabce860 100644
--- a/src/mem/cache/tags/base.cc
+++ b/src/mem/cache/tags/base.cc
@@ -87,17 +87,23 @@ BaseTags::regStats(const string &name)
;
occupancies
- .init(cache->numCpus() + 1)
+ .init(cache->system->maxMasters())
.name(name + ".occ_blocks")
- .desc("Average occupied blocks per context")
+ .desc("Average occupied blocks per requestor")
.flags(nozero | nonan)
;
+ for (int i = 0; i < cache->system->maxMasters(); i++) {
+ occupancies.subname(i, cache->system->getMasterName(i));
+ }
avgOccs
.name(name + ".occ_percent")
.desc("Average percentage of cache occupancy")
- .flags(nozero)
+ .flags(nozero | total)
;
+ for (int i = 0; i < cache->system->maxMasters(); i++) {
+ avgOccs.subname(i, cache->system->getMasterName(i));
+ }
avgOccs = occupancies / Stats::constant(numBlocks);
diff --git a/src/mem/cache/tags/base.hh b/src/mem/cache/tags/base.hh
index 93856c19e..576b512e5 100644
--- a/src/mem/cache/tags/base.hh
+++ b/src/mem/cache/tags/base.hh
@@ -97,10 +97,10 @@ class BaseTags
/** The cycle that the warmup percentage was hit. */
Stats::Scalar warmupCycle;
- /** Average occupancy of each context/cpu using the cache */
+ /** Average occupancy of each requestor using the cache */
Stats::AverageVector occupancies;
- /** Average occ % of each context/cpu using the cache */
+ /** Average occ % of each requestor using the cache */
Stats::Formula avgOccs;
/**
diff --git a/src/mem/cache/tags/lru.cc b/src/mem/cache/tags/lru.cc
index 33f0f14a9..babcedc89 100644
--- a/src/mem/cache/tags/lru.cc
+++ b/src/mem/cache/tags/lru.cc
@@ -116,7 +116,7 @@ LRU::~LRU()
}
LRU::BlkType*
-LRU::accessBlock(Addr addr, int &lat, int context_src)
+LRU::accessBlock(Addr addr, int &lat, int master_id)
{
Addr tag = extractTag(addr);
unsigned set = extractSet(addr);
@@ -153,20 +153,8 @@ LRU::findVictim(Addr addr, PacketList &writebacks)
unsigned set = extractSet(addr);
// grab a replacement candidate
BlkType *blk = sets[set].blks[assoc-1];
- if (blk->isValid()) {
- replacements[0]++;
- totalRefs += blk->refCount;
- ++sampledRefs;
- blk->refCount = 0;
-
- // deal with evicted block
- if (blk->contextSrc != -1) {
- occupancies[blk->contextSrc % cache->numCpus()]--;
- blk->contextSrc = -1;
- } else {
- occupancies[cache->numCpus()]--;
- }
+ if (blk->isValid()) {
DPRINTF(CacheRepl, "set %x: selecting blk %x for replacement\n",
set, regenerateBlkAddr(blk->tag, set));
}
@@ -174,7 +162,7 @@ LRU::findVictim(Addr addr, PacketList &writebacks)
}
void
-LRU::insertBlock(Addr addr, BlkType *blk, int context_src)
+LRU::insertBlock(Addr addr, BlkType *blk, int master_id)
{
if (!blk->isTouched) {
tagsInUse++;
@@ -185,16 +173,28 @@ LRU::insertBlock(Addr addr, BlkType *blk, int context_src)
}
}
+ // If we're replacing a block that was previously valid, update
+ // stats for it. This can't be done in findVictim() because a
+ // victim chosen there might not actually be replaced if the
+ // coherence protocol says it can't be.
+ if (blk->isValid()) {
+ replacements[0]++;
+ totalRefs += blk->refCount;
+ ++sampledRefs;
+ blk->refCount = 0;
+
+ // deal with evicted block
+ assert(blk->srcMasterId < cache->system->maxMasters());
+ occupancies[blk->srcMasterId]--;
+ }
+
// Set tag for new block. Caller is responsible for setting status.
blk->tag = extractTag(addr);
// deal with what we are bringing in
- if (context_src != -1) {
- occupancies[context_src % cache->numCpus()]++;
- } else {
- occupancies[cache->numCpus()]++;
- }
- blk->contextSrc = context_src;
+ assert(master_id < cache->system->maxMasters());
+ occupancies[master_id]++;
+ blk->srcMasterId = master_id;
unsigned set = extractSet(addr);
sets[set].moveToHead(blk);
@@ -204,16 +204,15 @@ void
LRU::invalidateBlk(BlkType *blk)
{
if (blk) {
+ if (blk->isValid()) {
+ tagsInUse--;
+ assert(blk->srcMasterId < cache->system->maxMasters());
+ occupancies[blk->srcMasterId]--;
+ blk->srcMasterId = Request::invldMasterId;
+ }
blk->status = 0;
blk->isTouched = false;
blk->clearLoadLocks();
- tagsInUse--;
- if (blk->contextSrc != -1) {
- occupancies[blk->contextSrc % cache->numCpus()]--;
- blk->contextSrc = -1;
- } else {
- occupancies[cache->numCpus()]--;
- }
}
}
diff --git a/src/mem/request.hh b/src/mem/request.hh
index b6128f450..68ef0540a 100644
--- a/src/mem/request.hh
+++ b/src/mem/request.hh
@@ -40,6 +40,7 @@
#define __MEM_REQUEST_HH__
#include <cassert>
+#include <climits>
#include "base/fast_alloc.hh"
#include "base/flags.hh"
@@ -111,6 +112,10 @@ class Request : public FastAlloc
static const MasterID funcMasterId = 1;
/** This request id is used for message signaled interrupts */
static const MasterID intMasterId = 2;
+ /** Invalid request id for assertion checking only. It is invalid behavior
+ * to ever send this id as part of a request.
+ * @todo C++1x replace with numeric_limits when constexpr is added */
+ static const MasterID invldMasterId = USHRT_MAX;
/** @} */
private:
diff --git a/tests/configs/memtest.py b/tests/configs/memtest.py
index c1358eecd..edb18f39a 100644
--- a/tests/configs/memtest.py
+++ b/tests/configs/memtest.py
@@ -64,7 +64,6 @@ system = System(cpu = cpus, funcmem = PhysicalMemory(),
system.toL2Bus = Bus(clock="500GHz", width=16)
system.l2c = L2(size='64kB', assoc=8)
system.l2c.cpu_side = system.toL2Bus.port
-system.l2c.num_cpus = nb_cores
# connect l2c to membus
system.l2c.mem_side = system.membus.port
diff --git a/tests/configs/o3-timing-mp.py b/tests/configs/o3-timing-mp.py
index 9436cf88a..95323c2f6 100644
--- a/tests/configs/o3-timing-mp.py
+++ b/tests/configs/o3-timing-mp.py
@@ -63,7 +63,6 @@ Bus())
system.toL2Bus = Bus()
system.l2c = L2(size='4MB', assoc=8)
system.l2c.cpu_side = system.toL2Bus.port
-system.l2c.num_cpus = nb_cores
# connect l2c to membus
system.l2c.mem_side = system.membus.port
diff --git a/tests/configs/realview-o3-dual.py b/tests/configs/realview-o3-dual.py
index adab96fcb..42532065b 100644
--- a/tests/configs/realview-o3-dual.py
+++ b/tests/configs/realview-o3-dual.py
@@ -83,7 +83,6 @@ system.toL2Bus = Bus()
system.l2c = L2(size='4MB', assoc=8)
system.l2c.cpu_side = system.toL2Bus.port
system.l2c.mem_side = system.membus.port
-system.l2c.num_cpus = 2
#connect up the cpu and l1s
for c in cpus:
diff --git a/tests/configs/realview-simple-timing-dual.py b/tests/configs/realview-simple-timing-dual.py
index 81646f825..95daa81b6 100644
--- a/tests/configs/realview-simple-timing-dual.py
+++ b/tests/configs/realview-simple-timing-dual.py
@@ -83,7 +83,6 @@ system.toL2Bus = Bus()
system.l2c = L2(size='4MB', assoc=8)
system.l2c.cpu_side = system.toL2Bus.port
system.l2c.mem_side = system.membus.port
-system.l2c.num_cpus = 2
#connect up the cpu and l1s
for c in cpus:
diff --git a/tests/configs/simple-atomic-mp.py b/tests/configs/simple-atomic-mp.py
index db0c0b9c0..2fa7edb2a 100644
--- a/tests/configs/simple-atomic-mp.py
+++ b/tests/configs/simple-atomic-mp.py
@@ -62,7 +62,6 @@ Bus())
system.toL2Bus = Bus()
system.l2c = L2(size='4MB', assoc=8)
system.l2c.cpu_side = system.toL2Bus.port
-system.l2c.num_cpus = nb_cores
# connect l2c to membus
system.l2c.mem_side = system.membus.port
diff --git a/tests/configs/simple-timing-mp.py b/tests/configs/simple-timing-mp.py
index c82ef0a26..06d535154 100644
--- a/tests/configs/simple-timing-mp.py
+++ b/tests/configs/simple-timing-mp.py
@@ -62,7 +62,6 @@ Bus())
system.toL2Bus = Bus()
system.l2c = L2(size='4MB', assoc=8)
system.l2c.cpu_side = system.toL2Bus.port
-system.l2c.num_cpus = nb_cores
# connect l2c to membus
system.l2c.mem_side = system.membus.port
diff --git a/tests/configs/tsunami-o3-dual.py b/tests/configs/tsunami-o3-dual.py
index 1680be166..1acfc903b 100644
--- a/tests/configs/tsunami-o3-dual.py
+++ b/tests/configs/tsunami-o3-dual.py
@@ -85,7 +85,6 @@ system.iocache.mem_side = system.membus.port
system.l2c = L2(size='4MB', assoc=8)
system.l2c.cpu_side = system.toL2Bus.port
system.l2c.mem_side = system.membus.port
-system.l2c.num_cpus = 2
#connect up the cpu and l1s
for c in cpus:
diff --git a/tests/configs/tsunami-simple-atomic-dual.py b/tests/configs/tsunami-simple-atomic-dual.py
index 9a29f5c65..ddc7dd1d7 100644
--- a/tests/configs/tsunami-simple-atomic-dual.py
+++ b/tests/configs/tsunami-simple-atomic-dual.py
@@ -83,7 +83,6 @@ system.toL2Bus = Bus()
system.l2c = L2(size='4MB', assoc=8)
system.l2c.cpu_side = system.toL2Bus.port
system.l2c.mem_side = system.membus.port
-system.l2c.num_cpus = 2
#connect up the cpu and l1s
for c in cpus:
diff --git a/tests/configs/tsunami-simple-timing-dual.py b/tests/configs/tsunami-simple-timing-dual.py
index 6b78b71f4..48740ea15 100644
--- a/tests/configs/tsunami-simple-timing-dual.py
+++ b/tests/configs/tsunami-simple-timing-dual.py
@@ -83,7 +83,6 @@ system.toL2Bus = Bus()
system.l2c = L2(size='4MB', assoc=8)
system.l2c.cpu_side = system.toL2Bus.port
system.l2c.mem_side = system.membus.port
-system.l2c.num_cpus = 2
#connect up the cpu and l1s
for c in cpus: