mem: Set the cache line size on a system level

This patch removes the notion of a peer block size and instead sets the cache line size on the system level. Previously, the size was set per cache and communicated through the interconnect. There were plenty of checks to ensure that everyone had the same size specified, and these checks are now removed. Another benefit, not yet harnessed, is that the cache line size is now known at construction time rather than only after port binding. Hence, the block size can be stored locally and does not have to be queried every time it is used. A follow-on patch updates the configuration scripts accordingly.
author: Andreas Hansson <andreas.hansson@arm.com> 2013-07-18 08:31:16 -0400
committer: Andreas Hansson <andreas.hansson@arm.com> 2013-07-18 08:31:16 -0400
commit: d4273cc9a6f3c00566e97ebcd71509ed14477b37 (patch)
tree: 9b50625fc5d2bb457a959f379a45687903660237 /src/cpu
parent: 4e8ecd7c6fd0447f563179b5a8fdbb13b562ca9e (diff)
download: gem5-d4273cc9a6f3c00566e97ebcd71509ed14477b37.tar.xz
13 files changed, 43 insertions, 74 deletions
diff --git a/src/cpu/base.cc b/src/cpu/base.cc
index 5b76f3f7c..25fe9bf97 100644
--- a/src/cpu/base.cc
+++ b/src/cpu/base.cc
@@ -119,7 +119,7 @@ BaseCPU::BaseCPU(Params *p, bool is_checker)
       _instMasterId(p->system->getMasterId(name() + ".inst")),
       _dataMasterId(p->system->getMasterId(name() + ".data")),
       _taskId(ContextSwitchTaskId::Unknown), _pid(Request::invldPid),
-      _switchedOut(p->switched_out),
+      _switchedOut(p->switched_out), _cacheLineSize(p->system->cacheLineSize()),
       interrupts(p->interrupts), profileEvent(NULL),
       numThreads(p->numThreads), system(p->system)
 {
diff --git a/src/cpu/base.hh b/src/cpu/base.hh
index 0a3600764..a91ba862d 100644
--- a/src/cpu/base.hh
+++ b/src/cpu/base.hh
@@ -57,12 +57,12 @@
 #include "sim/eventq.hh"
 #include "sim/full_system.hh"
 #include "sim/insttracer.hh"
+#include "sim/system.hh"
 
 struct BaseCPUParams;
 class BranchPred;
 class CheckerCPU;
 class ThreadContext;
-class System;
 
 class CPUProgressEvent : public Event
 {
@@ -117,6 +117,9 @@ class BaseCPU : public MemObject
     /** Is the CPU switched out or active? */
     bool _switchedOut;
 
+    /** Cache the cache line size that we get from the system */
+    const unsigned int _cacheLineSize;
+
   public:
 
     /**
@@ -343,6 +346,11 @@ class BaseCPU : public MemObject
     System *system;
 
     /**
+     * Get the cache line size of the system.
+     */
+    inline unsigned int cacheLineSize() const { return _cacheLineSize; }
+
+    /**
      * Serialize this object to the given output stream.
      *
      * @note CPU models should normally overload the serializeThread()
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh
index 20278bd30..8989a438a 100644
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011 ARM Limited
+ * Copyright (c) 2011,2013 ARM Limited
  * All rights reserved.
  *
  * The license below extends only to copyright in the software and shall
@@ -976,7 +976,7 @@ BaseDynInst<Impl>::splitRequest(RequestPtr req, RequestPtr &sreqLow,
                                 RequestPtr &sreqHigh)
 {
     // Check to see if the request crosses the next level block boundary.
-    unsigned block_size = cpu->getDataPort().peerBlockSize();
+    unsigned block_size = cpu->cacheLineSize();
     Addr addr = req->getVaddr();
     Addr split_addr = roundDown(addr + req->getSize() - 1, block_size);
     assert(split_addr <= addr || split_addr - addr < block_size);
diff --git a/src/cpu/checker/cpu.cc b/src/cpu/checker/cpu.cc
index c824121be..5cb1ccf18 100644
--- a/src/cpu/checker/cpu.cc
+++ b/src/cpu/checker/cpu.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011 ARM Limited
+ * Copyright (c) 2011,2013 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -142,9 +142,8 @@ Fault
 CheckerCPU::readMem(Addr addr, uint8_t *data, unsigned size, unsigned flags)
 {
     Fault fault = NoFault;
-    unsigned blockSize = dcachePort->peerBlockSize();
     int fullSize = size;
-    Addr secondAddr = roundDown(addr + size - 1, blockSize);
+    Addr secondAddr = roundDown(addr + size - 1, cacheLineSize());
     bool checked_flags = false;
     bool flags_match = true;
     Addr pAddr = 0x0;
@@ -236,10 +235,9 @@ CheckerCPU::writeMem(uint8_t *data, unsigned size,
     bool flags_match = true;
     Addr pAddr = 0x0;
 
-    unsigned blockSize = dcachePort->peerBlockSize();
     int fullSize = size;
 
-    Addr secondAddr = roundDown(addr + size - 1, blockSize);
+    Addr secondAddr = roundDown(addr + size - 1, cacheLineSize());
 
     if (secondAddr > addr)
         size = secondAddr - addr;
diff --git a/src/cpu/inorder/resources/cache_unit.cc b/src/cpu/inorder/resources/cache_unit.cc
index e380c79d4..9a46641ac 100644
--- a/src/cpu/inorder/resources/cache_unit.cc
+++ b/src/cpu/inorder/resources/cache_unit.cc
@@ -112,7 +112,7 @@ CacheUnit::init()
         reqs[i] = new CacheRequest(this);
     }
 
-    cacheBlkSize = cachePort->peerBlockSize();
+    cacheBlkSize = cpu->cacheLineSize();
     cacheBlkMask = cacheBlkSize  - 1;
 
     initSlots();
diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh
index 23245d496..35f58ff74 100644
--- a/src/cpu/o3/fetch.hh
+++ b/src/cpu/o3/fetch.hh
@@ -216,9 +216,6 @@ class DefaultFetch
     /** Initialize stage. */
     void startupStage();
 
-    /** Tells the fetch stage that the Icache is set. */
-    void setIcache();
-
     /** Handles retrying the fetch access. */
     void recvRetry();
 
@@ -464,7 +461,7 @@ class DefaultFetch
     ThreadID retryTid;
 
     /** Cache block size. */
-    int cacheBlkSize;
+    unsigned int cacheBlkSize;
 
     /** Mask to get a cache block's address. */
     Addr cacheBlkMask;
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index 43effa9d7..0445de921 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010-2012 ARM Limited
+ * Copyright (c) 2010-2013 ARM Limited
  * All rights reserved.
  *
  * The license below extends only to copyright in the software and shall
@@ -81,6 +81,8 @@ DefaultFetch<Impl>::DefaultFetch(O3CPU *_cpu, DerivO3CPUParams *params)
       fetchWidth(params->fetchWidth),
       retryPkt(NULL),
       retryTid(InvalidThreadID),
+      cacheBlkSize(cpu->cacheLineSize()),
+      cacheBlkMask(cacheBlkSize - 1),
       numThreads(params->numThreads),
       numFetchingThreads(params->smtNumFetchingThreads),
       finishTranslationEvent(this)
@@ -126,11 +128,17 @@ DefaultFetch<Impl>::DefaultFetch(O3CPU *_cpu, DerivO3CPUParams *params)
     instSize = sizeof(TheISA::MachInst);
 
     for (int i = 0; i < Impl::MaxThreads; i++) {
-        cacheData[i] = NULL;
         decoder[i] = new TheISA::Decoder;
     }
 
     branchPred = params->branchPred;
+
+    for (ThreadID tid = 0; tid < numThreads; tid++) {
+        // Create space to store a cache line.
+        cacheData[tid] = new uint8_t[cacheBlkSize];
+        cacheDataPC[tid] = 0;
+        cacheDataValid[tid] = false;
+    }
 }
 
 template <class Impl>
@@ -336,34 +344,6 @@ DefaultFetch<Impl>::resetStage()
 
     wroteToTimeBuffer = false;
     _status = Inactive;
-
-    // this CPU could still be unconnected if we are restoring from a
-    // checkpoint and this CPU is to be switched in, thus we can only
-    // do this here if the instruction port is actually connected, if
-    // not we have to do it as part of takeOverFrom.
-    if (cpu->getInstPort().isConnected())
-        setIcache();
-}
-
-template<class Impl>
-void
-DefaultFetch<Impl>::setIcache()
-{
-    assert(cpu->getInstPort().isConnected());
-
-    // Size of cache block.
-    cacheBlkSize = cpu->getInstPort().peerBlockSize();
-
-    // Create mask to get rid of offset bits.
-    cacheBlkMask = (cacheBlkSize - 1);
-
-    for (ThreadID tid = 0; tid < numThreads; tid++) {
-        // Create space to store a cache line.
-        if (!cacheData[tid])
-            cacheData[tid] = new uint8_t[cacheBlkSize];
-        cacheDataPC[tid] = 0;
-        cacheDataValid[tid] = false;
-    }
 }
 
 template<class Impl>
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index f0b27ba41..077af1dd7 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -1,3 +1,4 @@
+
 /*
  * Copyright (c) 2010-2012 ARM Limited
  * All rights reserved
@@ -190,7 +191,7 @@ LSQUnit<Impl>::resetState()
     isLoadBlocked = false;
     loadBlockedHandled = false;
 
-    cacheBlockMask = 0;
+    cacheBlockMask = ~(cpu->cacheLineSize() - 1);
 }
 
 template<class Impl>
@@ -419,16 +420,6 @@ LSQUnit<Impl>::checkSnoop(PacketPtr pkt)
 {
     int load_idx = loadHead;
 
-    if (!cacheBlockMask) {
-        assert(dcachePort);
-        Addr bs = dcachePort->peerBlockSize();
-
-        // Make sure we actually got a size
-        assert(bs != 0);
-
-        cacheBlockMask = ~(bs - 1);
-    }
-
     // Unlock the cpu-local monitor when the CPU sees a snoop to a locked
     // address. The CPU can speculatively execute a LL operation after a pending
     // SC operation in the pipeline and that can make the cache monitor the CPU
diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc
index 1dd9675f9..ffd1c4d43 100644
--- a/src/cpu/simple/atomic.cc
+++ b/src/cpu/simple/atomic.cc
@@ -287,14 +287,12 @@ AtomicSimpleCPU::readMem(Addr addr, uint8_t * data,
         traceData->setAddr(addr);
     }
 
-    //The block size of our peer.
-    unsigned blockSize = dcachePort.peerBlockSize();
     //The size of the data we're trying to read.
     int fullSize = size;
 
     //The address of the second part of this access if it needs to be split
     //across a cache line boundary.
-    Addr secondAddr = roundDown(addr + size - 1, blockSize);
+    Addr secondAddr = roundDown(addr + size - 1, cacheLineSize());
 
     if (secondAddr > addr)
         size = secondAddr - addr;
@@ -375,14 +373,12 @@ AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size,
         traceData->setAddr(addr);
     }
 
-    //The block size of our peer.
-    unsigned blockSize = dcachePort.peerBlockSize();
     //The size of the data we're trying to read.
     int fullSize = size;
 
     //The address of the second part of this access if it needs to be split
     //across a cache line boundary.
-    Addr secondAddr = roundDown(addr + size - 1, blockSize);
+    Addr secondAddr = roundDown(addr + size - 1, cacheLineSize());
 
     if(secondAddr > addr)
         size = secondAddr - addr;
diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc
index 1f453ca63..87a5245b2 100644
--- a/src/cpu/simple/timing.cc
+++ b/src/cpu/simple/timing.cc
@@ -404,7 +404,7 @@ TimingSimpleCPU::readMem(Addr addr, uint8_t *data,
     const int asid = 0;
     const ThreadID tid = 0;
     const Addr pc = thread->instAddr();
-    unsigned block_size = dcachePort.peerBlockSize();
+    unsigned block_size = cacheLineSize();
     BaseTLB::Mode mode = BaseTLB::Read;
 
     if (traceData) {
@@ -473,7 +473,7 @@ TimingSimpleCPU::writeMem(uint8_t *data, unsigned size,
     const int asid = 0;
     const ThreadID tid = 0;
     const Addr pc = thread->instAddr();
-    unsigned block_size = dcachePort.peerBlockSize();
+    unsigned block_size = cacheLineSize();
     BaseTLB::Mode mode = BaseTLB::Write;
 
     if (traceData) {
diff --git a/src/cpu/testers/memtest/memtest.cc b/src/cpu/testers/memtest/memtest.cc
index 37684ec5d..7f3ff0d03 100644
--- a/src/cpu/testers/memtest/memtest.cc
+++ b/src/cpu/testers/memtest/memtest.cc
@@ -95,7 +95,7 @@ MemTest::MemTest(const Params *p)
       tickEvent(this),
       cachePort("test", this),
       funcPort("functional", this),
-      funcProxy(funcPort),
+      funcProxy(funcPort, p->sys->cacheLineSize()),
       retryPkt(NULL),
 //      mainMem(main_mem),
 //      checkMem(check_mem),
@@ -105,6 +105,7 @@ MemTest::MemTest(const Params *p)
       percentUncacheable(p->percent_uncacheable),
       issueDmas(p->issue_dmas),
       masterId(p->sys->getMasterId(name())),
+      blockSize(p->sys->cacheLineSize()),
       progressInterval(p->progress_interval),
       nextProgressMessage(p->progress_interval),
       percentSourceUnaligned(p->percent_source_unaligned),
@@ -121,6 +122,9 @@ MemTest::MemTest(const Params *p)
     baseAddr2 = 0x400000;
     uncacheAddr = 0x800000;
 
+    blockAddrMask = blockSize - 1;
+    traceBlockAddr = blockAddr(traceBlockAddr);
+
     // set up counters
     noResponseCycles = 0;
     numReads = 0;
@@ -145,11 +149,6 @@ MemTest::getMasterPort(const std::string &if_name, PortID idx)
 void
 MemTest::init()
 {
-    // By the time init() is called, the ports should be hooked up.
-    blockSize = cachePort.peerBlockSize();
-    blockAddrMask = blockSize - 1;
-    traceBlockAddr = blockAddr(traceBlockAddr);
-
     // initial memory contents for both physical memory and functional
     // memory should be 0; no need to initialize them.
 }
diff --git a/src/cpu/testers/traffic_gen/traffic_gen.cc b/src/cpu/testers/traffic_gen/traffic_gen.cc
index ed3518bb4..0f006e67e 100644
--- a/src/cpu/testers/traffic_gen/traffic_gen.cc
+++ b/src/cpu/testers/traffic_gen/traffic_gen.cc
@@ -268,11 +268,10 @@ TrafficGen::parseConfig()
                             max_period, read_percent);
 
 
-                    if (port.deviceBlockSize() &&
-                        blocksize > port.deviceBlockSize())
+                    if (blocksize > system->cacheLineSize())
                         fatal("TrafficGen %s block size (%d) is larger than "
-                              "port block size (%d)\n", name(),
-                              blocksize, port.deviceBlockSize());
+                              "system block size (%d)\n", name(),
+                              blocksize, system->cacheLineSize());
 
                     if (read_percent > 100)
                         fatal("%s cannot have more than 100% reads", name());
diff --git a/src/cpu/thread_state.cc b/src/cpu/thread_state.cc
index d0f946989..7953b53c8 100644
--- a/src/cpu/thread_state.cc
+++ b/src/cpu/thread_state.cc
@@ -110,7 +110,8 @@ ThreadState::initMemProxies(ThreadContext *tc)
         // This cannot be done in the constructor as the thread state
         // itself is created in the base cpu constructor and the
         // getDataPort is a virtual function
-        physProxy = new PortProxy(baseCpu->getDataPort());
+        physProxy = new PortProxy(baseCpu->getDataPort(),
+                                  baseCpu->cacheLineSize());
 
         assert(virtProxy == NULL);
         virtProxy = new FSTranslatingPortProxy(tc);
author	Andreas Hansson <andreas.hansson@arm.com>	2013-07-18 08:31:16 -0400
committer	Andreas Hansson <andreas.hansson@arm.com>	2013-07-18 08:31:16 -0400
commit	d4273cc9a6f3c00566e97ebcd71509ed14477b37 (patch)
tree	9b50625fc5d2bb457a959f379a45687903660237 /src/cpu
parent	4e8ecd7c6fd0447f563179b5a8fdbb13b562ca9e (diff)
download	gem5-d4273cc9a6f3c00566e97ebcd71509ed14477b37.tar.xz