From c58cb8c9dbeef377da180f1fdaaa1c0eadf85550 Mon Sep 17 00:00:00 2001
From: Giacomo Gabrielli <giacomo.gabrielli@arm.com>
Date: Fri, 7 Jul 2017 14:13:11 +0100
Subject: cpu,mem: Add support for partial loads/stores and wide mem. accesses

This changeset adds support for partial (or masked) loads/stores, i.e.
loads/stores that can disable accesses to individual bytes within the
target address range.  In addition, this changeset extends the code to
crack memory accesses across most CPU models (TimingSimpleCPU still
TBD), so that arbitrarily wide memory accesses are supported.  These
changes are required for supporting ISAs with wide vectors.

Additional authors:
- Gabor Dozsa <gabor.dozsa@arm.com>
- Tiago Muck <tiago.muck@arm.com>

Change-Id: Ibad33541c258ad72925c0b1d5abc3e5e8bf92d92
Signed-off-by: Giacomo Gabrielli <giacomo.gabrielli@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/13518
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Nikos Nikoleris <nikos.nikoleris@arm.com>
Maintainer: Nikos Nikoleris <nikos.nikoleris@arm.com>
---
 src/cpu/checker/cpu.cc | 120 ++++++++++++++++++++++++++++++++-----------------
 src/cpu/checker/cpu.hh |  29 +++++++++++-
 2 files changed, 106 insertions(+), 43 deletions(-)

(limited to 'src/cpu/checker')
diff --git a/src/cpu/checker/cpu.cc b/src/cpu/checker/cpu.cc
index 7f8eada4c..cca6d6b12 100644
--- a/src/cpu/checker/cpu.cc
+++ b/src/cpu/checker/cpu.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011,2013,2017 ARM Limited
+ * Copyright (c) 2011,2013,2017-2018 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -52,6 +52,7 @@
 #include "cpu/simple_thread.hh"
 #include "cpu/static_inst.hh"
 #include "cpu/thread_context.hh"
+#include "cpu/utils.hh"
 #include "params/CheckerCPU.hh"
 #include "sim/full_system.hh"
 
@@ -139,31 +140,68 @@ CheckerCPU::unserialize(CheckpointIn &cp)
 {
 }
 
+RequestPtr
+CheckerCPU::genMemFragmentRequest(Addr frag_addr, int size,
+                                  Request::Flags flags,
+                                  const std::vector<bool>& byte_enable,
+                                  int& frag_size, int& size_left) const
+{
+    frag_size = std::min(
+        cacheLineSize() - addrBlockOffset(frag_addr, cacheLineSize()),
+        (Addr) size_left);
+    size_left -= frag_size;
+
+    RequestPtr mem_req;
+
+    if (!byte_enable.empty()) {
+        // Set up byte-enable mask for the current fragment
+        auto it_start = byte_enable.cbegin() + (size - (frag_size +
+                                                        size_left));
+        auto it_end = byte_enable.cbegin() + (size - size_left);
+        if (isAnyActiveElement(it_start, it_end)) {
+            mem_req = std::make_shared<Request>(0, frag_addr, frag_size,
+                    flags, masterId, thread->pcState().instAddr(),
+                    tc->contextId());
+            mem_req->setByteEnable(std::vector<bool>(it_start, it_end));
+        }
+    } else {
+        mem_req = std::make_shared<Request>(0, frag_addr, frag_size,
+                    flags, masterId, thread->pcState().instAddr(),
+                    tc->contextId());
+    }
+
+    return mem_req;
+}
+
 Fault
 CheckerCPU::readMem(Addr addr, uint8_t *data, unsigned size,
-                    Request::Flags flags)
+                    Request::Flags flags,
+                    const std::vector<bool>& byteEnable)
 {
     Fault fault = NoFault;
-    int fullSize = size;
-    Addr secondAddr = roundDown(addr + size - 1, cacheLineSize());
     bool checked_flags = false;
     bool flags_match = true;
     Addr pAddr = 0x0;
 
-
-    if (secondAddr > addr)
-       size = secondAddr - addr;
+    Addr frag_addr = addr;
+    int frag_size = 0;
+    int size_left = size;
+    bool predicate;
 
     // Need to account for multiple accesses like the Atomic and TimingSimple
     while (1) {
-        auto mem_req = std::make_shared<Request>(
-            0, addr, size, flags, masterId,
-            thread->pcState().instAddr(), tc->contextId());
+        RequestPtr mem_req = genMemFragmentRequest(frag_addr, size, flags,
+                                                   byteEnable, frag_size,
+                                                   size_left);
+
+        predicate = (mem_req != nullptr);
 
         // translate to physical address
-        fault = dtb->translateFunctional(mem_req, tc, BaseTLB::Read);
+        if (predicate) {
+            fault = dtb->translateFunctional(mem_req, tc, BaseTLB::Read);
+        }
 
-        if (!checked_flags && fault == NoFault && unverifiedReq) {
+        if (predicate && !checked_flags && fault == NoFault && unverifiedReq) {
             flags_match = checkFlags(unverifiedReq, mem_req->getVaddr(),
                                      mem_req->getPaddr(), mem_req->getFlags());
             pAddr = mem_req->getPaddr();
@@ -171,7 +209,7 @@ CheckerCPU::readMem(Addr addr, uint8_t *data, unsigned size,
         }
 
         // Now do the access
-        if (fault == NoFault &&
+        if (predicate && fault == NoFault &&
             !mem_req->getFlags().isSet(Request::NO_ACCESS)) {
             PacketPtr pkt = Packet::createRead(mem_req);
 
@@ -182,7 +220,7 @@ CheckerCPU::readMem(Addr addr, uint8_t *data, unsigned size,
                 dcachePort->sendFunctional(pkt);
             } else {
                 // Assume the data is correct if it's an uncached access
-                memcpy(data, unverifiedMemData, size);
+                memcpy(data, unverifiedMemData, frag_size);
             }
 
             delete pkt;
@@ -196,22 +234,21 @@ CheckerCPU::readMem(Addr addr, uint8_t *data, unsigned size,
         }
 
         //If we don't need to access a second cache line, stop now.
-        if (secondAddr <= addr)
+        if (size_left == 0)
         {
             break;
         }
 
         // Setup for accessing next cache line
-        data += size;
-        unverifiedMemData += size;
-        size = addr + fullSize - secondAddr;
-        addr = secondAddr;
+        frag_addr += frag_size;
+        data += frag_size;
+        unverifiedMemData += frag_size;
     }
 
     if (!flags_match) {
         warn("%lli: Flags do not match CPU:%#x %#x %#x Checker:%#x %#x %#x\n",
              curTick(), unverifiedReq->getVaddr(), unverifiedReq->getPaddr(),
-             unverifiedReq->getFlags(), addr, pAddr, flags);
+             unverifiedReq->getFlags(), frag_addr, pAddr, flags);
         handleError();
     }
 
@@ -220,31 +257,35 @@ CheckerCPU::readMem(Addr addr, uint8_t *data, unsigned size,
 
 Fault
 CheckerCPU::writeMem(uint8_t *data, unsigned size,
-                     Addr addr, Request::Flags flags, uint64_t *res)
+                     Addr addr, Request::Flags flags, uint64_t *res,
+                     const std::vector<bool>& byteEnable)
 {
+    assert(byteEnable.empty() || byteEnable.size() == size);
+
     Fault fault = NoFault;
     bool checked_flags = false;
     bool flags_match = true;
     Addr pAddr = 0x0;
     static uint8_t zero_data[64] = {};
 
-    int fullSize = size;
-
-    Addr secondAddr = roundDown(addr + size - 1, cacheLineSize());
-
-    if (secondAddr > addr)
-        size = secondAddr - addr;
+    Addr frag_addr = addr;
+    int frag_size = 0;
+    int size_left = size;
+    bool predicate;
 
     // Need to account for a multiple access like Atomic and Timing CPUs
     while (1) {
-        auto mem_req = std::make_shared<Request>(
-            0, addr, size, flags, masterId,
-            thread->pcState().instAddr(), tc->contextId());
+        RequestPtr mem_req = genMemFragmentRequest(frag_addr, size, flags,
+                                                   byteEnable, frag_size,
+                                                   size_left);
 
-        // translate to physical address
-        fault = dtb->translateFunctional(mem_req, tc, BaseTLB::Write);
+        predicate = (mem_req != nullptr);
+
+        if (predicate) {
+            fault = dtb->translateFunctional(mem_req, tc, BaseTLB::Write);
+        }
 
-        if (!checked_flags && fault == NoFault && unverifiedReq) {
+        if (predicate && !checked_flags && fault == NoFault && unverifiedReq) {
            flags_match = checkFlags(unverifiedReq, mem_req->getVaddr(),
                                     mem_req->getPaddr(), mem_req->getFlags());
            pAddr = mem_req->getPaddr();
@@ -261,7 +302,7 @@ CheckerCPU::writeMem(uint8_t *data, unsigned size,
         bool was_prefetch = mem_req->isPrefetch();
 
         //If we don't need to access a second cache line, stop now.
-        if (fault != NoFault || secondAddr <= addr)
+        if (fault != NoFault || size_left == 0)
         {
             if (fault != NoFault && was_prefetch) {
               fault = NoFault;
@@ -269,16 +310,13 @@ CheckerCPU::writeMem(uint8_t *data, unsigned size,
             break;
         }
 
-        //Update size and access address
-        size = addr + fullSize - secondAddr;
-        //And access the right address.
-        addr = secondAddr;
+        frag_addr += frag_size;
    }
 
    if (!flags_match) {
        warn("%lli: Flags do not match CPU:%#x %#x Checker:%#x %#x %#x\n",
             curTick(), unverifiedReq->getVaddr(), unverifiedReq->getPaddr(),
-            unverifiedReq->getFlags(), addr, pAddr, flags);
+            unverifiedReq->getFlags(), frag_addr, pAddr, flags);
        handleError();
    }
 
@@ -304,12 +342,12 @@ CheckerCPU::writeMem(uint8_t *data, unsigned size,
    // const set of zeros.
    if (flags & Request::STORE_NO_DATA) {
        assert(!data);
-       assert(sizeof(zero_data) <= fullSize);
+       assert(sizeof(zero_data) <= size);
        data = zero_data;
    }
 
    if (unverifiedReq && unverifiedMemData &&
-       memcmp(data, unverifiedMemData, fullSize) && extraData) {
+       memcmp(data, unverifiedMemData, size) && extraData) {
            warn("%lli: Store value does not match value sent to memory! "
                   "data: %#x inst_data: %#x", curTick(), data,
                   unverifiedMemData);
diff --git a/src/cpu/checker/cpu.hh b/src/cpu/checker/cpu.hh
index 8c3000005..66632b720 100644
--- a/src/cpu/checker/cpu.hh
+++ b/src/cpu/checker/cpu.hh
@@ -531,11 +531,36 @@ class CheckerCPU : public BaseCPU, public ExecContext
         this->dtb->demapPage(vaddr, asn);
     }
 
+    /**
+     * Helper function used to generate the request for a single fragment of a
+     * memory access.
+     *
+     * Takes care of setting up the appropriate byte-enable mask for the
+     * fragment, given the mask for the entire memory access.
+     *
+     * @param frag_addr Start address of the fragment.
+     * @param size Total size of the memory access in bytes.
+     * @param flags Request flags.
+     * @param byte_enable Byte-enable mask for the entire memory access.
+     * @param[out] frag_size Fragment size.
+     * @param[in,out] size_left Size left to be processed in the memory access.
+     * @return Pointer to the allocated Request, nullptr if the byte-enable
+     * mask is all-false for the fragment.
+     */
+    RequestPtr genMemFragmentRequest(Addr frag_addr, int size,
+                                     Request::Flags flags,
+                                     const std::vector<bool>& byte_enable,
+                                     int& frag_size, int& size_left) const;
+
     Fault readMem(Addr addr, uint8_t *data, unsigned size,
-                  Request::Flags flags) override;
+                  Request::Flags flags,
+                  const std::vector<bool>& byteEnable = std::vector<bool>())
+        override;
 
     Fault writeMem(uint8_t *data, unsigned size, Addr addr,
-                   Request::Flags flags, uint64_t *res) override;
+                   Request::Flags flags, uint64_t *res,
+                   const std::vector<bool>& byteEnable = std::vector<bool>())
+        override;
 
     Fault amoMem(Addr addr, uint8_t* data, unsigned size,
                  Request::Flags flags, AtomicOpFunctor *amo_op) override
-- 
cgit v1.2.3