7 files changed, 164 insertions, 71 deletions
diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc
index caf2427ef..c5b024532 100644
--- a/src/cpu/simple/atomic.cc
+++ b/src/cpu/simple/atomic.cc
@@ -49,6 +49,7 @@
 #include "base/output.hh"
 #include "config/the_isa.hh"
 #include "cpu/exetrace.hh"
+#include "cpu/utils.hh"
 #include "debug/Drain.hh"
 #include "debug/ExecFaulting.hh"
 #include "debug/SimpleCPU.hh"
@@ -333,9 +334,43 @@ AtomicSimpleCPU::AtomicCPUDPort::recvFunctionalSnoop(PacketPtr pkt)
     }
 }
 
+bool
+AtomicSimpleCPU::genMemFragmentRequest(const RequestPtr& req, Addr frag_addr,
+                                       int size, Request::Flags flags,
+                                       const std::vector<bool>& byte_enable,
+                                       int& frag_size, int& size_left) const
+{
+    // Instruction address of the current thread, passed on to the request.
+    const Addr pc = threadInfo[curThread]->thread->pcState().instAddr();
+
+    // A fragment ends at the next cache-line boundary or at the end of the
+    // access, whichever comes first.
+    const Addr line_left =
+        cacheLineSize() - addrBlockOffset(frag_addr, cacheLineSize());
+    frag_size = std::min(line_left, (Addr) size_left);
+    size_left -= frag_size;
+
+    if (byte_enable.empty()) {
+        // No mask supplied: set up the request for the whole fragment.
+        req->setVirt(0, frag_addr, frag_size, flags, dataMasterId(), pc);
+        return true;
+    }
+
+    // Slice the fragment's part out of the mask for the whole access.
+    const auto mask_end = byte_enable.begin() + (size - size_left);
+    const auto mask_begin = mask_end - frag_size;
+    if (!isAnyActiveElement(mask_begin, mask_end))
+        return false;   // Fully masked out; req is left untouched.
+
+    req->setVirt(0, frag_addr, frag_size, flags, dataMasterId(), pc);
+    req->setByteEnable(std::vector<bool>(mask_begin, mask_end));
+    return true;
+}
+
 Fault
 AtomicSimpleCPU::readMem(Addr addr, uint8_t * data, unsigned size,
-                         Request::Flags flags)
+                         Request::Flags flags,
+                         const std::vector<bool>& byteEnable)
 {
     SimpleExecContext& t_info = *threadInfo[curThread];
     SimpleThread* thread = t_info.thread;
@@ -346,28 +381,29 @@ AtomicSimpleCPU::readMem(Addr addr, uint8_t * data, unsigned size,
     if (traceData)
         traceData->setMem(addr, size, flags);
 
-    //The size of the data we're trying to read.
-    int fullSize = size;
-
-    //The address of the second part of this access if it needs to be split
-    //across a cache line boundary.
-    Addr secondAddr = roundDown(addr + size - 1, cacheLineSize());
-
-    if (secondAddr > addr)
-        size = secondAddr - addr;
-
     dcache_latency = 0;
 
     req->taskId(taskId());
+
+    Addr frag_addr = addr;
+    int frag_size = 0;
+    int size_left = size;
+    bool predicate;
+    Fault fault = NoFault;
+
     while (1) {
-        req->setVirt(0, addr, size, flags, dataMasterId(), thread->pcState().instAddr());
+        predicate = genMemFragmentRequest(req, frag_addr, size, flags,
+                                          byteEnable, frag_size, size_left);
 
         // translate to physical address
-        Fault fault = thread->dtb->translateAtomic(req, thread->getTC(),
-                                                          BaseTLB::Read);
+        if (predicate) {
+            fault = thread->dtb->translateAtomic(req, thread->getTC(),
+                                                 BaseTLB::Read);
+        }
 
         // Now do the access.
-        if (fault == NoFault && !req->getFlags().isSet(Request::NO_ACCESS)) {
+        if (predicate && fault == NoFault &&
+            !req->getFlags().isSet(Request::NO_ACCESS)) {
             Packet pkt(req, Packet::makeReadCmd(req));
             pkt.dataStatic(data);
 
@@ -394,33 +430,29 @@ AtomicSimpleCPU::readMem(Addr addr, uint8_t * data, unsigned size,
             }
         }
 
-        //If we don't need to access a second cache line, stop now.
-        if (secondAddr <= addr)
-        {
+        // If we don't need to access further cache lines, stop now.
+        if (size_left == 0) {
             if (req->isLockedRMW() && fault == NoFault) {
                 assert(!locked);
                 locked = true;
             }
-
             return fault;
         }
 
         /*
-         * Set up for accessing the second cache line.
+         * Set up for accessing the next cache line.
          */
+        frag_addr += frag_size;
 
         //Move the pointer we're reading into to the correct location.
-        data += size;
-        //Adjust the size to get the remaining bytes.
-        size = addr + fullSize - secondAddr;
-        //And access the right address.
-        addr = secondAddr;
+        data += frag_size;
     }
 }
 
 Fault
 AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size, Addr addr,
-                          Request::Flags flags, uint64_t *res)
+                          Request::Flags flags, uint64_t *res,
+                          const std::vector<bool>& byteEnable)
 {
     SimpleExecContext& t_info = *threadInfo[curThread];
     SimpleThread* thread = t_info.thread;
@@ -439,32 +471,37 @@ AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size, Addr addr,
     if (traceData)
         traceData->setMem(addr, size, flags);
 
-    //The size of the data we're trying to read.
-    int fullSize = size;
-
-    //The address of the second part of this access if it needs to be split
-    //across a cache line boundary.
-    Addr secondAddr = roundDown(addr + size - 1, cacheLineSize());
-
-    if (secondAddr > addr)
-        size = secondAddr - addr;
-
     dcache_latency = 0;
 
     req->taskId(taskId());
+
+    Addr frag_addr = addr;
+    int frag_size = 0;
+    int size_left = size;
+    int curr_frag_id = 0;
+    bool predicate;
+    Fault fault = NoFault;
+
     while (1) {
-        req->setVirt(0, addr, size, flags, dataMasterId(), thread->pcState().instAddr());
+        predicate = genMemFragmentRequest(req, frag_addr, size, flags,
+                                          byteEnable, frag_size, size_left);
 
         // translate to physical address
-        Fault fault = thread->dtb->translateAtomic(req, thread->getTC(), BaseTLB::Write);
+        if (predicate)
+            fault = thread->dtb->translateAtomic(req, thread->getTC(),
+                                                 BaseTLB::Write);
 
         // Now do the access.
-        if (fault == NoFault) {
+        if (predicate && fault == NoFault) {
             bool do_access = true;  // flag to suppress cache access
 
             if (req->isLLSC()) {
-                do_access = TheISA::handleLockedWrite(thread, req, dcachePort.cacheBlockMask);
+                assert(curr_frag_id == 0);
+                do_access =
+                    TheISA::handleLockedWrite(thread, req,
+                                              dcachePort.cacheBlockMask);
             } else if (req->isSwap()) {
+                assert(curr_frag_id == 0);
                 if (req->isCondSwap()) {
                     assert(res);
                     req->setExtraData(*res);
@@ -488,8 +525,8 @@ AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size, Addr addr,
                 assert(!pkt.isError());
 
                 if (req->isSwap()) {
-                    assert(res);
-                    memcpy(res, pkt.getConstPtr<uint8_t>(), fullSize);
+                    assert(res && curr_frag_id == 0);
+                    memcpy(res, pkt.getConstPtr<uint8_t>(), size);
                 }
             }
 
@@ -500,14 +537,14 @@ AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size, Addr addr,
 
         //If there's a fault or we don't need to access a second cache line,
         //stop now.
-        if (fault != NoFault || secondAddr <= addr)
+        if (fault != NoFault || size_left == 0)
         {
             if (req->isLockedRMW() && fault == NoFault) {
-                assert(locked);
+                assert(byteEnable.empty());
+                assert(locked && curr_frag_id == 0);
                 locked = false;
             }
 
-
             if (fault != NoFault && req->isPrefetch()) {
                 return NoFault;
             } else {
@@ -516,15 +553,14 @@ AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size, Addr addr,
         }
 
         /*
-         * Set up for accessing the second cache line.
+         * Set up for accessing the next cache line.
          */
+        frag_addr += frag_size;
 
         //Move the pointer we're reading into to the correct location.
-        data += size;
-        //Adjust the size to get the remaining bytes.
-        size = addr + fullSize - secondAddr;
-        //And access the right address.
-        addr = secondAddr;
+        data += frag_size;
+
+        curr_frag_id++;
     }
 }
 
diff --git a/src/cpu/simple/atomic.hh b/src/cpu/simple/atomic.hh
index 84f379121..100306981 100644
--- a/src/cpu/simple/atomic.hh
+++ b/src/cpu/simple/atomic.hh
@@ -195,11 +195,36 @@ class AtomicSimpleCPU : public BaseSimpleCPU
     void activateContext(ThreadID thread_num) override;
     void suspendContext(ThreadID thread_num) override;
 
+    /**
+     * Helper function used to set up the request for a single fragment of a
+     * memory access.
+     *
+     * Takes care of setting up the appropriate byte-enable mask for the
+     * fragment, given the mask for the entire memory access.
+     *
+     * @param req Request to populate; left untouched when false is returned.
+     * @param frag_addr Start address of the fragment.
+     * @param size Total size of the memory access in bytes.
+     * @param flags Request flags.
+     * @param byte_enable Mask for the entire access; empty disables masking.
+     * @param[out] frag_size Fragment size in bytes.
+     * @param[in,out] size_left Bytes left to process; reduced by frag_size.
+     * @return False iff a non-empty mask is all-false for this fragment.
+     */
+    bool genMemFragmentRequest(const RequestPtr& req, Addr frag_addr,
+                               int size, Request::Flags flags,
+                               const std::vector<bool>& byte_enable,
+                               int& frag_size, int& size_left) const;
+
     Fault readMem(Addr addr, uint8_t *data, unsigned size,
-                  Request::Flags flags) override;
+                  Request::Flags flags,
+                  const std::vector<bool>& byteEnable = std::vector<bool>())
+        override;
 
     Fault writeMem(uint8_t *data, unsigned size,
-                   Addr addr, Request::Flags flags, uint64_t *res) override;
+                   Addr addr, Request::Flags flags, uint64_t *res,
+                   const std::vector<bool>& byteEnable = std::vector<bool>())
+        override;
 
     Fault amoMem(Addr addr, uint8_t* data, unsigned size,
                  Request::Flags flags, AtomicOpFunctor *amo_op) override;
diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc
index 298ba9f9e..816add707 100644
--- a/src/cpu/simple/base.cc
+++ b/src/cpu/simple/base.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010-2012, 2015, 2017 ARM Limited
+ * Copyright (c) 2010-2012, 2015, 2017, 2018 ARM Limited
  * Copyright (c) 2013 Advanced Micro Devices, Inc.
  * All rights reserved
  *
@@ -494,6 +494,10 @@ BaseSimpleCPU::preExecute()
     thread->setFloatReg(ZeroReg, 0);
 #endif // ALPHA_ISA
 
+    // Reset the predicate and the memory-access predicate to true
+    t_info.setPredicate(true);
+    t_info.setMemAccPredicate(true);
+
     // check for instruction-count-based events
     comInstEventQueue[curThread]->serviceEvents(t_info.numInst);
     system->instEventQueue.serviceEvents(system->totalNumInsts);
diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh
index 8060b07ad..5404e5df8 100644
--- a/src/cpu/simple/base.hh
+++ b/src/cpu/simple/base.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2012,2015 ARM Limited
+ * Copyright (c) 2011-2012,2015,2018 ARM Limited
  * Copyright (c) 2013 Advanced Micro Devices, Inc.
  * All rights reserved
  *
@@ -143,15 +143,21 @@ class BaseSimpleCPU : public BaseCPU
     void startup() override;
 
     virtual Fault readMem(Addr addr, uint8_t* data, unsigned size,
-                          Request::Flags flags)
+                          Request::Flags flags,
+                          const std::vector<bool>& byteEnable =
+                              std::vector<bool>())
     { panic("readMem() is not implemented\n"); }
 
     virtual Fault initiateMemRead(Addr addr, unsigned size,
-                                  Request::Flags flags)
+                                  Request::Flags flags,
+                                  const std::vector<bool>& byteEnable =
+                                      std::vector<bool>())
     { panic("initiateMemRead() is not implemented\n"); }
 
     virtual Fault writeMem(uint8_t* data, unsigned size, Addr addr,
-                           Request::Flags flags, uint64_t* res)
+                           Request::Flags flags, uint64_t* res,
+                           const std::vector<bool>& byteEnable =
+                               std::vector<bool>())
     { panic("writeMem() is not implemented\n"); }
 
     virtual Fault amoMem(Addr addr, uint8_t* data, unsigned size,
diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh
index be7a863c5..de98d6efd 100644
--- a/src/cpu/simple/exec_context.hh
+++ b/src/cpu/simple/exec_context.hh
@@ -434,26 +434,32 @@ class SimpleExecContext : public ExecContext {
         thread->pcState(val);
     }
 
-
     Fault
     readMem(Addr addr, uint8_t *data, unsigned int size,
-            Request::Flags flags) override
+            Request::Flags flags,
+            const std::vector<bool>& byteEnable = std::vector<bool>()) override
     {
-        return cpu->readMem(addr, data, size, flags);
+        assert(byteEnable.empty() || byteEnable.size() == size);
+        return cpu->readMem(addr, data, size, flags, byteEnable);
     }
 
     Fault
     initiateMemRead(Addr addr, unsigned int size,
-                    Request::Flags flags) override
+            Request::Flags flags,
+            const std::vector<bool>& byteEnable = std::vector<bool>()) override
     {
-        return cpu->initiateMemRead(addr, size, flags);
+        assert(byteEnable.empty() || byteEnable.size() == size);
+        return cpu->initiateMemRead(addr, size, flags, byteEnable);
     }
 
     Fault
     writeMem(uint8_t *data, unsigned int size, Addr addr,
-             Request::Flags flags, uint64_t *res) override
+             Request::Flags flags, uint64_t *res,
+             const std::vector<bool>& byteEnable = std::vector<bool>())
+        override
     {
-        return cpu->writeMem(data, size, addr, flags, res);
+        assert(byteEnable.empty() || byteEnable.size() == size);
+        return cpu->writeMem(data, size, addr, flags, res, byteEnable);
     }
 
     Fault amoMem(Addr addr, uint8_t *data, unsigned int size,
diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc
index 637308a96..454259099 100644
--- a/src/cpu/simple/timing.cc
+++ b/src/cpu/simple/timing.cc
@@ -1,6 +1,6 @@
 /*
  * Copyright 2014 Google, Inc.
- * Copyright (c) 2010-2013,2015,2017 ARM Limited
+ * Copyright (c) 2010-2013,2015,2017-2018 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -417,7 +417,8 @@ TimingSimpleCPU::buildSplitPacket(PacketPtr &pkt1, PacketPtr &pkt2,
 
 Fault
 TimingSimpleCPU::initiateMemRead(Addr addr, unsigned size,
-                                 Request::Flags flags)
+                                 Request::Flags flags,
+                                 const std::vector<bool>& byteEnable)
 {
     SimpleExecContext &t_info = *threadInfo[curThread];
     SimpleThread* thread = t_info.thread;
@@ -434,6 +435,9 @@ TimingSimpleCPU::initiateMemRead(Addr addr, unsigned size,
     RequestPtr req = std::make_shared<Request>(
         asid, addr, size, flags, dataMasterId(), pc,
         thread->contextId());
+    if (!byteEnable.empty()) {
+        req->setByteEnable(byteEnable);
+    }
 
     req->taskId(taskId());
 
@@ -491,7 +495,8 @@ TimingSimpleCPU::handleWritePacket()
 
 Fault
 TimingSimpleCPU::writeMem(uint8_t *data, unsigned size,
-                          Addr addr, Request::Flags flags, uint64_t *res)
+                          Addr addr, Request::Flags flags, uint64_t *res,
+                          const std::vector<bool>& byteEnable)
 {
     SimpleExecContext &t_info = *threadInfo[curThread];
     SimpleThread* thread = t_info.thread;
@@ -516,6 +521,9 @@ TimingSimpleCPU::writeMem(uint8_t *data, unsigned size,
     RequestPtr req = std::make_shared<Request>(
         asid, addr, size, flags, dataMasterId(), pc,
         thread->contextId());
+    if (!byteEnable.empty()) {
+        req->setByteEnable(byteEnable);
+    }
 
     req->taskId(taskId());
 
@@ -523,6 +531,10 @@ TimingSimpleCPU::writeMem(uint8_t *data, unsigned size,
     assert(split_addr <= addr || split_addr - addr < block_size);
 
     _status = DTBWaitResponse;
+
+    // TODO: TimingSimpleCPU doesn't support arbitrarily long multi-line
+    // memory accesses yet
+
     if (split_addr > addr) {
         RequestPtr req1, req2;
         assert(!req->isLLSC() && !req->isSwap());
diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh
index ce0a4dbfc..a49822fc1 100644
--- a/src/cpu/simple/timing.hh
+++ b/src/cpu/simple/timing.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012-2013,2015 ARM Limited
+ * Copyright (c) 2012-2013,2015,2018 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -283,10 +283,14 @@ class TimingSimpleCPU : public BaseSimpleCPU
     void suspendContext(ThreadID thread_num) override;
 
     Fault initiateMemRead(Addr addr, unsigned size,
-                          Request::Flags flags) override;
+            Request::Flags flags,
+            const std::vector<bool>& byteEnable = std::vector<bool>())
+        override;
 
     Fault writeMem(uint8_t *data, unsigned size,
-                   Addr addr, Request::Flags flags, uint64_t *res) override;
+                   Addr addr, Request::Flags flags, uint64_t *res,
+                   const std::vector<bool>& byteEnable = std::vector<bool>())
+        override;
 
     Fault initiateMemAMO(Addr addr, unsigned size, Request::Flags flags,
                          AtomicOpFunctor *amo_op) override;