cpu: support atomic memory request type with AtomicOpFunctor

This patch enables all 4 CPU models (AtomicSimpleCPU, TimingSimpleCPU, MinorCPU and DerivO3CPU) to issue atomic memory (AMO) requests to memory system. Atomic memory instruction is treated as a special store instruction in all CPU models. In simple CPUs, an AMO request with an associated AtomicOpFunctor is simply sent to L1 dcache. In MinorCPU, an AMO request bypasses store buffer and waits for any conflicting store request(s) currently in the store buffer to retire before the AMO request is sent to the cache. AMO requests are not buffered in the store buffer, so their effects appear immediately in the cache. In DerivO3CPU, an AMO request is inserted in the store buffer so that it is delivered to the cache only after all previous stores are issued to the cache. Data forwarding between between an outstanding AMO in the store buffer and a subsequent load is not allowed since the AMO request does not hold valid data until it's executed in the cache. This implementation assumes that a target ISA implementation must insert enough memory fences as micro-ops around an atomic instruction to enforce a correct order of memory instructions with respect to its memory consistency model. Without extra memory fences, this implementation can allow AMOs and other memory instructions that do not conflict (i.e., not target the same address) to reorder. This implementation also assumes that atomic instructions execute within a cache line boundary since the cache for now is not able to execute an operation on two different cache lines in one single step. Therefore, ISAs like x86 that require multi-cache-line atomic instructions need to either use a pair of locking load and unlocking store or change the cache implementation to guarantee the atomicity of an atomic instruction. Change-Id: Ib8a7c81868ac05b98d73afc7d16eb88486f8cf9a Reviewed-on: https://gem5-review.googlesource.com/c/8188 Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com> Maintainer: Jason Lowe-Power <jason@lowepower.com>
author: Tuan Ta <qtt2@cornell.edu> 2018-01-22 13:12:50 -0500
committer: Tuan Ta <qtt2@cornell.edu> 2019-02-08 15:27:04 +0000
commit: 25dc765889d948693995cfa622f001aa94b5364b (patch)
tree: 38a8e93881ad150a482020a1fd706d664ee0c061 /src/cpu/o3/lsq.hh
parent: 165a7dab558c8118622a387683521bea1ebf2e6c (diff)
download: gem5-25dc765889d948693995cfa622f001aa94b5364b.tar.xz
1 files changed, 33 insertions, 12 deletions
diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh
index 81b7c04a5..f576dd3f4 100644
--- a/src/cpu/o3/lsq.hh
+++ b/src/cpu/o3/lsq.hh
@@ -191,7 +191,7 @@ class LSQ
         enum Flag : FlagsStorage
         {
             IsLoad              = 0x00000001,
-            /** True if this is a store that writes registers (SC). */
+            /** True if this is a store/atomic that writes registers (SC). */
             WbStore             = 0x00000002,
             Delayed             = 0x00000004,
             IsSplit             = 0x00000008,
@@ -211,7 +211,9 @@ class LSQ
             LSQEntryFreed       = 0x00000800,
             /** Store written back. */
             WritebackScheduled  = 0x00001000,
-            WritebackDone       = 0x00002000
+            WritebackDone       = 0x00002000,
+            /** True if this is an atomic request */
+            IsAtomic            = 0x00004000
         };
         FlagsType flags;
 
@@ -250,32 +252,39 @@ class LSQ
         const uint32_t _size;
         const Request::Flags _flags;
         uint32_t _numOutstandingPackets;
+        AtomicOpFunctor *_amo_op;
       protected:
         LSQUnit* lsqUnit() { return &_port; }
         LSQRequest(LSQUnit* port, const DynInstPtr& inst, bool isLoad) :
             _state(State::NotIssued), _senderState(nullptr),
             _port(*port), _inst(inst), _data(nullptr),
             _res(nullptr), _addr(0), _size(0), _flags(0),
-            _numOutstandingPackets(0)
+            _numOutstandingPackets(0), _amo_op(nullptr)
         {
             flags.set(Flag::IsLoad, isLoad);
-            flags.set(Flag::WbStore, _inst->isStoreConditional());
+            flags.set(Flag::WbStore,
+                      _inst->isStoreConditional() || _inst->isAtomic());
+            flags.set(Flag::IsAtomic, _inst->isAtomic());
             install();
         }
         LSQRequest(LSQUnit* port, const DynInstPtr& inst, bool isLoad,
                    const Addr& addr, const uint32_t& size,
                    const Request::Flags& flags_,
-                   PacketDataPtr data = nullptr, uint64_t* res = nullptr)
+                   PacketDataPtr data = nullptr, uint64_t* res = nullptr,
+                   AtomicOpFunctor* amo_op = nullptr)
             : _state(State::NotIssued), _senderState(nullptr),
             numTranslatedFragments(0),
             numInTranslationFragments(0),
             _port(*port), _inst(inst), _data(data),
             _res(res), _addr(addr), _size(size),
             _flags(flags_),
-            _numOutstandingPackets(0)
+            _numOutstandingPackets(0),
+            _amo_op(amo_op)
         {
             flags.set(Flag::IsLoad, isLoad);
-            flags.set(Flag::WbStore, _inst->isStoreConditional());
+            flags.set(Flag::WbStore,
+                      _inst->isStoreConditional() || _inst->isAtomic());
+            flags.set(Flag::IsAtomic, _inst->isAtomic());
             install();
         }
 
@@ -285,12 +294,20 @@ class LSQ
             return flags.isSet(Flag::IsLoad);
         }
 
+        bool
+        isAtomic() const
+        {
+            return flags.isSet(Flag::IsAtomic);
+        }
+
         /** Install the request in the LQ/SQ. */
         void install()
         {
             if (isLoad()) {
                 _port.loadQueue[_inst->lqIdx].setRequest(this);
             } else {
+                // Store, StoreConditional, and Atomic requests are pushed
+                // to this storeQueue
                 _port.storeQueue[_inst->sqIdx].setRequest(this);
             }
         }
@@ -609,17 +626,21 @@ class LSQ
         using LSQRequest::numInTranslationFragments;
         using LSQRequest::numTranslatedFragments;
         using LSQRequest::_numOutstandingPackets;
+        using LSQRequest::_amo_op;
       public:
         SingleDataRequest(LSQUnit* port, const DynInstPtr& inst, bool isLoad,
                           const Addr& addr, const uint32_t& size,
                           const Request::Flags& flags_,
                           PacketDataPtr data = nullptr,
-                          uint64_t* res = nullptr) :
-            LSQRequest(port, inst, isLoad, addr, size, flags_, data, res)
+                          uint64_t* res = nullptr,
+                          AtomicOpFunctor* amo_op = nullptr) :
+            LSQRequest(port, inst, isLoad, addr, size, flags_, data, res,
+                       amo_op)
         {
             LSQRequest::_requests.push_back(
-                std::make_shared<Request>(inst->getASID(), addr, size, flags_,
-                    inst->masterId(), inst->instAddr(), inst->contextId()));
+                    std::make_shared<Request>(inst->getASID(), addr, size,
+                    flags_, inst->masterId(), inst->instAddr(),
+                    inst->contextId(), amo_op));
             LSQRequest::_requests.back()->setReqInstSeqNum(inst->seqNum);
         }
         inline virtual ~SingleDataRequest() {}
@@ -928,7 +949,7 @@ class LSQ
 
     Fault pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
                       unsigned int size, Addr addr, Request::Flags flags,
-                      uint64_t *res);
+                      uint64_t *res, AtomicOpFunctor *amo_op);
 
     /** The CPU pointer. */
     O3CPU *cpu;
author	Tuan Ta <qtt2@cornell.edu>	2018-01-22 13:12:50 -0500
committer	Tuan Ta <qtt2@cornell.edu>	2019-02-08 15:27:04 +0000
commit	25dc765889d948693995cfa622f001aa94b5364b (patch)
tree	38a8e93881ad150a482020a1fd706d664ee0c061 /src/cpu/o3/lsq.hh
parent	165a7dab558c8118622a387683521bea1ebf2e6c (diff)
download	gem5-25dc765889d948693995cfa622f001aa94b5364b.tar.xz