author      Tuan Ta <qtt2@cornell.edu>    2018-01-22 13:12:50 -0500
committer   Tuan Ta <qtt2@cornell.edu>    2019-02-08 15:27:04 +0000
commit      25dc765889d948693995cfa622f001aa94b5364b (patch)
tree        38a8e93881ad150a482020a1fd706d664ee0c061 /src/cpu/o3
parent      165a7dab558c8118622a387683521bea1ebf2e6c (diff)
download    gem5-25dc765889d948693995cfa622f001aa94b5364b.tar.xz
cpu: support atomic memory request type with AtomicOpFunctor
This patch enables all four CPU models (AtomicSimpleCPU, TimingSimpleCPU, MinorCPU and DerivO3CPU) to issue atomic memory (AMO) requests to the memory system. An atomic memory instruction is treated as a special store instruction in all CPU models.

In the simple CPUs, an AMO request with its associated AtomicOpFunctor is simply sent to the L1 dcache.

In MinorCPU, an AMO request bypasses the store buffer and waits for any conflicting store request(s) currently in the store buffer to retire before it is sent to the cache. AMO requests are not buffered in the store buffer, so their effects appear immediately in the cache.

In DerivO3CPU, an AMO request is inserted into the store buffer so that it is delivered to the cache only after all previous stores have been issued to the cache. Data forwarding between an outstanding AMO in the store buffer and a subsequent load is not allowed, since the AMO request does not hold valid data until it is executed in the cache.

This implementation assumes that a target ISA implementation inserts enough memory fences, as micro-ops around an atomic instruction, to enforce a correct order of memory instructions with respect to its memory consistency model. Without extra memory fences, this implementation can allow AMOs and other memory instructions that do not conflict (i.e., that do not target the same address) to be reordered.

This implementation also assumes that atomic instructions execute within a cache-line boundary, since the cache is currently unable to execute an operation on two different cache lines in a single step. Therefore, ISAs like x86 that require multi-cache-line atomic instructions need to either use a pair of locking load and unlocking store or change the cache implementation to guarantee the atomicity of an atomic instruction.

Change-Id: Ib8a7c81868ac05b98d73afc7d16eb88486f8cf9a
Reviewed-on: https://gem5-review.googlesource.com/c/8188
Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Maintainer: Jason Lowe-Power <jason@lowepower.com>
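For orientation (not part of this change): the AtomicOpFunctor attached to an AMO request is the object the cache eventually applies to the target location, which is why the O3 store queue never holds valid data for the AMO beforehand. Below is a minimal sketch of a 64-bit fetch-and-add functor, assuming AtomicOpFunctor exposes a single virtual operator()(uint8_t *) as in the companion mem patch; the class name AtomicGenericAdd is illustrative only.

#include <cstdint>
#include <cstring>

// Sketch only: a fetch-and-add functor. The cache invokes operator()
// on a pointer to the target memory location when the AMO executes.
class AtomicGenericAdd : public AtomicOpFunctor
{
  public:
    explicit AtomicGenericAdd(uint64_t operand) : a(operand) {}

    void
    operator()(uint8_t *p) override
    {
        uint64_t v;
        std::memcpy(&v, p, sizeof(v));   // read the current value
        v += a;                          // apply the atomic update
        std::memcpy(p, &v, sizeof(v));   // write the result back
    }

  private:
    uint64_t a;  // operand supplied by the atomic instruction
};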
Diffstat (limited to 'src/cpu/o3')
-rw-r--r--  src/cpu/o3/commit.hh             2
-rw-r--r--  src/cpu/o3/commit_impl.hh       18
-rw-r--r--  src/cpu/o3/cpu.hh                4
-rw-r--r--  src/cpu/o3/iew_impl.hh          46
-rw-r--r--  src/cpu/o3/inst_queue_impl.hh    4
-rw-r--r--  src/cpu/o3/lsq.hh               45
-rw-r--r--  src/cpu/o3/lsq_impl.hh          17
-rw-r--r--  src/cpu/o3/lsq_unit.hh          27
-rw-r--r--  src/cpu/o3/lsq_unit_impl.hh     19
-rw-r--r--  src/cpu/o3/mem_dep_unit_impl.hh 17
-rw-r--r--  src/cpu/o3/rename_impl.hh       12
11 files changed, 154 insertions, 57 deletions
diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh
index 4e32f865d..e624557c8 100644
--- a/src/cpu/o3/commit.hh
+++ b/src/cpu/o3/commit.hh
@@ -498,6 +498,8 @@ class DefaultCommit
Stats::Vector statComRefs;
/** Stat for the total number of committed loads. */
Stats::Vector statComLoads;
+ /** Stat for the total number of committed atomics. */
+ Stats::Vector statComAmos;
/** Total number of committed memory barriers. */
Stats::Vector statComMembars;
/** Total number of committed branches. */
diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh
index 2891ce331..ec3d61050 100644
--- a/src/cpu/o3/commit_impl.hh
+++ b/src/cpu/o3/commit_impl.hh
@@ -208,6 +208,13 @@ DefaultCommit<Impl>::regStats()
.flags(total)
;
+ statComAmos
+ .init(cpu->numThreads)
+ .name(name() + ".amos")
+ .desc("Number of atomic instructions committed")
+ .flags(total)
+ ;
+
statComMembars
.init(cpu->numThreads)
.name(name() + ".membars")
@@ -1158,8 +1165,9 @@ DefaultCommit<Impl>::commitHead(const DynInstPtr &head_inst, unsigned inst_num)
// Make sure we are only trying to commit un-executed instructions we
// think are possible.
assert(head_inst->isNonSpeculative() || head_inst->isStoreConditional()
- || head_inst->isMemBarrier() || head_inst->isWriteBarrier() ||
- (head_inst->isLoad() && head_inst->strictlyOrdered()));
+ || head_inst->isMemBarrier() || head_inst->isWriteBarrier()
+ || head_inst->isAtomic()
+ || (head_inst->isLoad() && head_inst->strictlyOrdered()));
DPRINTF(Commit, "Encountered a barrier or non-speculative "
"instruction [sn:%lli] at the head of the ROB, PC %s.\n",
@@ -1306,7 +1314,7 @@ DefaultCommit<Impl>::commitHead(const DynInstPtr &head_inst, unsigned inst_num)
#endif
// If this was a store, record it for this cycle.
- if (head_inst->isStore())
+ if (head_inst->isStore() || head_inst->isAtomic())
committedStores[tid] = true;
// Return true to indicate that we have committed an instruction.
@@ -1399,6 +1407,10 @@ DefaultCommit<Impl>::updateComInstStats(const DynInstPtr &inst)
if (inst->isLoad()) {
statComLoads[tid]++;
}
+
+ if (inst->isAtomic()) {
+ statComAmos[tid]++;
+ }
}
if (inst->isMemBarrier()) {
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
index ec6be657a..21cae444b 100644
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -793,10 +793,10 @@ class FullO3CPU : public BaseO3CPU
/** CPU pushRequest function, forwards request to LSQ. */
Fault pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
unsigned int size, Addr addr, Request::Flags flags,
- uint64_t *res)
+ uint64_t *res, AtomicOpFunctor *amo_op = nullptr)
{
return iew.ldstQueue.pushRequest(inst, isLoad, data, size, addr,
- flags, res);
+ flags, res, amo_op);
}
/** CPU read function, forwards read to LSQ. */
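A hedged usage sketch of the new parameter (none of this is in the patch): a micro-op performing a 64-bit atomic add with no returned value could hand a functor such as the AtomicGenericAdd sketch above to the LSQ through pushRequest(). The helper name issueAmoAdd and the use of the ATOMIC_NO_RETURN_OP request flag from the companion mem patch are assumptions for illustration.

// Hypothetical caller: the LSQ handles the rest -- the AMO is placed
// in the store queue and reaches the cache only after older stores
// have been issued.
template <class Impl>
Fault
issueAmoAdd(FullO3CPU<Impl> *cpu, const typename Impl::DynInstPtr &inst,
            Addr addr, uint64_t operand)
{
    // No data buffer and no result pointer: the functor describes the
    // whole read-modify-write, and this variant discards the old value.
    AtomicOpFunctor *amo_op = new AtomicGenericAdd(operand);
    return cpu->pushRequest(inst, /* isLoad */ false, /* data */ nullptr,
                            /* size */ sizeof(uint64_t), addr,
                            Request::ATOMIC_NO_RETURN_OP,
                            /* res */ nullptr, amo_op);
}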
diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh
index 251389631..6434ec8c3 100644
--- a/src/cpu/o3/iew_impl.hh
+++ b/src/cpu/o3/iew_impl.hh
@@ -479,7 +479,8 @@ DefaultIEW<Impl>::squash(ThreadID tid)
if (skidBuffer[tid].front()->isLoad()) {
toRename->iewInfo[tid].dispatchedToLQ++;
}
- if (skidBuffer[tid].front()->isStore()) {
+ if (skidBuffer[tid].front()->isStore() ||
+ skidBuffer[tid].front()->isAtomic()) {
toRename->iewInfo[tid].dispatchedToSQ++;
}
@@ -862,7 +863,8 @@ DefaultIEW<Impl>::emptyRenameInsts(ThreadID tid)
if (insts[tid].front()->isLoad()) {
toRename->iewInfo[tid].dispatchedToLQ++;
}
- if (insts[tid].front()->isStore()) {
+ if (insts[tid].front()->isStore() ||
+ insts[tid].front()->isAtomic()) {
toRename->iewInfo[tid].dispatchedToSQ++;
}
@@ -1004,7 +1006,7 @@ DefaultIEW<Impl>::dispatchInsts(ThreadID tid)
if (inst->isLoad()) {
toRename->iewInfo[tid].dispatchedToLQ++;
}
- if (inst->isStore()) {
+ if (inst->isStore() || inst->isAtomic()) {
toRename->iewInfo[tid].dispatchedToSQ++;
}
@@ -1030,7 +1032,8 @@ DefaultIEW<Impl>::dispatchInsts(ThreadID tid)
}
// Check LSQ if inst is LD/ST
- if ((inst->isLoad() && ldstQueue.lqFull(tid)) ||
+ if ((inst->isAtomic() && ldstQueue.sqFull(tid)) ||
+ (inst->isLoad() && ldstQueue.lqFull(tid)) ||
(inst->isStore() && ldstQueue.sqFull(tid))) {
DPRINTF(IEW, "[tid:%i]: Issue: %s has become full.\n",tid,
inst->isLoad() ? "LQ" : "SQ");
@@ -1048,7 +1051,25 @@ DefaultIEW<Impl>::dispatchInsts(ThreadID tid)
}
// Otherwise issue the instruction just fine.
- if (inst->isLoad()) {
+ if (inst->isAtomic()) {
+ DPRINTF(IEW, "[tid:%i]: Issue: Memory instruction "
+ "encountered, adding to LSQ.\n", tid);
+
+ ldstQueue.insertStore(inst);
+
+ ++iewDispStoreInsts;
+
+ // AMOs need to be set as "canCommit()"
+ // so that commit can process them when they reach the
+ // head of commit.
+ inst->setCanCommit();
+ instQueue.insertNonSpec(inst);
+ add_to_iq = false;
+
+ ++iewDispNonSpecInsts;
+
+ toRename->iewInfo[tid].dispatchedToSQ++;
+ } else if (inst->isLoad()) {
DPRINTF(IEW, "[tid:%i]: Issue: Memory instruction "
"encountered, adding to LSQ.\n", tid);
@@ -1243,7 +1264,20 @@ DefaultIEW<Impl>::executeInsts()
"reference.\n");
// Tell the LDSTQ to execute this instruction (if it is a load).
- if (inst->isLoad()) {
+ if (inst->isAtomic()) {
+ // AMOs are treated like store requests
+ fault = ldstQueue.executeStore(inst);
+
+ if (inst->isTranslationDelayed() &&
+ fault == NoFault) {
+ // A hw page table walk is currently going on; the
+ // instruction must be deferred.
+ DPRINTF(IEW, "Execute: Delayed translation, deferring "
+ "store.\n");
+ instQueue.deferMemInst(inst);
+ continue;
+ }
+ } else if (inst->isLoad()) {
// Loads will mark themselves as executed, and their writeback
// event adds the instruction to the queue to commit
fault = ldstQueue.executeLoad(inst);
diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh
index ddd7b6d5f..aa12297d6 100644
--- a/src/cpu/o3/inst_queue_impl.hh
+++ b/src/cpu/o3/inst_queue_impl.hh
@@ -1251,13 +1251,15 @@ InstructionQueue<Impl>::doSquash(ThreadID tid)
bool is_acq_rel = squashed_inst->isMemBarrier() &&
(squashed_inst->isLoad() ||
- (squashed_inst->isStore() &&
+ squashed_inst->isAtomic() ||
+ (squashed_inst->isStore() &&
!squashed_inst->isStoreConditional()));
// Remove the instruction from the dependency list.
if (is_acq_rel ||
(!squashed_inst->isNonSpeculative() &&
!squashed_inst->isStoreConditional() &&
+ !squashed_inst->isAtomic() &&
!squashed_inst->isMemBarrier() &&
!squashed_inst->isWriteBarrier())) {
diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh
index 81b7c04a5..f576dd3f4 100644
--- a/src/cpu/o3/lsq.hh
+++ b/src/cpu/o3/lsq.hh
@@ -191,7 +191,7 @@ class LSQ
enum Flag : FlagsStorage
{
IsLoad = 0x00000001,
- /** True if this is a store that writes registers (SC). */
+ /** True if this is a store/atomic that writes registers (SC). */
WbStore = 0x00000002,
Delayed = 0x00000004,
IsSplit = 0x00000008,
@@ -211,7 +211,9 @@ class LSQ
LSQEntryFreed = 0x00000800,
/** Store written back. */
WritebackScheduled = 0x00001000,
- WritebackDone = 0x00002000
+ WritebackDone = 0x00002000,
+ /** True if this is an atomic request */
+ IsAtomic = 0x00004000
};
FlagsType flags;
@@ -250,32 +252,39 @@ class LSQ
const uint32_t _size;
const Request::Flags _flags;
uint32_t _numOutstandingPackets;
+ AtomicOpFunctor *_amo_op;
protected:
LSQUnit* lsqUnit() { return &_port; }
LSQRequest(LSQUnit* port, const DynInstPtr& inst, bool isLoad) :
_state(State::NotIssued), _senderState(nullptr),
_port(*port), _inst(inst), _data(nullptr),
_res(nullptr), _addr(0), _size(0), _flags(0),
- _numOutstandingPackets(0)
+ _numOutstandingPackets(0), _amo_op(nullptr)
{
flags.set(Flag::IsLoad, isLoad);
- flags.set(Flag::WbStore, _inst->isStoreConditional());
+ flags.set(Flag::WbStore,
+ _inst->isStoreConditional() || _inst->isAtomic());
+ flags.set(Flag::IsAtomic, _inst->isAtomic());
install();
}
LSQRequest(LSQUnit* port, const DynInstPtr& inst, bool isLoad,
const Addr& addr, const uint32_t& size,
const Request::Flags& flags_,
- PacketDataPtr data = nullptr, uint64_t* res = nullptr)
+ PacketDataPtr data = nullptr, uint64_t* res = nullptr,
+ AtomicOpFunctor* amo_op = nullptr)
: _state(State::NotIssued), _senderState(nullptr),
numTranslatedFragments(0),
numInTranslationFragments(0),
_port(*port), _inst(inst), _data(data),
_res(res), _addr(addr), _size(size),
_flags(flags_),
- _numOutstandingPackets(0)
+ _numOutstandingPackets(0),
+ _amo_op(amo_op)
{
flags.set(Flag::IsLoad, isLoad);
- flags.set(Flag::WbStore, _inst->isStoreConditional());
+ flags.set(Flag::WbStore,
+ _inst->isStoreConditional() || _inst->isAtomic());
+ flags.set(Flag::IsAtomic, _inst->isAtomic());
install();
}
@@ -285,12 +294,20 @@ class LSQ
return flags.isSet(Flag::IsLoad);
}
+ bool
+ isAtomic() const
+ {
+ return flags.isSet(Flag::IsAtomic);
+ }
+
/** Install the request in the LQ/SQ. */
void install()
{
if (isLoad()) {
_port.loadQueue[_inst->lqIdx].setRequest(this);
} else {
+ // Store, StoreConditional, and Atomic requests are pushed
+ // to this storeQueue
_port.storeQueue[_inst->sqIdx].setRequest(this);
}
}
@@ -609,17 +626,21 @@ class LSQ
using LSQRequest::numInTranslationFragments;
using LSQRequest::numTranslatedFragments;
using LSQRequest::_numOutstandingPackets;
+ using LSQRequest::_amo_op;
public:
SingleDataRequest(LSQUnit* port, const DynInstPtr& inst, bool isLoad,
const Addr& addr, const uint32_t& size,
const Request::Flags& flags_,
PacketDataPtr data = nullptr,
- uint64_t* res = nullptr) :
- LSQRequest(port, inst, isLoad, addr, size, flags_, data, res)
+ uint64_t* res = nullptr,
+ AtomicOpFunctor* amo_op = nullptr) :
+ LSQRequest(port, inst, isLoad, addr, size, flags_, data, res,
+ amo_op)
{
LSQRequest::_requests.push_back(
- std::make_shared<Request>(inst->getASID(), addr, size, flags_,
- inst->masterId(), inst->instAddr(), inst->contextId()));
+ std::make_shared<Request>(inst->getASID(), addr, size,
+ flags_, inst->masterId(), inst->instAddr(),
+ inst->contextId(), amo_op));
LSQRequest::_requests.back()->setReqInstSeqNum(inst->seqNum);
}
inline virtual ~SingleDataRequest() {}
@@ -928,7 +949,7 @@ class LSQ
Fault pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
unsigned int size, Addr addr, Request::Flags flags,
- uint64_t *res);
+ uint64_t *res, AtomicOpFunctor *amo_op);
/** The CPU pointer. */
O3CPU *cpu;
diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh
index 8a221a8d5..abe751c88 100644
--- a/src/cpu/o3/lsq_impl.hh
+++ b/src/cpu/o3/lsq_impl.hh
@@ -680,13 +680,26 @@ template<class Impl>
Fault
LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
unsigned int size, Addr addr, Request::Flags flags,
- uint64_t *res)
+ uint64_t *res, AtomicOpFunctor *amo_op)
{
+ // This incoming request can be either a load, a store or an atomic.
+ // An atomic request carries a pointer to its atomic memory
+ // operation.
+ bool isAtomic = !isLoad && amo_op;
+
ThreadID tid = cpu->contextToThread(inst->contextId());
auto cacheLineSize = cpu->cacheLineSize();
bool needs_burst = transferNeedsBurst(addr, size, cacheLineSize);
LSQRequest* req = nullptr;
+ // Atomic requests that access data across a cache-line boundary are
+ // currently not allowed since the cache does not guarantee that the
+ // corresponding atomic memory operation executes atomically across a
+ // cache line. For ISAs such as x86 that support cross-cache-line atomic
+ // instructions, the cache needs to be modified to perform an atomic
+ // update to both cache lines. For now, such cross-line updates are not
+ // supported.
+ assert(!isAtomic || (isAtomic && !needs_burst));
+
if (inst->translationStarted()) {
req = inst->savedReq;
assert(req);
@@ -696,7 +709,7 @@ LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
size, flags, data, res);
} else {
req = new SingleDataRequest(&thread[tid], inst, isLoad, addr,
- size, flags, data, res);
+ size, flags, data, res, amo_op);
}
assert(req);
inst->setRequest();
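The assert above encodes the single-cache-line restriction from the commit message; needs_burst is true exactly when the access straddles a line boundary. A standalone restatement of the condition follows (sketch; the helper name amoFitsInLine is hypothetical).

#include <cstdint>

// An AMO must not straddle a cache-line boundary: its first and last
// byte must fall in the same block (blk_size assumed a power of two).
static bool
amoFitsInLine(uint64_t addr, unsigned size, unsigned blk_size)
{
    uint64_t first_blk = addr & ~uint64_t(blk_size - 1);
    uint64_t last_blk  = (addr + size - 1) & ~uint64_t(blk_size - 1);
    return first_blk == last_blk;
}

// With 64-byte lines, an 8-byte AMO at 0x38 (bytes 0x38..0x3f) stays in
// one line; at 0x3c (bytes 0x3c..0x43) it crosses the boundary and
// would trip the assert in pushRequest().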
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index 5b90da4f5..3be67bec4 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -702,10 +702,12 @@ LSQUnit<Impl>::read(LSQRequest *req, int load_idx)
bool lower_load_has_store_part = req_s < st_e;
bool upper_load_has_store_part = req_e > st_s;
- // If the store's data has all of the data needed and the load
- // isn't LLSC then
- // we can forward.
- if (store_has_lower_limit && store_has_upper_limit &&
+ // If the store entry is not atomic (atomic does not have valid
+ // data), the store has all of the data needed, and
+ // the load is not LLSC, then
+ // we can forward data from the store to the load
+ if (!store_it->instruction()->isAtomic() &&
+ store_has_lower_limit && store_has_upper_limit &&
!req->mainRequest()->isLLSC()) {
// Get shift amount for offset into the store's data.
@@ -755,17 +757,22 @@ LSQUnit<Impl>::read(LSQRequest *req, int load_idx)
return NoFault;
} else if (
+ // This is the partial store-load forwarding case where a store
+ // has only part of the load's data and the load isn't LLSC
(!req->mainRequest()->isLLSC() &&
((store_has_lower_limit && lower_load_has_store_part) ||
(store_has_upper_limit && upper_load_has_store_part) ||
(lower_load_has_store_part && upper_load_has_store_part))) ||
+ // The load is LLSC, and the store has all or part of the
+ // load's data
(req->mainRequest()->isLLSC() &&
((store_has_lower_limit || upper_load_has_store_part) &&
- (store_has_upper_limit || lower_load_has_store_part)))) {
- // This is the partial store-load forwarding case where a store
- // has only part of the load's data and the load isn't LLSC or
- // the load is LLSC and the store has all or part of the load's
+ (store_has_upper_limit || lower_load_has_store_part))) ||
+ // The store entry is atomic and has all or part of the load's
// data
+ (store_it->instruction()->isAtomic() &&
+ ((store_has_lower_limit || upper_load_has_store_part) &&
+ (store_has_upper_limit || lower_load_has_store_part)))) {
// If it's already been written back, then don't worry about
// stalling on it.
@@ -857,8 +864,10 @@ LSQUnit<Impl>::write(LSQRequest *req, uint8_t *data, int store_idx)
storeQueue[store_idx].isAllZeros() = store_no_data;
assert(size <= SQEntry::DataSize || store_no_data);
+ // copy data into the storeQueue only if the store request has valid data
if (!(req->request()->getFlags() & Request::CACHE_BLOCK_ZERO) &&
- !req->request()->isCacheMaintenance())
+ !req->request()->isCacheMaintenance() &&
+ !req->request()->isAtomic())
memcpy(storeQueue[store_idx].data(), data, size);
// This function only writes the data to the store queue, so no fault
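For orientation, the interval predicates used by the two branches in read() above amount to the following sketch, where [req_s, req_e) is the load's byte range and [st_s, st_e) is the older store-queue entry's range; variable names follow the surrounding code.

// Full coverage: the store-queue entry contains every byte of the load.
bool store_has_lower_limit = req_s >= st_s;
bool store_has_upper_limit = req_e <= st_e;
// Partial overlap at either end of the load.
bool lower_load_has_store_part = req_s < st_e;
bool upper_load_has_store_part = req_e > st_s;

// Forwarding happens only when the entry is not an atomic (an AMO holds
// no valid data until the cache executes it), it fully covers the load,
// and the load is not LLSC. Any other overlap -- partial coverage, an
// LLSC load, or an atomic entry -- stalls the load until the older
// entry leaves the store queue.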
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index 9756a9ef1..48179ceb8 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -124,16 +124,19 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
assert(!cpu->switchedOut());
if (!inst->isSquashed()) {
if (state->needWB) {
- // Only loads and store conditionals perform the writeback
+ // Only loads, store conditionals and atomics perform the writeback
// after receiving the response from the memory
- assert(inst->isLoad() || inst->isStoreConditional());
+ assert(inst->isLoad() || inst->isStoreConditional() ||
+ inst->isAtomic());
writeback(inst, state->request()->mainPacket());
- if (inst->isStore()) {
+ if (inst->isStore() || inst->isAtomic()) {
auto ss = dynamic_cast<SQSenderState*>(state);
ss->writebackDone();
completeStore(ss->idx);
}
} else if (inst->isStore()) {
+ // This is a regular store (i.e., not a store conditional or an
+ // atomic), so it can complete without writing back
completeStore(dynamic_cast<SQSenderState*>(state)->idx);
}
}
@@ -274,7 +277,7 @@ LSQUnit<Impl>::insert(const DynInstPtr &inst)
{
assert(inst->isMemRef());
- assert(inst->isLoad() || inst->isStore());
+ assert(inst->isLoad() || inst->isStore() || inst->isAtomic());
if (inst->isLoad()) {
insertLoad(inst);
@@ -614,8 +617,8 @@ LSQUnit<Impl>::executeStore(const DynInstPtr &store_inst)
assert(store_fault == NoFault);
- if (store_inst->isStoreConditional()) {
- // Store conditionals need to set themselves as able to
+ if (store_inst->isStoreConditional() || store_inst->isAtomic()) {
+ // Store conditionals and Atomics need to set themselves as able to
// writeback if we haven't had a fault by here.
storeQueue[store_idx].canWB() = true;
@@ -751,8 +754,8 @@ LSQUnit<Impl>::writebackStores()
state->inst = inst;
req->senderState(state);
- if (inst->isStoreConditional()) {
- /* Only store conditionals need a writeback. */
+ if (inst->isStoreConditional() || inst->isAtomic()) {
+ /* Only store conditionals and atomics need a writeback. */
state->needWB = true;
}
}
diff --git a/src/cpu/o3/mem_dep_unit_impl.hh b/src/cpu/o3/mem_dep_unit_impl.hh
index 26c4b4d6e..f1d0e2313 100644
--- a/src/cpu/o3/mem_dep_unit_impl.hh
+++ b/src/cpu/o3/mem_dep_unit_impl.hh
@@ -191,11 +191,11 @@ MemDepUnit<MemDepPred, Impl>::insert(const DynInstPtr &inst)
// Check any barriers and the dependence predictor for any
// producing memrefs/stores.
InstSeqNum producing_store;
- if (inst->isLoad() && loadBarrier) {
+ if ((inst->isLoad() || inst->isAtomic()) && loadBarrier) {
DPRINTF(MemDepUnit, "Load barrier [sn:%lli] in flight\n",
loadBarrierSN);
producing_store = loadBarrierSN;
- } else if (inst->isStore() && storeBarrier) {
+ } else if ((inst->isStore() || inst->isAtomic()) && storeBarrier) {
DPRINTF(MemDepUnit, "Store barrier [sn:%lli] in flight\n",
storeBarrierSN);
producing_store = storeBarrierSN;
@@ -252,8 +252,8 @@ MemDepUnit<MemDepPred, Impl>::insert(const DynInstPtr &inst)
}
}
- if (inst->isStore()) {
- DPRINTF(MemDepUnit, "Inserting store PC %s [sn:%lli].\n",
+ if (inst->isStore() || inst->isAtomic()) {
+ DPRINTF(MemDepUnit, "Inserting store/atomic PC %s [sn:%lli].\n",
inst->pcState(), inst->seqNum);
depPred.insertStore(inst->instAddr(), inst->seqNum, inst->threadNumber);
@@ -288,8 +288,8 @@ MemDepUnit<MemDepPred, Impl>::insertNonSpec(const DynInstPtr &inst)
// Might want to turn this part into an inline function or something.
// It's shared between both insert functions.
- if (inst->isStore()) {
- DPRINTF(MemDepUnit, "Inserting store PC %s [sn:%lli].\n",
+ if (inst->isStore() || inst->isAtomic()) {
+ DPRINTF(MemDepUnit, "Inserting store/atomic PC %s [sn:%lli].\n",
inst->pcState(), inst->seqNum);
depPred.insertStore(inst->instAddr(), inst->seqNum, inst->threadNumber);
@@ -451,8 +451,9 @@ template <class MemDepPred, class Impl>
void
MemDepUnit<MemDepPred, Impl>::wakeDependents(const DynInstPtr &inst)
{
- // Only stores and barriers have dependents.
- if (!inst->isStore() && !inst->isMemBarrier() && !inst->isWriteBarrier()) {
+ // Only stores, atomics and barriers have dependents.
+ if (!inst->isStore() && !inst->isAtomic() && !inst->isMemBarrier() &&
+ !inst->isWriteBarrier()) {
return;
}
diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh
index fd9b09e20..c24a09711 100644
--- a/src/cpu/o3/rename_impl.hh
+++ b/src/cpu/o3/rename_impl.hh
@@ -647,7 +647,7 @@ DefaultRename<Impl>::renameInsts(ThreadID tid)
}
}
- if (inst->isStore()) {
+ if (inst->isStore() || inst->isAtomic()) {
if (calcFreeSQEntries(tid) <= 0) {
DPRINTF(Rename, "[tid:%u]: Cannot rename due to no free SQ\n");
source = SQ;
@@ -741,12 +741,12 @@ DefaultRename<Impl>::renameInsts(ThreadID tid)
renameDestRegs(inst, inst->threadNumber);
- if (inst->isLoad()) {
- loadsInProgress[tid]++;
- }
- if (inst->isStore()) {
- storesInProgress[tid]++;
+ if (inst->isAtomic() || inst->isStore()) {
+ storesInProgress[tid]++;
+ } else if (inst->isLoad()) {
+ loadsInProgress[tid]++;
}
+
++renamed_insts;
// Notify potential listeners that source and destination registers for
// this instruction have been renamed.