Diffstat (limited to 'src')
-rw-r--r--  src/cpu/base.cc                   2
-rw-r--r--  src/cpu/base_dyn_inst.hh         19
-rw-r--r--  src/cpu/checker/cpu.hh            7
-rw-r--r--  src/cpu/exec_context.hh          22
-rw-r--r--  src/cpu/minor/exec_context.hh    14
-rw-r--r--  src/cpu/minor/execute.cc          3
-rw-r--r--  src/cpu/minor/fetch2.cc           7
-rw-r--r--  src/cpu/minor/fetch2.hh           1
-rw-r--r--  src/cpu/minor/lsq.cc             45
-rw-r--r--  src/cpu/minor/lsq.hh              4
-rw-r--r--  src/cpu/o3/commit.hh              2
-rw-r--r--  src/cpu/o3/commit_impl.hh        18
-rw-r--r--  src/cpu/o3/cpu.hh                 4
-rw-r--r--  src/cpu/o3/iew_impl.hh           46
-rw-r--r--  src/cpu/o3/inst_queue_impl.hh     4
-rw-r--r--  src/cpu/o3/lsq.hh                45
-rw-r--r--  src/cpu/o3/lsq_impl.hh           17
-rw-r--r--  src/cpu/o3/lsq_unit.hh           27
-rw-r--r--  src/cpu/o3/lsq_unit_impl.hh      19
-rw-r--r--  src/cpu/o3/mem_dep_unit_impl.hh  17
-rw-r--r--  src/cpu/o3/rename_impl.hh        12
-rw-r--r--  src/cpu/simple/atomic.cc         75
-rw-r--r--  src/cpu/simple/atomic.hh          7
-rw-r--r--  src/cpu/simple/base.cc            2
-rw-r--r--  src/cpu/simple/base.hh           19
-rw-r--r--  src/cpu/simple/exec_context.hh   13
-rw-r--r--  src/cpu/simple/timing.cc         57
-rw-r--r--  src/cpu/simple/timing.hh          6
28 files changed, 413 insertions, 101 deletions
diff --git a/src/cpu/base.cc b/src/cpu/base.cc
index 878e65551..30f6baf20 100644
--- a/src/cpu/base.cc
+++ b/src/cpu/base.cc
@@ -409,7 +409,7 @@ BaseCPU::probeInstCommit(const StaticInstPtr &inst)
if (inst->isLoad())
ppRetiredLoads->notify(1);
- if (inst->isStore())
+ if (inst->isStore() || inst->isAtomic())
ppRetiredStores->notify(1);
if (inst->isControl())
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh
index c24517937..9a1ab062c 100644
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -303,6 +303,9 @@ class BaseDynInst : public ExecContext, public RefCounted
Fault writeMem(uint8_t *data, unsigned size, Addr addr,
Request::Flags flags, uint64_t *res);
+ Fault initiateMemAMO(Addr addr, unsigned size, Request::Flags flags,
+ AtomicOpFunctor *amo_op);
+
/** True if the DTB address translation has started. */
bool translationStarted() const { return instFlags[TranslationStarted]; }
void translationStarted(bool f) { instFlags[TranslationStarted] = f; }
@@ -920,4 +923,20 @@ BaseDynInst<Impl>::writeMem(uint8_t *data, unsigned size, Addr addr,
/* st */ false, data, size, addr, flags, res);
}
+template<class Impl>
+Fault
+BaseDynInst<Impl>::initiateMemAMO(Addr addr, unsigned size,
+ Request::Flags flags,
+ AtomicOpFunctor *amo_op)
+{
+ // Atomic memory instructions do not have data to be written to memory yet
+ // since the atomic operations will be executed directly in cache/memory.
+ // Therefore, their `data` field is nullptr.
+ // Atomic memory requests need to carry their `amo_op` functor to the
+ // cache/memory.
+ return cpu->pushRequest(
+ dynamic_cast<typename DynInstPtr::PtrType>(this),
+ /* atomic */ false, nullptr, size, addr, flags, nullptr, amo_op);
+}
+
#endif // __CPU_BASE_DYN_INST_HH__
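
The hook above only threads an AtomicOpFunctor pointer through to the memory system; the functor itself encapsulates the read-modify-write that the cache or memory applies. Below is a standalone sketch of that shape, assuming the single operator()(uint8_t *) interface that gem5's AtomicOpFunctor (src/mem/request.hh) exposes; the AmoAdd32 functor and the test harness are illustrative, not part of this patch.

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // Illustrative stand-in for gem5's AtomicOpFunctor (src/mem/request.hh):
    // the memory system calls operator() on the targeted bytes to perform the
    // read-modify-write close to the data.
    struct AtomicOpFunctor
    {
        virtual void operator()(uint8_t *mem) = 0;
        virtual ~AtomicOpFunctor() {}
    };

    // Hypothetical fetch-and-add functor for a 32-bit AMO (e.g. amoadd.w).
    struct AmoAdd32 : AtomicOpFunctor
    {
        explicit AmoAdd32(uint32_t operand) : a(operand) {}
        void operator()(uint8_t *mem) override
        {
            uint32_t v;
            std::memcpy(&v, mem, sizeof(v));   // read
            v += a;                            // modify
            std::memcpy(mem, &v, sizeof(v));   // write
        }
        uint32_t a;
    };

    int main()
    {
        uint8_t line[4] = {0};
        uint32_t init = 40;
        std::memcpy(line, &init, sizeof(init));

        AmoAdd32 add(2);
        add(line);                             // what the cache would do for the AMO

        uint32_t result;
        std::memcpy(&result, line, sizeof(result));
        assert(result == 42);
        return 0;
    }

The cache (or memory) invokes the functor on the targeted bytes and the old value travels back to the CPU in the response packet, which is why the request's data pointer can stay nullptr on the issuing side.
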
diff --git a/src/cpu/checker/cpu.hh b/src/cpu/checker/cpu.hh
index e32c015bf..5f830d7a9 100644
--- a/src/cpu/checker/cpu.hh
+++ b/src/cpu/checker/cpu.hh
@@ -536,9 +536,16 @@ class CheckerCPU : public BaseCPU, public ExecContext
Fault readMem(Addr addr, uint8_t *data, unsigned size,
Request::Flags flags) override;
+
Fault writeMem(uint8_t *data, unsigned size, Addr addr,
Request::Flags flags, uint64_t *res) override;
+ Fault amoMem(Addr addr, uint8_t* data, unsigned size,
+ Request::Flags flags, AtomicOpFunctor *amo_op) override
+ {
+ panic("AMO is not supported yet in CPU checker\n");
+ }
+
unsigned int
readStCondFailures() const override {
return thread->readStCondFailures();
diff --git a/src/cpu/exec_context.hh b/src/cpu/exec_context.hh
index 1c1c8956a..d46cc1315 100644
--- a/src/cpu/exec_context.hh
+++ b/src/cpu/exec_context.hh
@@ -261,6 +261,28 @@ class ExecContext {
Request::Flags flags, uint64_t *res) = 0;
/**
+ * For atomic-mode contexts, perform an AMO (an atomic read-modify-write
+ * memory operation)
+ */
+ virtual Fault amoMem(Addr addr, uint8_t *data, unsigned int size,
+ Request::Flags flags,
+ AtomicOpFunctor *amo_op)
+ {
+ panic("ExecContext::amoMem() should be overridden\n");
+ }
+
+ /**
+ * For timing-mode contexts, initiate an AMO (an atomic read-modify-write
+ * memory operation)
+ */
+ virtual Fault initiateMemAMO(Addr addr, unsigned int size,
+ Request::Flags flags,
+ AtomicOpFunctor *amo_op)
+ {
+ panic("ExecContext::initiateMemAMO() should be overridden\n");
+ }
+
+ /**
* Sets the number of consecutive store conditional failures.
*/
virtual void setStCondFailures(unsigned int sc_failures) = 0;
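
Note that the two new hooks are declared with panicking default bodies rather than as pure virtuals, so execution contexts that can never encounter AMOs (such as the checker earlier in this patch) only override the flavor they need. A minimal standalone sketch of that pattern, with hypothetical names:

    #include <cstdio>
    #include <cstdlib>

    // Illustrative only: mimics how ExecContext declares amoMem() and
    // initiateMemAMO() with panicking defaults instead of pure virtuals.
    [[noreturn]] static void panic(const char *msg)
    {
        std::fprintf(stderr, "panic: %s\n", msg);
        std::abort();
    }

    struct Context
    {
        // Atomic-mode flavor: perform the AMO immediately.
        virtual int amoMem(int /* addr */)
        { panic("amoMem() should be overridden"); }
        // Timing-mode flavor: only initiate it; the result arrives later.
        virtual int initiateMemAMO(int /* addr */)
        { panic("initiateMemAMO() should be overridden"); }
        virtual ~Context() {}
    };

    // An atomic-mode context overrides amoMem() only; calling the timing
    // flavor on it is a programming error and panics, as in the patch.
    struct AtomicContext : Context
    {
        int amoMem(int addr) override { return addr + 1; /* pretend RMW result */ }
    };

    int main()
    {
        AtomicContext ctx;
        return ctx.amoMem(41) == 42 ? 0 : 1;
    }
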
diff --git a/src/cpu/minor/exec_context.hh b/src/cpu/minor/exec_context.hh
index 179883ecc..02b3dae1c 100644
--- a/src/cpu/minor/exec_context.hh
+++ b/src/cpu/minor/exec_context.hh
@@ -108,7 +108,7 @@ class ExecContext : public ::ExecContext
Request::Flags flags) override
{
execute.getLSQ().pushRequest(inst, true /* load */, nullptr,
- size, addr, flags, NULL);
+ size, addr, flags, NULL, nullptr);
return NoFault;
}
@@ -117,7 +117,17 @@ class ExecContext : public ::ExecContext
Request::Flags flags, uint64_t *res) override
{
execute.getLSQ().pushRequest(inst, false /* store */, data,
- size, addr, flags, res);
+ size, addr, flags, res, nullptr);
+ return NoFault;
+ }
+
+ Fault
+ initiateMemAMO(Addr addr, unsigned int size, Request::Flags flags,
+ AtomicOpFunctor *amo_op) override
+ {
+ // AMO requests are pushed through the store path
+ execute.getLSQ().pushRequest(inst, false /* amo */, nullptr,
+ size, addr, flags, nullptr, amo_op);
return NoFault;
}
diff --git a/src/cpu/minor/execute.cc b/src/cpu/minor/execute.cc
index 234a233c2..6a418202f 100644
--- a/src/cpu/minor/execute.cc
+++ b/src/cpu/minor/execute.cc
@@ -337,6 +337,7 @@ Execute::handleMemResponse(MinorDynInstPtr inst,
bool is_load = inst->staticInst->isLoad();
bool is_store = inst->staticInst->isStore();
+ bool is_atomic = inst->staticInst->isAtomic();
bool is_prefetch = inst->staticInst->isDataPrefetch();
/* If true, the trace's predicate value will be taken from the exec
@@ -368,7 +369,7 @@ Execute::handleMemResponse(MinorDynInstPtr inst,
*inst);
fatal("Received error response packet for inst: %s\n", *inst);
- } else if (is_store || is_load || is_prefetch) {
+ } else if (is_store || is_load || is_prefetch || is_atomic) {
assert(packet);
DPRINTF(MinorMem, "Memory response inst: %s addr: 0x%x size: %d\n",
diff --git a/src/cpu/minor/fetch2.cc b/src/cpu/minor/fetch2.cc
index 180890147..9347e4ccb 100644
--- a/src/cpu/minor/fetch2.cc
+++ b/src/cpu/minor/fetch2.cc
@@ -421,6 +421,8 @@ Fetch2::evaluate()
loadInstructions++;
else if (decoded_inst->isStore())
storeInstructions++;
+ else if (decoded_inst->isAtomic())
+ amoInstructions++;
else if (decoded_inst->isVector())
vecInstructions++;
else if (decoded_inst->isFloating())
@@ -636,6 +638,11 @@ Fetch2::regStats()
.name(name() + ".store_instructions")
.desc("Number of memory store instructions successfully decoded")
.flags(total);
+
+ amoInstructions
+ .name(name() + ".amo_instructions")
+ .desc("Number of memory atomic instructions successfully decoded")
+ .flags(total);
}
void
diff --git a/src/cpu/minor/fetch2.hh b/src/cpu/minor/fetch2.hh
index 2230560f1..114dec0f5 100644
--- a/src/cpu/minor/fetch2.hh
+++ b/src/cpu/minor/fetch2.hh
@@ -171,6 +171,7 @@ class Fetch2 : public Named
Stats::Scalar vecInstructions;
Stats::Scalar loadInstructions;
Stats::Scalar storeInstructions;
+ Stats::Scalar amoInstructions;
public:
/** Dump the whole contents of the input buffer. Useful after a
diff --git a/src/cpu/minor/lsq.cc b/src/cpu/minor/lsq.cc
index b836ed22d..6fe6c3738 100644
--- a/src/cpu/minor/lsq.cc
+++ b/src/cpu/minor/lsq.cc
@@ -676,9 +676,9 @@ LSQ::StoreBuffer::canForwardDataToLoad(LSQRequestPtr request,
while (ret == NoAddrRangeCoverage && i != slots.rend()) {
LSQRequestPtr slot = *i;
- /* Cache maintenance instructions go down via the store path *
- * but they carry no data and they shouldn't be considered for
- * forwarding */
+ /* Cache maintenance instructions go down via the store path but
+ * they carry no data and they shouldn't be considered
+ * for forwarding */
if (slot->packet &&
slot->inst->id.threadId == request->inst->id.threadId &&
!slot->packet->req->isCacheMaintenance()) {
@@ -931,8 +931,9 @@ LSQ::tryToSendToTransfers(LSQRequestPtr request)
bool is_load = request->isLoad;
bool is_llsc = request->request->isLLSC();
bool is_swap = request->request->isSwap();
+ bool is_atomic = request->request->isAtomic();
bool bufferable = !(request->request->isStrictlyOrdered() ||
- is_llsc || is_swap);
+ is_llsc || is_swap || is_atomic);
if (is_load) {
if (numStoresInTransfers != 0) {
@@ -965,9 +966,16 @@ LSQ::tryToSendToTransfers(LSQRequestPtr request)
if (storeBuffer.canForwardDataToLoad(request, forwarding_slot) !=
NoAddrRangeCoverage)
{
+ // There is at least one other request in the storeBuffer that
+ // targets the same address. Since this request is non-bufferable
+ // (e.g., strictly ordered or atomic), it must wait for the
+ // buffered request to complete so that the two requests access
+ // the cache in the correct order.
DPRINTF(MinorMem, "Memory access can receive forwarded data"
- " from the store buffer, need to wait for store buffer to"
- " drain\n");
+ " from the store buffer, but need to wait for store buffer"
+ " to drain\n");
return;
}
}
@@ -1469,9 +1477,21 @@ LSQ::needsToTick()
void
LSQ::pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
unsigned int size, Addr addr, Request::Flags flags,
- uint64_t *res)
+ uint64_t *res, AtomicOpFunctor *amo_op)
{
bool needs_burst = transferNeedsBurst(addr, size, lineWidth);
+
+ if (needs_burst && inst->staticInst->isAtomic()) {
+ // AMO requests that cross a cache line boundary are not allowed
+ // since the cache cannot execute an AMO atomically across two
+ // cache lines.
+ // For ISAs such as x86 that require AMO operations to work on
+ // accesses that cross cache-line boundaries, the cache needs to be
+ // modified to lock both cache lines and guarantee atomicity.
+ panic("Do not expect cross-cache-line atomic memory request\n");
+ }
+
LSQRequestPtr request;
/* Copy given data into the request. The request will pass this to the
@@ -1480,15 +1500,16 @@ LSQ::pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
DPRINTF(MinorMem, "Pushing request (%s) addr: 0x%x size: %d flags:"
" 0x%x%s lineWidth : 0x%x\n",
- (isLoad ? "load" : "store"), addr, size, flags,
+ (isLoad ? "load" : "store/atomic"), addr, size, flags,
(needs_burst ? " (needs burst)" : ""), lineWidth);
if (!isLoad) {
- /* request_data becomes the property of a ...DataRequest (see below)
+ /* Request_data becomes the property of a ...DataRequest (see below)
* and destroyed by its destructor */
request_data = new uint8_t[size];
- if (flags & Request::STORE_NO_DATA) {
- /* For cache zeroing, just use zeroed data */
+ if (inst->staticInst->isAtomic() ||
+ (flags & Request::STORE_NO_DATA)) {
+ /* For atomic or store-no-data, just use zeroed data */
std::memset(request_data, 0, size);
} else {
std::memcpy(request_data, data, size);
@@ -1511,7 +1532,7 @@ LSQ::pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
request->request->setVirt(0 /* asid */,
addr, size, flags, cpu.dataMasterId(),
/* I've no idea why we need the PC, but give it */
- inst->pc.instAddr());
+ inst->pc.instAddr(), amo_op);
requests.push(request);
request->startAddrTranslation();
diff --git a/src/cpu/minor/lsq.hh b/src/cpu/minor/lsq.hh
index da873b4ac..11fa8774f 100644
--- a/src/cpu/minor/lsq.hh
+++ b/src/cpu/minor/lsq.hh
@@ -696,11 +696,11 @@ class LSQ : public Named
void completeMemBarrierInst(MinorDynInstPtr inst,
bool committed);
- /** Single interface for readMem/writeMem to issue requests into
+ /** Single interface for readMem/writeMem/amoMem to issue requests into
* the LSQ */
void pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
unsigned int size, Addr addr, Request::Flags flags,
- uint64_t *res);
+ uint64_t *res, AtomicOpFunctor *amo_op);
/** Push a predicate failed-representing request into the queues just
* to maintain commit order */
diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh
index 4e32f865d..e624557c8 100644
--- a/src/cpu/o3/commit.hh
+++ b/src/cpu/o3/commit.hh
@@ -498,6 +498,8 @@ class DefaultCommit
Stats::Vector statComRefs;
/** Stat for the total number of committed loads. */
Stats::Vector statComLoads;
+ /** Stat for the total number of committed atomics. */
+ Stats::Vector statComAmos;
/** Total number of committed memory barriers. */
Stats::Vector statComMembars;
/** Total number of committed branches. */
diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh
index 2891ce331..ec3d61050 100644
--- a/src/cpu/o3/commit_impl.hh
+++ b/src/cpu/o3/commit_impl.hh
@@ -208,6 +208,13 @@ DefaultCommit<Impl>::regStats()
.flags(total)
;
+ statComAmos
+ .init(cpu->numThreads)
+ .name(name() + ".amos")
+ .desc("Number of atomic instructions committed")
+ .flags(total)
+ ;
+
statComMembars
.init(cpu->numThreads)
.name(name() + ".membars")
@@ -1158,8 +1165,9 @@ DefaultCommit<Impl>::commitHead(const DynInstPtr &head_inst, unsigned inst_num)
// Make sure we are only trying to commit un-executed instructions we
// think are possible.
assert(head_inst->isNonSpeculative() || head_inst->isStoreConditional()
- || head_inst->isMemBarrier() || head_inst->isWriteBarrier() ||
- (head_inst->isLoad() && head_inst->strictlyOrdered()));
+ || head_inst->isMemBarrier() || head_inst->isWriteBarrier()
+ || head_inst->isAtomic()
+ || (head_inst->isLoad() && head_inst->strictlyOrdered()));
DPRINTF(Commit, "Encountered a barrier or non-speculative "
"instruction [sn:%lli] at the head of the ROB, PC %s.\n",
@@ -1306,7 +1314,7 @@ DefaultCommit<Impl>::commitHead(const DynInstPtr &head_inst, unsigned inst_num)
#endif
// If this was a store, record it for this cycle.
- if (head_inst->isStore())
+ if (head_inst->isStore() || head_inst->isAtomic())
committedStores[tid] = true;
// Return true to indicate that we have committed an instruction.
@@ -1399,6 +1407,10 @@ DefaultCommit<Impl>::updateComInstStats(const DynInstPtr &inst)
if (inst->isLoad()) {
statComLoads[tid]++;
}
+
+ if (inst->isAtomic()) {
+ statComAmos[tid]++;
+ }
}
if (inst->isMemBarrier()) {
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
index ec6be657a..21cae444b 100644
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -793,10 +793,10 @@ class FullO3CPU : public BaseO3CPU
/** CPU pushRequest function, forwards request to LSQ. */
Fault pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
unsigned int size, Addr addr, Request::Flags flags,
- uint64_t *res)
+ uint64_t *res, AtomicOpFunctor *amo_op = nullptr)
{
return iew.ldstQueue.pushRequest(inst, isLoad, data, size, addr,
- flags, res);
+ flags, res, amo_op);
}
/** CPU read function, forwards read to LSQ. */
diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh
index 251389631..6434ec8c3 100644
--- a/src/cpu/o3/iew_impl.hh
+++ b/src/cpu/o3/iew_impl.hh
@@ -479,7 +479,8 @@ DefaultIEW<Impl>::squash(ThreadID tid)
if (skidBuffer[tid].front()->isLoad()) {
toRename->iewInfo[tid].dispatchedToLQ++;
}
- if (skidBuffer[tid].front()->isStore()) {
+ if (skidBuffer[tid].front()->isStore() ||
+ skidBuffer[tid].front()->isAtomic()) {
toRename->iewInfo[tid].dispatchedToSQ++;
}
@@ -862,7 +863,8 @@ DefaultIEW<Impl>::emptyRenameInsts(ThreadID tid)
if (insts[tid].front()->isLoad()) {
toRename->iewInfo[tid].dispatchedToLQ++;
}
- if (insts[tid].front()->isStore()) {
+ if (insts[tid].front()->isStore() ||
+ insts[tid].front()->isAtomic()) {
toRename->iewInfo[tid].dispatchedToSQ++;
}
@@ -1004,7 +1006,7 @@ DefaultIEW<Impl>::dispatchInsts(ThreadID tid)
if (inst->isLoad()) {
toRename->iewInfo[tid].dispatchedToLQ++;
}
- if (inst->isStore()) {
+ if (inst->isStore() || inst->isAtomic()) {
toRename->iewInfo[tid].dispatchedToSQ++;
}
@@ -1030,7 +1032,8 @@ DefaultIEW<Impl>::dispatchInsts(ThreadID tid)
}
// Check LSQ if inst is LD/ST
- if ((inst->isLoad() && ldstQueue.lqFull(tid)) ||
+ if ((inst->isAtomic() && ldstQueue.sqFull(tid)) ||
+ (inst->isLoad() && ldstQueue.lqFull(tid)) ||
(inst->isStore() && ldstQueue.sqFull(tid))) {
DPRINTF(IEW, "[tid:%i]: Issue: %s has become full.\n",tid,
inst->isLoad() ? "LQ" : "SQ");
@@ -1048,7 +1051,25 @@ DefaultIEW<Impl>::dispatchInsts(ThreadID tid)
}
// Otherwise issue the instruction just fine.
- if (inst->isLoad()) {
+ if (inst->isAtomic()) {
+ DPRINTF(IEW, "[tid:%i]: Issue: Memory instruction "
+ "encountered, adding to LSQ.\n", tid);
+
+ ldstQueue.insertStore(inst);
+
+ ++iewDispStoreInsts;
+
+ // AMOs need to be set as "canCommit()"
+ // so that commit can process them when they reach the
+ // head of commit.
+ inst->setCanCommit();
+ instQueue.insertNonSpec(inst);
+ add_to_iq = false;
+
+ ++iewDispNonSpecInsts;
+
+ toRename->iewInfo[tid].dispatchedToSQ++;
+ } else if (inst->isLoad()) {
DPRINTF(IEW, "[tid:%i]: Issue: Memory instruction "
"encountered, adding to LSQ.\n", tid);
@@ -1243,7 +1264,20 @@ DefaultIEW<Impl>::executeInsts()
"reference.\n");
// Tell the LDSTQ to execute this instruction (if it is a load).
- if (inst->isLoad()) {
+ if (inst->isAtomic()) {
+ // AMOs are treated like store requests
+ fault = ldstQueue.executeStore(inst);
+
+ if (inst->isTranslationDelayed() &&
+ fault == NoFault) {
+ // A hw page table walk is currently going on; the
+ // instruction must be deferred.
+ DPRINTF(IEW, "Execute: Delayed translation, deferring "
+ "store.\n");
+ instQueue.deferMemInst(inst);
+ continue;
+ }
+ } else if (inst->isLoad()) {
// Loads will mark themselves as executed, and their writeback
// event adds the instruction to the queue to commit
fault = ldstQueue.executeLoad(inst);
diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh
index ddd7b6d5f..aa12297d6 100644
--- a/src/cpu/o3/inst_queue_impl.hh
+++ b/src/cpu/o3/inst_queue_impl.hh
@@ -1251,13 +1251,15 @@ InstructionQueue<Impl>::doSquash(ThreadID tid)
bool is_acq_rel = squashed_inst->isMemBarrier() &&
(squashed_inst->isLoad() ||
- (squashed_inst->isStore() &&
+ squashed_inst->isAtomic() ||
+ (squashed_inst->isStore() &&
!squashed_inst->isStoreConditional()));
// Remove the instruction from the dependency list.
if (is_acq_rel ||
(!squashed_inst->isNonSpeculative() &&
!squashed_inst->isStoreConditional() &&
+ !squashed_inst->isAtomic() &&
!squashed_inst->isMemBarrier() &&
!squashed_inst->isWriteBarrier())) {
diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh
index 81b7c04a5..f576dd3f4 100644
--- a/src/cpu/o3/lsq.hh
+++ b/src/cpu/o3/lsq.hh
@@ -191,7 +191,7 @@ class LSQ
enum Flag : FlagsStorage
{
IsLoad = 0x00000001,
- /** True if this is a store that writes registers (SC). */
+ /** True if this is a store/atomic that writes registers (SC). */
WbStore = 0x00000002,
Delayed = 0x00000004,
IsSplit = 0x00000008,
@@ -211,7 +211,9 @@ class LSQ
LSQEntryFreed = 0x00000800,
/** Store written back. */
WritebackScheduled = 0x00001000,
- WritebackDone = 0x00002000
+ WritebackDone = 0x00002000,
+ /** True if this is an atomic request */
+ IsAtomic = 0x00004000
};
FlagsType flags;
@@ -250,32 +252,39 @@ class LSQ
const uint32_t _size;
const Request::Flags _flags;
uint32_t _numOutstandingPackets;
+ AtomicOpFunctor *_amo_op;
protected:
LSQUnit* lsqUnit() { return &_port; }
LSQRequest(LSQUnit* port, const DynInstPtr& inst, bool isLoad) :
_state(State::NotIssued), _senderState(nullptr),
_port(*port), _inst(inst), _data(nullptr),
_res(nullptr), _addr(0), _size(0), _flags(0),
- _numOutstandingPackets(0)
+ _numOutstandingPackets(0), _amo_op(nullptr)
{
flags.set(Flag::IsLoad, isLoad);
- flags.set(Flag::WbStore, _inst->isStoreConditional());
+ flags.set(Flag::WbStore,
+ _inst->isStoreConditional() || _inst->isAtomic());
+ flags.set(Flag::IsAtomic, _inst->isAtomic());
install();
}
LSQRequest(LSQUnit* port, const DynInstPtr& inst, bool isLoad,
const Addr& addr, const uint32_t& size,
const Request::Flags& flags_,
- PacketDataPtr data = nullptr, uint64_t* res = nullptr)
+ PacketDataPtr data = nullptr, uint64_t* res = nullptr,
+ AtomicOpFunctor* amo_op = nullptr)
: _state(State::NotIssued), _senderState(nullptr),
numTranslatedFragments(0),
numInTranslationFragments(0),
_port(*port), _inst(inst), _data(data),
_res(res), _addr(addr), _size(size),
_flags(flags_),
- _numOutstandingPackets(0)
+ _numOutstandingPackets(0),
+ _amo_op(amo_op)
{
flags.set(Flag::IsLoad, isLoad);
- flags.set(Flag::WbStore, _inst->isStoreConditional());
+ flags.set(Flag::WbStore,
+ _inst->isStoreConditional() || _inst->isAtomic());
+ flags.set(Flag::IsAtomic, _inst->isAtomic());
install();
}
@@ -285,12 +294,20 @@ class LSQ
return flags.isSet(Flag::IsLoad);
}
+ bool
+ isAtomic() const
+ {
+ return flags.isSet(Flag::IsAtomic);
+ }
+
/** Install the request in the LQ/SQ. */
void install()
{
if (isLoad()) {
_port.loadQueue[_inst->lqIdx].setRequest(this);
} else {
+ // Store, StoreConditional, and Atomic requests are pushed
+ // to this storeQueue
_port.storeQueue[_inst->sqIdx].setRequest(this);
}
}
@@ -609,17 +626,21 @@ class LSQ
using LSQRequest::numInTranslationFragments;
using LSQRequest::numTranslatedFragments;
using LSQRequest::_numOutstandingPackets;
+ using LSQRequest::_amo_op;
public:
SingleDataRequest(LSQUnit* port, const DynInstPtr& inst, bool isLoad,
const Addr& addr, const uint32_t& size,
const Request::Flags& flags_,
PacketDataPtr data = nullptr,
- uint64_t* res = nullptr) :
- LSQRequest(port, inst, isLoad, addr, size, flags_, data, res)
+ uint64_t* res = nullptr,
+ AtomicOpFunctor* amo_op = nullptr) :
+ LSQRequest(port, inst, isLoad, addr, size, flags_, data, res,
+ amo_op)
{
LSQRequest::_requests.push_back(
- std::make_shared<Request>(inst->getASID(), addr, size, flags_,
- inst->masterId(), inst->instAddr(), inst->contextId()));
+ std::make_shared<Request>(inst->getASID(), addr, size,
+ flags_, inst->masterId(), inst->instAddr(),
+ inst->contextId(), amo_op));
LSQRequest::_requests.back()->setReqInstSeqNum(inst->seqNum);
}
inline virtual ~SingleDataRequest() {}
@@ -928,7 +949,7 @@ class LSQ
Fault pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
unsigned int size, Addr addr, Request::Flags flags,
- uint64_t *res);
+ uint64_t *res, AtomicOpFunctor *amo_op);
/** The CPU pointer. */
O3CPU *cpu;
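
The request flags above are a plain bitmask: IsAtomic claims the next free bit after WritebackDone (0x00002000), and WbStore is now set for store conditionals and atomics alike because both write a result register. A simplified, self-contained sketch of that bookkeeping (the Flags helper and values mirror the patch but are not gem5 code):

    #include <cassert>
    #include <cstdint>

    using FlagsStorage = uint32_t;

    enum Flag : FlagsStorage
    {
        IsLoad   = 0x00000001,
        WbStore  = 0x00000002,   // store/atomic that writes registers
        IsAtomic = 0x00004000,   // new bit added by the patch
    };

    struct Flags
    {
        FlagsStorage bits = 0;
        void set(Flag f, bool cond) { if (cond) bits |= f; }
        bool isSet(Flag f) const { return (bits & f) != 0; }
    };

    int main()
    {
        bool isLoad = false, isStoreConditional = false, isAtomic = true;

        Flags flags;
        flags.set(IsLoad, isLoad);
        // As in the patched constructors: atomics need a register writeback too.
        flags.set(WbStore, isStoreConditional || isAtomic);
        flags.set(IsAtomic, isAtomic);

        assert(!flags.isSet(IsLoad));
        assert(flags.isSet(WbStore) && flags.isSet(IsAtomic));
        return 0;
    }
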
diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh
index 8a221a8d5..abe751c88 100644
--- a/src/cpu/o3/lsq_impl.hh
+++ b/src/cpu/o3/lsq_impl.hh
@@ -680,13 +680,26 @@ template<class Impl>
Fault
LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
unsigned int size, Addr addr, Request::Flags flags,
- uint64_t *res)
+ uint64_t *res, AtomicOpFunctor *amo_op)
{
+ // The incoming request can be a load, a store, or an atomic.
+ // An atomic request carries a pointer to its atomic memory
+ // operation.
+ bool isAtomic = !isLoad && amo_op;
+
ThreadID tid = cpu->contextToThread(inst->contextId());
auto cacheLineSize = cpu->cacheLineSize();
bool needs_burst = transferNeedsBurst(addr, size, cacheLineSize);
LSQRequest* req = nullptr;
+ // Atomic requests that access data across a cache line boundary are
+ // currently not allowed since the cache cannot execute the corresponding
+ // atomic memory operation atomically across two cache lines.
+ // For ISAs such as x86 that support cross-cache-line atomic instructions,
+ // the cache needs to be modified to perform atomic updates to both cache
+ // lines. For now, such cross-line updates are not supported.
+ assert(!isAtomic || (isAtomic && !needs_burst));
+
if (inst->translationStarted()) {
req = inst->savedReq;
assert(req);
@@ -696,7 +709,7 @@ LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
size, flags, data, res);
} else {
req = new SingleDataRequest(&thread[tid], inst, isLoad, addr,
- size, flags, data, res);
+ size, flags, data, res, amo_op);
}
assert(req);
inst->setRequest();
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index 5b90da4f5..3be67bec4 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -702,10 +702,12 @@ LSQUnit<Impl>::read(LSQRequest *req, int load_idx)
bool lower_load_has_store_part = req_s < st_e;
bool upper_load_has_store_part = req_e > st_s;
- // If the store's data has all of the data needed and the load
- // isn't LLSC then
- // we can forward.
- if (store_has_lower_limit && store_has_upper_limit &&
+ // If the store entry is not atomic (atomic entries do not carry
+ // valid data), the store has all of the data needed, and the load
+ // is not LLSC, then we can forward data from the store to the load.
+ if (!store_it->instruction()->isAtomic() &&
+ store_has_lower_limit && store_has_upper_limit &&
!req->mainRequest()->isLLSC()) {
// Get shift amount for offset into the store's data.
@@ -755,17 +757,22 @@ LSQUnit<Impl>::read(LSQRequest *req, int load_idx)
return NoFault;
} else if (
+ // This is the partial store-load forwarding case where a store
+ // has only part of the load's data and the load isn't LLSC
(!req->mainRequest()->isLLSC() &&
((store_has_lower_limit && lower_load_has_store_part) ||
(store_has_upper_limit && upper_load_has_store_part) ||
(lower_load_has_store_part && upper_load_has_store_part))) ||
+ // The load is LLSC, and the store has all or part of the
+ // load's data
(req->mainRequest()->isLLSC() &&
((store_has_lower_limit || upper_load_has_store_part) &&
- (store_has_upper_limit || lower_load_has_store_part)))) {
- // This is the partial store-load forwarding case where a store
- // has only part of the load's data and the load isn't LLSC or
- // the load is LLSC and the store has all or part of the load's
+ (store_has_upper_limit || lower_load_has_store_part))) ||
+ // The store entry is atomic and has all or part of the load's
// data
+ (store_it->instruction()->isAtomic() &&
+ ((store_has_lower_limit || upper_load_has_store_part) &&
+ (store_has_upper_limit || lower_load_has_store_part)))) {
// If it's already been written back, then don't worry about
// stalling on it.
@@ -857,8 +864,10 @@ LSQUnit<Impl>::write(LSQRequest *req, uint8_t *data, int store_idx)
storeQueue[store_idx].isAllZeros() = store_no_data;
assert(size <= SQEntry::DataSize || store_no_data);
+ // copy data into the storeQueue only if the store request has valid data
if (!(req->request()->getFlags() & Request::CACHE_BLOCK_ZERO) &&
- !req->request()->isCacheMaintenance())
+ !req->request()->isCacheMaintenance() &&
+ !req->request()->isAtomic())
memcpy(storeQueue[store_idx].data(), data, size);
// This function only writes the data to the store queue, so no fault
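
The forwarding decision in read() above combines four interval tests between the load's byte range [req_s, req_e) and a store-queue entry's range [st_s, st_e); the patch only adds the rule that an atomic entry never forwards (it has no valid data) and instead stalls the load. A standalone illustration of those tests, with made-up addresses and the same variable names:

    #include <cassert>
    #include <cstdint>

    struct Range { uint64_t start, end; };   // [start, end)

    int main()
    {
        // An 8-byte store at 0x100 and a 4-byte load fully inside it.
        Range store = {0x100, 0x108};
        Range load  = {0x104, 0x108};

        uint64_t req_s = load.start,  req_e = load.end;
        uint64_t st_s  = store.start, st_e  = store.end;

        bool store_has_lower_limit     = req_s >= st_s;
        bool store_has_upper_limit     = req_e <= st_e;
        bool lower_load_has_store_part = req_s < st_e;
        bool upper_load_has_store_part = req_e > st_s;

        // Full coverage: data can be forwarded, unless the SQ entry is an
        // atomic (no valid data) or the load is LLSC, as the patched code checks.
        assert(store_has_lower_limit && store_has_upper_limit);
        assert(lower_load_has_store_part && upper_load_has_store_part);

        // A load that only partially overlaps the store must instead stall
        // until the store has written back.
        Range wide = {0xfc, 0x104};
        bool partial = (wide.start < st_e && wide.end > st_s) &&
                       !(wide.start >= st_s && wide.end <= st_e);
        assert(partial);
        return 0;
    }
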
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index 9756a9ef1..48179ceb8 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -124,16 +124,19 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
assert(!cpu->switchedOut());
if (!inst->isSquashed()) {
if (state->needWB) {
- // Only loads and store conditionals perform the writeback
+ // Only loads, store conditionals and atomics perform the writeback
// after receiving the response from memory
- assert(inst->isLoad() || inst->isStoreConditional());
+ assert(inst->isLoad() || inst->isStoreConditional() ||
+ inst->isAtomic());
writeback(inst, state->request()->mainPacket());
- if (inst->isStore()) {
+ if (inst->isStore() || inst->isAtomic()) {
auto ss = dynamic_cast<SQSenderState*>(state);
ss->writebackDone();
completeStore(ss->idx);
}
} else if (inst->isStore()) {
+ // This is a regular store (i.e., not a store conditional or an
+ // atomic), so it can complete without writing back.
completeStore(dynamic_cast<SQSenderState*>(state)->idx);
}
}
@@ -274,7 +277,7 @@ LSQUnit<Impl>::insert(const DynInstPtr &inst)
{
assert(inst->isMemRef());
- assert(inst->isLoad() || inst->isStore());
+ assert(inst->isLoad() || inst->isStore() || inst->isAtomic());
if (inst->isLoad()) {
insertLoad(inst);
@@ -614,8 +617,8 @@ LSQUnit<Impl>::executeStore(const DynInstPtr &store_inst)
assert(store_fault == NoFault);
- if (store_inst->isStoreConditional()) {
- // Store conditionals need to set themselves as able to
+ if (store_inst->isStoreConditional() || store_inst->isAtomic()) {
+ // Store conditionals and Atomics need to set themselves as able to
// writeback if we haven't had a fault by here.
storeQueue[store_idx].canWB() = true;
@@ -751,8 +754,8 @@ LSQUnit<Impl>::writebackStores()
state->inst = inst;
req->senderState(state);
- if (inst->isStoreConditional()) {
- /* Only store conditionals need a writeback. */
+ if (inst->isStoreConditional() || inst->isAtomic()) {
+ /* Only store conditionals and atomics need a writeback. */
state->needWB = true;
}
}
diff --git a/src/cpu/o3/mem_dep_unit_impl.hh b/src/cpu/o3/mem_dep_unit_impl.hh
index 26c4b4d6e..f1d0e2313 100644
--- a/src/cpu/o3/mem_dep_unit_impl.hh
+++ b/src/cpu/o3/mem_dep_unit_impl.hh
@@ -191,11 +191,11 @@ MemDepUnit<MemDepPred, Impl>::insert(const DynInstPtr &inst)
// Check any barriers and the dependence predictor for any
// producing memrefs/stores.
InstSeqNum producing_store;
- if (inst->isLoad() && loadBarrier) {
+ if ((inst->isLoad() || inst->isAtomic()) && loadBarrier) {
DPRINTF(MemDepUnit, "Load barrier [sn:%lli] in flight\n",
loadBarrierSN);
producing_store = loadBarrierSN;
- } else if (inst->isStore() && storeBarrier) {
+ } else if ((inst->isStore() || inst->isAtomic()) && storeBarrier) {
DPRINTF(MemDepUnit, "Store barrier [sn:%lli] in flight\n",
storeBarrierSN);
producing_store = storeBarrierSN;
@@ -252,8 +252,8 @@ MemDepUnit<MemDepPred, Impl>::insert(const DynInstPtr &inst)
}
}
- if (inst->isStore()) {
- DPRINTF(MemDepUnit, "Inserting store PC %s [sn:%lli].\n",
+ if (inst->isStore() || inst->isAtomic()) {
+ DPRINTF(MemDepUnit, "Inserting store/atomic PC %s [sn:%lli].\n",
inst->pcState(), inst->seqNum);
depPred.insertStore(inst->instAddr(), inst->seqNum, inst->threadNumber);
@@ -288,8 +288,8 @@ MemDepUnit<MemDepPred, Impl>::insertNonSpec(const DynInstPtr &inst)
// Might want to turn this part into an inline function or something.
// It's shared between both insert functions.
- if (inst->isStore()) {
- DPRINTF(MemDepUnit, "Inserting store PC %s [sn:%lli].\n",
+ if (inst->isStore() || inst->isAtomic()) {
+ DPRINTF(MemDepUnit, "Inserting store/atomic PC %s [sn:%lli].\n",
inst->pcState(), inst->seqNum);
depPred.insertStore(inst->instAddr(), inst->seqNum, inst->threadNumber);
@@ -451,8 +451,9 @@ template <class MemDepPred, class Impl>
void
MemDepUnit<MemDepPred, Impl>::wakeDependents(const DynInstPtr &inst)
{
- // Only stores and barriers have dependents.
- if (!inst->isStore() && !inst->isMemBarrier() && !inst->isWriteBarrier()) {
+ // Only stores, atomics and barriers have dependents.
+ if (!inst->isStore() && !inst->isAtomic() && !inst->isMemBarrier() &&
+ !inst->isWriteBarrier()) {
return;
}
diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh
index fd9b09e20..c24a09711 100644
--- a/src/cpu/o3/rename_impl.hh
+++ b/src/cpu/o3/rename_impl.hh
@@ -647,7 +647,7 @@ DefaultRename<Impl>::renameInsts(ThreadID tid)
}
}
- if (inst->isStore()) {
+ if (inst->isStore() || inst->isAtomic()) {
if (calcFreeSQEntries(tid) <= 0) {
DPRINTF(Rename, "[tid:%u]: Cannot rename due to no free SQ\n");
source = SQ;
@@ -741,12 +741,12 @@ DefaultRename<Impl>::renameInsts(ThreadID tid)
renameDestRegs(inst, inst->threadNumber);
- if (inst->isLoad()) {
- loadsInProgress[tid]++;
- }
- if (inst->isStore()) {
- storesInProgress[tid]++;
+ if (inst->isAtomic() || inst->isStore()) {
+ storesInProgress[tid]++;
+ } else if (inst->isLoad()) {
+ loadsInProgress[tid]++;
}
+
++renamed_insts;
// Notify potential listeners that source and destination registers for
// this instruction have been renamed.
diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc
index e91fafbcc..caf2427ef 100644
--- a/src/cpu/simple/atomic.cc
+++ b/src/cpu/simple/atomic.cc
@@ -72,6 +72,7 @@ AtomicSimpleCPU::init()
ifetch_req->setContext(cid);
data_read_req->setContext(cid);
data_write_req->setContext(cid);
+ data_amo_req->setContext(cid);
}
AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams *p)
@@ -90,6 +91,7 @@ AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams *p)
ifetch_req = std::make_shared<Request>();
data_read_req = std::make_shared<Request>();
data_write_req = std::make_shared<Request>();
+ data_amo_req = std::make_shared<Request>();
}
@@ -417,14 +419,6 @@ AtomicSimpleCPU::readMem(Addr addr, uint8_t * data, unsigned size,
}
Fault
-AtomicSimpleCPU::initiateMemRead(Addr addr, unsigned size,
- Request::Flags flags)
-{
- panic("initiateMemRead() is for timing accesses, and should "
- "never be called on AtomicSimpleCPU.\n");
-}
-
-Fault
AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size, Addr addr,
Request::Flags flags, uint64_t *res)
{
@@ -534,6 +528,70 @@ AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size, Addr addr,
}
}
+Fault
+AtomicSimpleCPU::amoMem(Addr addr, uint8_t* data, unsigned size,
+ Request::Flags flags, AtomicOpFunctor *amo_op)
+{
+ SimpleExecContext& t_info = *threadInfo[curThread];
+ SimpleThread* thread = t_info.thread;
+
+ // use the CPU's statically allocated AMO request object (the packet
+ // itself is built on the stack below)
+ const RequestPtr &req = data_amo_req;
+
+ if (traceData)
+ traceData->setMem(addr, size, flags);
+
+ //The address of the second part of this access if it needs to be split
+ //across a cache line boundary.
+ Addr secondAddr = roundDown(addr + size - 1, cacheLineSize());
+
+ // AMO requests that cross a cache line boundary are not allowed
+ // since the cache cannot execute an AMO atomically across two
+ // cache lines.
+ // For ISAs such as x86 that require AMO operations to work on
+ // accesses that cross cache-line boundaries, the cache needs to be
+ // modified to lock both cache lines and guarantee atomicity.
+ if (secondAddr > addr) {
+ panic("AMO request should not access across a cache line boundary\n");
+ }
+
+ dcache_latency = 0;
+
+ req->taskId(taskId());
+ req->setVirt(0, addr, size, flags, dataMasterId(),
+ thread->pcState().instAddr(), amo_op);
+
+ // translate to physical address
+ Fault fault = thread->dtb->translateAtomic(req, thread->getTC(),
+ BaseTLB::Write);
+
+ // Now do the access.
+ if (fault == NoFault && !req->getFlags().isSet(Request::NO_ACCESS)) {
+ // We treat AMO accesses as Write accesses with SwapReq command
+ // data will hold the return data of the AMO access
+ Packet pkt(req, Packet::makeWriteCmd(req));
+ pkt.dataStatic(data);
+
+ if (req->isMmappedIpr())
+ dcache_latency += TheISA::handleIprRead(thread->getTC(), &pkt);
+ else {
+ dcache_latency += sendPacket(dcachePort, &pkt);
+ }
+
+ dcache_access = true;
+
+ assert(!pkt.isError());
+ assert(!req->isLLSC());
+ }
+
+ if (fault != NoFault && req->isPrefetch()) {
+ return NoFault;
+ }
+
+ //If there's a fault and we're not doing prefetch, return it
+ return fault;
+}
void
AtomicSimpleCPU::tick()
@@ -550,6 +608,7 @@ AtomicSimpleCPU::tick()
ifetch_req->setContext(cid);
data_read_req->setContext(cid);
data_write_req->setContext(cid);
+ data_amo_req->setContext(cid);
}
SimpleExecContext& t_info = *threadInfo[curThread];
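
amoMem() above, the minor LSQ, and the O3 LSQ all reject AMOs that straddle a cache line with the same test: round the address of the access's last byte down to a line boundary and compare it with the start address. A small self-contained check of that condition (the roundDown helper is a power-of-two variant of the one in gem5's src/base/intmath.hh; addresses are examples):

    #include <cassert>
    #include <cstdint>

    // Power-of-two rounding helper; align must be a power of two.
    static inline uint64_t roundDown(uint64_t val, uint64_t align)
    {
        return val & ~(align - 1);
    }

    // True if an access of `size` bytes at `addr` spills into the next cache
    // line, i.e. its last byte rounds down to a line start beyond `addr`.
    static bool crossesLine(uint64_t addr, unsigned size, uint64_t lineSize)
    {
        return roundDown(addr + size - 1, lineSize) > addr;
    }

    int main()
    {
        const uint64_t line = 64;
        assert(!crossesLine(0x1000, 8, line));  // fully inside one line: allowed
        assert(crossesLine(0x103c, 8, line));   // spans two lines: panics in the patch
        return 0;
    }
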
diff --git a/src/cpu/simple/atomic.hh b/src/cpu/simple/atomic.hh
index a5151aa18..84f379121 100644
--- a/src/cpu/simple/atomic.hh
+++ b/src/cpu/simple/atomic.hh
@@ -163,6 +163,7 @@ class AtomicSimpleCPU : public BaseSimpleCPU
RequestPtr ifetch_req;
RequestPtr data_read_req;
RequestPtr data_write_req;
+ RequestPtr data_amo_req;
bool dcache_access;
Tick dcache_latency;
@@ -197,12 +198,12 @@ class AtomicSimpleCPU : public BaseSimpleCPU
Fault readMem(Addr addr, uint8_t *data, unsigned size,
Request::Flags flags) override;
- Fault initiateMemRead(Addr addr, unsigned size,
- Request::Flags flags) override;
-
Fault writeMem(uint8_t *data, unsigned size,
Addr addr, Request::Flags flags, uint64_t *res) override;
+ Fault amoMem(Addr addr, uint8_t* data, unsigned size,
+ Request::Flags flags, AtomicOpFunctor *amo_op) override;
+
void regProbePoints() override;
/**
diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc
index f71277d1c..422c73298 100644
--- a/src/cpu/simple/base.cc
+++ b/src/cpu/simple/base.cc
@@ -644,7 +644,7 @@ BaseSimpleCPU::postExecute()
t_info.numLoadInsts++;
}
- if (curStaticInst->isStore()){
+ if (curStaticInst->isStore() || curStaticInst->isAtomic()){
t_info.numStoreInsts++;
}
/* End power model statistics */
diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh
index e62fcf4d1..8060b07ad 100644
--- a/src/cpu/simple/base.hh
+++ b/src/cpu/simple/base.hh
@@ -143,13 +143,26 @@ class BaseSimpleCPU : public BaseCPU
void startup() override;
virtual Fault readMem(Addr addr, uint8_t* data, unsigned size,
- Request::Flags flags) = 0;
+ Request::Flags flags)
+ { panic("readMem() is not implemented\n"); }
virtual Fault initiateMemRead(Addr addr, unsigned size,
- Request::Flags flags) = 0;
+ Request::Flags flags)
+ { panic("initiateMemRead() is not implemented\n"); }
virtual Fault writeMem(uint8_t* data, unsigned size, Addr addr,
- Request::Flags flags, uint64_t* res) = 0;
+ Request::Flags flags, uint64_t* res)
+ { panic("writeMem() is not implemented\n"); }
+
+ virtual Fault amoMem(Addr addr, uint8_t* data, unsigned size,
+ Request::Flags flags,
+ AtomicOpFunctor *amo_op)
+ { panic("amoMem() is not implemented\n"); }
+
+ virtual Fault initiateMemAMO(Addr addr, unsigned size,
+ Request::Flags flags,
+ AtomicOpFunctor *amo_op)
+ { panic("initiateMemAMO() is not implemented\n"); }
void countInst();
Counter totalInsts() const override;
diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh
index 0552dc0c6..de5cc7fd7 100644
--- a/src/cpu/simple/exec_context.hh
+++ b/src/cpu/simple/exec_context.hh
@@ -456,6 +456,19 @@ class SimpleExecContext : public ExecContext {
return cpu->writeMem(data, size, addr, flags, res);
}
+ Fault amoMem(Addr addr, uint8_t *data, unsigned int size,
+ Request::Flags flags, AtomicOpFunctor *amo_op) override
+ {
+ return cpu->amoMem(addr, data, size, flags, amo_op);
+ }
+
+ Fault initiateMemAMO(Addr addr, unsigned int size,
+ Request::Flags flags,
+ AtomicOpFunctor *amo_op) override
+ {
+ return cpu->initiateMemAMO(addr, size, flags, amo_op);
+ }
+
/**
* Sets the number of consecutive store conditional failures.
*/
diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc
index b5450cf5f..637308a96 100644
--- a/src/cpu/simple/timing.cc
+++ b/src/cpu/simple/timing.cc
@@ -293,6 +293,7 @@ TimingSimpleCPU::sendData(const RequestPtr &req, uint8_t *data, uint64_t *res,
PacketPtr pkt = buildPacket(req, read);
pkt->dataDynamic<uint8_t>(data);
+
if (req->getFlags().isSet(Request::NO_ACCESS)) {
assert(!dcache_pkt);
pkt->makeResponse();
@@ -415,14 +416,6 @@ TimingSimpleCPU::buildSplitPacket(PacketPtr &pkt1, PacketPtr &pkt2,
}
Fault
-TimingSimpleCPU::readMem(Addr addr, uint8_t *data,
- unsigned size, Request::Flags flags)
-{
- panic("readMem() is for atomic accesses, and should "
- "never be called on TimingSimpleCPU.\n");
-}
-
-Fault
TimingSimpleCPU::initiateMemRead(Addr addr, unsigned size,
Request::Flags flags)
{
@@ -556,6 +549,54 @@ TimingSimpleCPU::writeMem(uint8_t *data, unsigned size,
return NoFault;
}
+Fault
+TimingSimpleCPU::initiateMemAMO(Addr addr, unsigned size,
+ Request::Flags flags,
+ AtomicOpFunctor *amo_op)
+{
+ SimpleExecContext &t_info = *threadInfo[curThread];
+ SimpleThread* thread = t_info.thread;
+
+ Fault fault;
+ const int asid = 0;
+ const Addr pc = thread->instAddr();
+ unsigned block_size = cacheLineSize();
+ BaseTLB::Mode mode = BaseTLB::Write;
+
+ if (traceData)
+ traceData->setMem(addr, size, flags);
+
+ RequestPtr req = make_shared<Request>(asid, addr, size, flags,
+ dataMasterId(), pc, thread->contextId(), amo_op);
+
+ assert(req->hasAtomicOpFunctor());
+
+ req->taskId(taskId());
+
+ Addr split_addr = roundDown(addr + size - 1, block_size);
+
+ // AMO requests that cross a cache line boundary are not allowed
+ // since the cache cannot execute an AMO atomically across two
+ // cache lines.
+ // For ISAs such as x86 that require AMO operations to work on
+ // accesses that cross cache-line boundaries, the cache needs to be
+ // modified to lock both cache lines and guarantee atomicity.
+ if (split_addr > addr) {
+ panic("AMO requests should not access across a cache line boundary\n");
+ }
+
+ _status = DTBWaitResponse;
+
+ WholeTranslationState *state =
+ new WholeTranslationState(req, new uint8_t[size], NULL, mode);
+ DataTranslation<TimingSimpleCPU *> *translation
+ = new DataTranslation<TimingSimpleCPU *>(this, state);
+ thread->dtb->translateTiming(req, thread->getTC(), translation, mode);
+
+ return NoFault;
+}
+
void
TimingSimpleCPU::threadSnoop(PacketPtr pkt, ThreadID sender)
{
diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh
index 0300d38eb..ce0a4dbfc 100644
--- a/src/cpu/simple/timing.hh
+++ b/src/cpu/simple/timing.hh
@@ -282,15 +282,15 @@ class TimingSimpleCPU : public BaseSimpleCPU
void activateContext(ThreadID thread_num) override;
void suspendContext(ThreadID thread_num) override;
- Fault readMem(Addr addr, uint8_t *data, unsigned size,
- Request::Flags flags) override;
-
Fault initiateMemRead(Addr addr, unsigned size,
Request::Flags flags) override;
Fault writeMem(uint8_t *data, unsigned size,
Addr addr, Request::Flags flags, uint64_t *res) override;
+ Fault initiateMemAMO(Addr addr, unsigned size, Request::Flags flags,
+ AtomicOpFunctor *amo_op) override;
+
void fetch();
void sendFetch(const Fault &fault,
const RequestPtr &req, ThreadContext *tc);