Diffstat (limited to 'src/cpu/minor')
-rw-r--r--  src/cpu/minor/exec_context.hh | 14
-rw-r--r--  src/cpu/minor/execute.cc      |  3
-rw-r--r--  src/cpu/minor/fetch2.cc       |  7
-rw-r--r--  src/cpu/minor/fetch2.hh       |  1
-rw-r--r--  src/cpu/minor/lsq.cc          | 45
-rw-r--r--  src/cpu/minor/lsq.hh          |  4
6 files changed, 57 insertions(+), 17 deletions(-)
diff --git a/src/cpu/minor/exec_context.hh b/src/cpu/minor/exec_context.hh
index 179883ecc..02b3dae1c 100644
--- a/src/cpu/minor/exec_context.hh
+++ b/src/cpu/minor/exec_context.hh
@@ -108,7 +108,7 @@ class ExecContext : public ::ExecContext
Request::Flags flags) override
{
execute.getLSQ().pushRequest(inst, true /* load */, nullptr,
- size, addr, flags, NULL);
+ size, addr, flags, NULL, nullptr);
return NoFault;
}
@@ -117,7 +117,17 @@ class ExecContext : public ::ExecContext
Request::Flags flags, uint64_t *res) override
{
execute.getLSQ().pushRequest(inst, false /* store */, data,
- size, addr, flags, res);
+ size, addr, flags, res, nullptr);
+ return NoFault;
+ }
+
+ Fault
+ initiateMemAMO(Addr addr, unsigned int size, Request::Flags flags,
+ AtomicOpFunctor *amo_op) override
+ {
+ // AMO requests are pushed through the store path
+ execute.getLSQ().pushRequest(inst, false /* amo */, nullptr,
+ size, addr, flags, nullptr, amo_op);
return NoFault;
}
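For context, initiateMemAMO() hands the LSQ an AtomicOpFunctor that the
memory system later applies to its own copy of the data, making the
read-modify-write atomic. A minimal sketch of such a functor, assuming the
AtomicOpFunctor interface from src/mem/request.hh (the AtomicGenericAdd
name is hypothetical):

    #include "mem/request.hh"  // AtomicOpFunctor

    // Hypothetical fetch-and-add functor: the cache invokes operator()
    // directly on the in-cache data, so no separate load/store pair is
    // needed to keep the update atomic.
    class AtomicGenericAdd : public AtomicOpFunctor
    {
        uint32_t operand;
      public:
        AtomicGenericAdd(uint32_t v) : operand(v) {}
        void operator()(uint8_t *p) override
        {
            *reinterpret_cast<uint32_t *>(p) += operand;
        }
    };

An ISA's AMO instruction would construct one of these in its initiateAcc()
and pass it as the amo_op argument above.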
diff --git a/src/cpu/minor/execute.cc b/src/cpu/minor/execute.cc
index 234a233c2..6a418202f 100644
--- a/src/cpu/minor/execute.cc
+++ b/src/cpu/minor/execute.cc
@@ -337,6 +337,7 @@ Execute::handleMemResponse(MinorDynInstPtr inst,
bool is_load = inst->staticInst->isLoad();
bool is_store = inst->staticInst->isStore();
+ bool is_atomic = inst->staticInst->isAtomic();
bool is_prefetch = inst->staticInst->isDataPrefetch();
/* If true, the trace's predicate value will be taken from the exec
@@ -368,7 +369,7 @@ Execute::handleMemResponse(MinorDynInstPtr inst,
*inst);
fatal("Received error response packet for inst: %s\n", *inst);
- } else if (is_store || is_load || is_prefetch) {
+ } else if (is_store || is_load || is_prefetch || is_atomic) {
assert(packet);
DPRINTF(MinorMem, "Memory response inst: %s addr: 0x%x size: %d\n",
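When the response returns, is_atomic lets it complete through the same path
as a load: for a RISC-V-style AMO the packet data holds the memory value
from before the operation. A hedged sketch of the ISA side that consumes it
(AmoAddInst is hypothetical; completeAcc() and Packet::getLE<>() are
assumed from gem5's StaticInst and Packet interfaces):

    Fault
    AmoAddInst::completeAcc(PacketPtr pkt, ExecContext *xc,
                            Trace::InstRecord *traceData) const
    {
        // The response carries the pre-operation memory value, which
        // an AMO instruction returns in its destination register.
        uint32_t old_val = pkt->getLE<uint32_t>();
        xc->setIntRegOperand(this, 0, old_val);
        return NoFault;
    }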
diff --git a/src/cpu/minor/fetch2.cc b/src/cpu/minor/fetch2.cc
index 180890147..9347e4ccb 100644
--- a/src/cpu/minor/fetch2.cc
+++ b/src/cpu/minor/fetch2.cc
@@ -421,6 +421,8 @@ Fetch2::evaluate()
loadInstructions++;
else if (decoded_inst->isStore())
storeInstructions++;
+ else if (decoded_inst->isAtomic())
+ amoInstructions++;
else if (decoded_inst->isVector())
vecInstructions++;
else if (decoded_inst->isFloating())
@@ -636,6 +638,11 @@ Fetch2::regStats()
.name(name() + ".store_instructions")
.desc("Number of memory store instructions successfully decoded")
.flags(total);
+
+ amoInstructions
+ .name(name() + ".amo_instructions")
+ .desc("Number of memory atomic instructions successfully decoded")
+ .flags(total);
}
void
diff --git a/src/cpu/minor/fetch2.hh b/src/cpu/minor/fetch2.hh
index 2230560f1..114dec0f5 100644
--- a/src/cpu/minor/fetch2.hh
+++ b/src/cpu/minor/fetch2.hh
@@ -171,6 +171,7 @@ class Fetch2 : public Named
Stats::Scalar vecInstructions;
Stats::Scalar loadInstructions;
Stats::Scalar storeInstructions;
+ Stats::Scalar amoInstructions;
public:
/** Dump the whole contents of the input buffer. Useful after a
diff --git a/src/cpu/minor/lsq.cc b/src/cpu/minor/lsq.cc
index b836ed22d..6fe6c3738 100644
--- a/src/cpu/minor/lsq.cc
+++ b/src/cpu/minor/lsq.cc
@@ -676,9 +676,9 @@ LSQ::StoreBuffer::canForwardDataToLoad(LSQRequestPtr request,
while (ret == NoAddrRangeCoverage && i != slots.rend()) {
LSQRequestPtr slot = *i;
- /* Cache maintenance instructions go down via the store path *
- * but they carry no data and they shouldn't be considered for
- * forwarding */
+ /* Cache maintenance instructions go down via the store path but
+ * they carry no data and they shouldn't be considered
+ * for forwarding */
if (slot->packet &&
slot->inst->id.threadId == request->inst->id.threadId &&
!slot->packet->req->isCacheMaintenance()) {
@@ -931,8 +931,9 @@ LSQ::tryToSendToTransfers(LSQRequestPtr request)
bool is_load = request->isLoad;
bool is_llsc = request->request->isLLSC();
bool is_swap = request->request->isSwap();
+ bool is_atomic = request->request->isAtomic();
bool bufferable = !(request->request->isStrictlyOrdered() ||
- is_llsc || is_swap);
+ is_llsc || is_swap || is_atomic);
if (is_load) {
if (numStoresInTransfers != 0) {
@@ -965,9 +966,16 @@ LSQ::tryToSendToTransfers(LSQRequestPtr request)
if (storeBuffer.canForwardDataToLoad(request, forwarding_slot) !=
NoAddrRangeCoverage)
{
+ // There is at least one other request targeting the same
+ // address that is staying in the storeBuffer. Since our
+ // request is non-bufferable (e.g., strictly ordered or atomic),
+ // we must wait for that request in the storeBuffer to complete
+ // before issuing this one, so that the two requests access the
+ // cache in the correct order.
DPRINTF(MinorMem, "Memory access can receive forwarded data"
- " from the store buffer, need to wait for store buffer to"
- " drain\n");
+ " from the store buffer, but need to wait for store buffer"
+ " to drain\n");
return;
}
}
@@ -1469,9 +1477,21 @@ LSQ::needsToTick()
void
LSQ::pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
unsigned int size, Addr addr, Request::Flags flags,
- uint64_t *res)
+ uint64_t *res, AtomicOpFunctor *amo_op)
{
bool needs_burst = transferNeedsBurst(addr, size, lineWidth);
+
+ if (needs_burst && inst->staticInst->isAtomic()) {
+ // AMO requests that cross a cache line boundary are not
+ // allowed, since the cache does not guarantee that an AMO op
+ // executes atomically across two cache lines.
+ // For ISAs such as x86 that require AMO operations to work on
+ // accesses that cross cache-line boundaries, the cache would
+ // need to be modified to lock both cache lines to guarantee
+ // atomicity.
+ panic("Unexpected cross-cache-line atomic memory request\n");
+ }
+
LSQRequestPtr request;
/* Copy given data into the request. The request will pass this to the
@@ -1480,15 +1500,16 @@ LSQ::pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
DPRINTF(MinorMem, "Pushing request (%s) addr: 0x%x size: %d flags:"
" 0x%x%s lineWidth : 0x%x\n",
- (isLoad ? "load" : "store"), addr, size, flags,
+ (isLoad ? "load" : "store/atomic"), addr, size, flags,
(needs_burst ? " (needs burst)" : ""), lineWidth);
if (!isLoad) {
/* request_data becomes the property of a ...DataRequest (see below)
* and destroyed by its destructor */
request_data = new uint8_t[size];
- if (flags & Request::STORE_NO_DATA) {
- /* For cache zeroing, just use zeroed data */
+ if (inst->staticInst->isAtomic() ||
+ (flags & Request::STORE_NO_DATA)) {
+ /* For atomic or store-no-data, just use zeroed data */
std::memset(request_data, 0, size);
} else {
std::memcpy(request_data, data, size);
@@ -1511,7 +1532,7 @@ LSQ::pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
request->request->setVirt(0 /* asid */,
addr, size, flags, cpu.dataMasterId(),
/* I've no idea why we need the PC, but give it */
- inst->pc.instAddr());
+ inst->pc.instAddr(), amo_op);
requests.push(request);
request->startAddrTranslation();
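The new guard leans on transferNeedsBurst() to detect accesses that span a
cache line. The boundary condition it captures, as a standalone sketch
(crossesLineBoundary is a hypothetical helper, not part of this patch):

    // True when the first and last bytes of the access fall in
    // different cache lines.
    static bool
    crossesLineBoundary(Addr addr, unsigned int size, unsigned int line_width)
    {
        return (addr / line_width) != ((addr + size - 1) / line_width);
    }

With line_width = 64, an 8-byte AMO at addr 0x3c touches bytes 0x3c through
0x43, crossing the 0x40 boundary, so it would trip the panic in
pushRequest() above.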
diff --git a/src/cpu/minor/lsq.hh b/src/cpu/minor/lsq.hh
index da873b4ac..11fa8774f 100644
--- a/src/cpu/minor/lsq.hh
+++ b/src/cpu/minor/lsq.hh
@@ -696,11 +696,11 @@ class LSQ : public Named
void completeMemBarrierInst(MinorDynInstPtr inst,
bool committed);
- /** Single interface for readMem/writeMem to issue requests into
+ /** Single interface for readMem/writeMem/amoMem to issue requests into
* the LSQ */
void pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
unsigned int size, Addr addr, Request::Flags flags,
- uint64_t *res);
+ uint64_t *res, AtomicOpFunctor *amo_op);
/** Push a predicate failed-representing request into the queues just
* to maintain commit order */
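Taken together with exec_context.hh above, the widened signature makes
pushRequest() the single funnel for all three access kinds. The call
shapes, summarized from this diff (nullptr normalized throughout):

    // Loads pass no data and no functor; stores pass data and a result
    // pointer; AMOs pass only the functor and go down the store path.
    execute.getLSQ().pushRequest(inst, true /* load */, nullptr,
        size, addr, flags, nullptr, nullptr);
    execute.getLSQ().pushRequest(inst, false /* store */, data,
        size, addr, flags, res, nullptr);
    execute.getLSQ().pushRequest(inst, false /* amo */, nullptr,
        size, addr, flags, nullptr, amo_op);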