diff options
Diffstat (limited to 'src/cpu/minor')
-rw-r--r-- | src/cpu/minor/exec_context.hh | 14 | ||||
-rw-r--r-- | src/cpu/minor/execute.cc | 3 | ||||
-rw-r--r-- | src/cpu/minor/fetch2.cc | 7 | ||||
-rw-r--r-- | src/cpu/minor/fetch2.hh | 1 | ||||
-rw-r--r-- | src/cpu/minor/lsq.cc | 45 | ||||
-rw-r--r-- | src/cpu/minor/lsq.hh | 4 |
6 files changed, 57 insertions, 17 deletions
diff --git a/src/cpu/minor/exec_context.hh b/src/cpu/minor/exec_context.hh index 179883ecc..02b3dae1c 100644 --- a/src/cpu/minor/exec_context.hh +++ b/src/cpu/minor/exec_context.hh @@ -108,7 +108,7 @@ class ExecContext : public ::ExecContext Request::Flags flags) override { execute.getLSQ().pushRequest(inst, true /* load */, nullptr, - size, addr, flags, NULL); + size, addr, flags, NULL, nullptr); return NoFault; } @@ -117,7 +117,17 @@ class ExecContext : public ::ExecContext Request::Flags flags, uint64_t *res) override { execute.getLSQ().pushRequest(inst, false /* store */, data, - size, addr, flags, res); + size, addr, flags, res, nullptr); + return NoFault; + } + + Fault + initiateMemAMO(Addr addr, unsigned int size, Request::Flags flags, + AtomicOpFunctor *amo_op) override + { + // AMO requests are pushed through the store path + execute.getLSQ().pushRequest(inst, false /* amo */, nullptr, + size, addr, flags, nullptr, amo_op); return NoFault; } diff --git a/src/cpu/minor/execute.cc b/src/cpu/minor/execute.cc index 234a233c2..6a418202f 100644 --- a/src/cpu/minor/execute.cc +++ b/src/cpu/minor/execute.cc @@ -337,6 +337,7 @@ Execute::handleMemResponse(MinorDynInstPtr inst, bool is_load = inst->staticInst->isLoad(); bool is_store = inst->staticInst->isStore(); + bool is_atomic = inst->staticInst->isAtomic(); bool is_prefetch = inst->staticInst->isDataPrefetch(); /* If true, the trace's predicate value will be taken from the exec @@ -368,7 +369,7 @@ Execute::handleMemResponse(MinorDynInstPtr inst, *inst); fatal("Received error response packet for inst: %s\n", *inst); - } else if (is_store || is_load || is_prefetch) { + } else if (is_store || is_load || is_prefetch || is_atomic) { assert(packet); DPRINTF(MinorMem, "Memory response inst: %s addr: 0x%x size: %d\n", diff --git a/src/cpu/minor/fetch2.cc b/src/cpu/minor/fetch2.cc index 180890147..9347e4ccb 100644 --- a/src/cpu/minor/fetch2.cc +++ b/src/cpu/minor/fetch2.cc @@ -421,6 +421,8 @@ Fetch2::evaluate() loadInstructions++; else if (decoded_inst->isStore()) storeInstructions++; + else if (decoded_inst->isAtomic()) + amoInstructions++; else if (decoded_inst->isVector()) vecInstructions++; else if (decoded_inst->isFloating()) @@ -636,6 +638,11 @@ Fetch2::regStats() .name(name() + ".store_instructions") .desc("Number of memory store instructions successfully decoded") .flags(total); + + amoInstructions + .name(name() + ".amo_instructions") + .desc("Number of memory atomic instructions successfully decoded") + .flags(total); } void diff --git a/src/cpu/minor/fetch2.hh b/src/cpu/minor/fetch2.hh index 2230560f1..114dec0f5 100644 --- a/src/cpu/minor/fetch2.hh +++ b/src/cpu/minor/fetch2.hh @@ -171,6 +171,7 @@ class Fetch2 : public Named Stats::Scalar vecInstructions; Stats::Scalar loadInstructions; Stats::Scalar storeInstructions; + Stats::Scalar amoInstructions; public: /** Dump the whole contents of the input buffer. Useful after a diff --git a/src/cpu/minor/lsq.cc b/src/cpu/minor/lsq.cc index b836ed22d..6fe6c3738 100644 --- a/src/cpu/minor/lsq.cc +++ b/src/cpu/minor/lsq.cc @@ -676,9 +676,9 @@ LSQ::StoreBuffer::canForwardDataToLoad(LSQRequestPtr request, while (ret == NoAddrRangeCoverage && i != slots.rend()) { LSQRequestPtr slot = *i; - /* Cache maintenance instructions go down via the store path * - * but they carry no data and they shouldn't be considered for - * forwarding */ + /* Cache maintenance instructions go down via the store path but + * they carry no data and they shouldn't be considered + * for forwarding */ if (slot->packet && slot->inst->id.threadId == request->inst->id.threadId && !slot->packet->req->isCacheMaintenance()) { @@ -931,8 +931,9 @@ LSQ::tryToSendToTransfers(LSQRequestPtr request) bool is_load = request->isLoad; bool is_llsc = request->request->isLLSC(); bool is_swap = request->request->isSwap(); + bool is_atomic = request->request->isAtomic(); bool bufferable = !(request->request->isStrictlyOrdered() || - is_llsc || is_swap); + is_llsc || is_swap || is_atomic); if (is_load) { if (numStoresInTransfers != 0) { @@ -965,9 +966,16 @@ LSQ::tryToSendToTransfers(LSQRequestPtr request) if (storeBuffer.canForwardDataToLoad(request, forwarding_slot) != NoAddrRangeCoverage) { + // There's at least another request that targets the same + // address and is staying in the storeBuffer. Since our + // request is non-bufferable (e.g., strictly ordered or atomic), + // we must wait for the other request in the storeBuffer to + // complete before we can issue this non-bufferable request. + // This is to make sure that the order they access the cache is + // correct. DPRINTF(MinorMem, "Memory access can receive forwarded data" - " from the store buffer, need to wait for store buffer to" - " drain\n"); + " from the store buffer, but need to wait for store buffer" + " to drain\n"); return; } } @@ -1469,9 +1477,21 @@ LSQ::needsToTick() void LSQ::pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data, unsigned int size, Addr addr, Request::Flags flags, - uint64_t *res) + uint64_t *res, AtomicOpFunctor *amo_op) { bool needs_burst = transferNeedsBurst(addr, size, lineWidth); + + if (needs_burst && inst->staticInst->isAtomic()) { + // AMO requests that access across a cache line boundary are not + // allowed since the cache does not guarantee AMO ops to be executed + // atomically in two cache lines + // For ISAs such as x86 that requires AMO operations to work on + // accesses that cross cache-line boundaries, the cache needs to be + // modified to support locking both cache lines to guarantee the + // atomicity. + panic("Do not expect cross-cache-line atomic memory request\n"); + } + LSQRequestPtr request; /* Copy given data into the request. The request will pass this to the @@ -1480,15 +1500,16 @@ LSQ::pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data, DPRINTF(MinorMem, "Pushing request (%s) addr: 0x%x size: %d flags:" " 0x%x%s lineWidth : 0x%x\n", - (isLoad ? "load" : "store"), addr, size, flags, + (isLoad ? "load" : "store/atomic"), addr, size, flags, (needs_burst ? " (needs burst)" : ""), lineWidth); if (!isLoad) { - /* request_data becomes the property of a ...DataRequest (see below) + /* Request_data becomes the property of a ...DataRequest (see below) * and destroyed by its destructor */ request_data = new uint8_t[size]; - if (flags & Request::STORE_NO_DATA) { - /* For cache zeroing, just use zeroed data */ + if (inst->staticInst->isAtomic() || + (flags & Request::STORE_NO_DATA)) { + /* For atomic or store-no-data, just use zeroed data */ std::memset(request_data, 0, size); } else { std::memcpy(request_data, data, size); @@ -1511,7 +1532,7 @@ LSQ::pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data, request->request->setVirt(0 /* asid */, addr, size, flags, cpu.dataMasterId(), /* I've no idea why we need the PC, but give it */ - inst->pc.instAddr()); + inst->pc.instAddr(), amo_op); requests.push(request); request->startAddrTranslation(); diff --git a/src/cpu/minor/lsq.hh b/src/cpu/minor/lsq.hh index da873b4ac..11fa8774f 100644 --- a/src/cpu/minor/lsq.hh +++ b/src/cpu/minor/lsq.hh @@ -696,11 +696,11 @@ class LSQ : public Named void completeMemBarrierInst(MinorDynInstPtr inst, bool committed); - /** Single interface for readMem/writeMem to issue requests into + /** Single interface for readMem/writeMem/amoMem to issue requests into * the LSQ */ void pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data, unsigned int size, Addr addr, Request::Flags flags, - uint64_t *res); + uint64_t *res, AtomicOpFunctor *amo_op); /** Push a predicate failed-representing request into the queues just * to maintain commit order */ |