summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/mem/cache/cache.cc11
-rw-r--r--src/mem/cache/cache.hh2
-rw-r--r--src/mem/cache/mshr.cc2
-rw-r--r--src/mem/coherent_xbar.cc115
-rw-r--r--src/mem/coherent_xbar.hh8
-rw-r--r--src/mem/packet.hh19
6 files changed, 135 insertions, 22 deletions
diff --git a/src/mem/cache/cache.cc b/src/mem/cache/cache.cc
index b0e904d39..a83f8ab12 100644
--- a/src/mem/cache/cache.cc
+++ b/src/mem/cache/cache.cc
@@ -1092,7 +1092,7 @@ Cache::recvAtomic(PacketPtr pkt)
// until the point of reference.
DPRINTF(CacheVerbose, "%s: packet %s found block: %s\n",
__func__, pkt->print(), blk->print());
- PacketPtr wb_pkt = writecleanBlk(blk, pkt->req->getDest());
+ PacketPtr wb_pkt = writecleanBlk(blk, pkt->req->getDest(), pkt->id);
writebacks.push_back(wb_pkt);
pkt->setSatisfied();
}
@@ -1679,7 +1679,7 @@ Cache::writebackBlk(CacheBlk *blk)
}
PacketPtr
-Cache::writecleanBlk(CacheBlk *blk, Request::Flags dest)
+Cache::writecleanBlk(CacheBlk *blk, Request::Flags dest, PacketId id)
{
Request *req = new Request(tags->regenerateBlkAddr(blk->tag, blk->set),
blkSize, 0, Request::wbMasterId);
@@ -1688,7 +1688,7 @@ Cache::writecleanBlk(CacheBlk *blk, Request::Flags dest)
}
req->taskId(blk->task_id);
blk->task_id = ContextSwitchTaskId::Unknown;
- PacketPtr pkt = new Packet(req, MemCmd::WriteClean);
+ PacketPtr pkt = new Packet(req, MemCmd::WriteClean, blkSize, id);
DPRINTF(Cache, "Create %s writable: %d, dirty: %d\n", pkt->print(),
blk->isWritable(), blk->isDirty());
// make sure the block is not marked dirty
@@ -2093,7 +2093,7 @@ Cache::handleSnoop(PacketPtr pkt, CacheBlk *blk, bool is_timing,
if (blk_valid && blk->isDirty()) {
DPRINTF(CacheVerbose, "%s: packet (snoop) %s found block: %s\n",
__func__, pkt->print(), blk->print());
- PacketPtr wb_pkt = writecleanBlk(blk, pkt->req->getDest());
+ PacketPtr wb_pkt = writecleanBlk(blk, pkt->req->getDest(), pkt->id);
PacketList writebacks;
writebacks.push_back(wb_pkt);
@@ -2643,7 +2643,8 @@ Cache::sendMSHRQueuePacket(MSHR* mshr)
// until the point of reference.
DPRINTF(CacheVerbose, "%s: packet %s found block: %s\n",
__func__, pkt->print(), blk->print());
- PacketPtr wb_pkt = writecleanBlk(blk, pkt->req->getDest());
+ PacketPtr wb_pkt = writecleanBlk(blk, pkt->req->getDest(),
+ pkt->id);
PacketList writebacks;
writebacks.push_back(wb_pkt);
doWritebacks(writebacks, 0);
diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh
index cd3a1d8e5..4d840be27 100644
--- a/src/mem/cache/cache.hh
+++ b/src/mem/cache/cache.hh
@@ -458,7 +458,7 @@ class Cache : public BaseCache
* @param dest The destination of this clean operation
* @return The write clean packet for the block.
*/
- PacketPtr writecleanBlk(CacheBlk *blk, Request::Flags dest = 0);
+ PacketPtr writecleanBlk(CacheBlk *blk, Request::Flags dest, PacketId id);
/**
* Create a CleanEvict request for the given block.
diff --git a/src/mem/cache/mshr.cc b/src/mem/cache/mshr.cc
index d89adef19..f1a9b985e 100644
--- a/src/mem/cache/mshr.cc
+++ b/src/mem/cache/mshr.cc
@@ -426,7 +426,7 @@ MSHR::handleSnoop(PacketPtr pkt, Counter _order)
// the packet and the request as part of handling the deferred
// snoop.
PacketPtr cp_pkt = will_respond ? new Packet(pkt, true, true) :
- new Packet(new Request(*pkt->req), pkt->cmd, blkSize);
+ new Packet(new Request(*pkt->req), pkt->cmd, blkSize, pkt->id);
if (will_respond) {
// we are the ordering point, and will consequently
diff --git a/src/mem/coherent_xbar.cc b/src/mem/coherent_xbar.cc
index e946134d3..02b3122d9 100644
--- a/src/mem/coherent_xbar.cc
+++ b/src/mem/coherent_xbar.cc
@@ -40,6 +40,7 @@
* Authors: Ali Saidi
* Andreas Hansson
* William Wang
+ * Nikos Nikoleris
*/
/**
@@ -194,6 +195,22 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
if (snoop_caches) {
assert(pkt->snoopDelay == 0);
+ if (pkt->isClean() && !is_destination) {
+ // before snooping we need to make sure that the memory
+ // below is not busy and the cache clean request can be
+ // forwarded to it
+ if (!masterPorts[master_port_id]->tryTiming(pkt)) {
+ DPRINTF(CoherentXBar, "%s: src %s packet %s RETRY\n", __func__,
+ src_port->name(), pkt->print());
+
+ // update the layer state and schedule an idle event
+ reqLayers[master_port_id]->failedTiming(src_port,
+ clockEdge(Cycles(1)));
+ return false;
+ }
+ }
+
+
// the packet is a memory-mapped request and should be
// broadcasted to our snoopers but the source
if (snoopFilter) {
@@ -342,21 +359,76 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
// queue the packet for deletion
pendingDelete.reset(pkt);
+ // normally we respond to the packet we just received if we need to
+ PacketPtr rsp_pkt = pkt;
+ PortID rsp_port_id = slave_port_id;
+
+ // If this is the destination of the cache clean operation the
+ // crossbar is responsible for responding. This crossbar will
+ // respond when the cache clean is complete. A cache clean
+ // is complete either:
+ // * directly, if no cache above had a dirty copy of the block
+ // as indicated by the satisfied flag of the packet, or
+ // * when the crossbar has seen both the cache clean request
+ // (CleanSharedReq, CleanInvalidReq) and the corresponding
+ // write (WriteClean) which updates the block in the memory
+ // below.
+ if (success &&
+ ((pkt->isClean() && pkt->satisfied()) ||
+ pkt->cmd == MemCmd::WriteClean) &&
+ is_destination) {
+ PacketPtr deferred_rsp = pkt->isWrite() ? nullptr : pkt;
+ auto cmo_lookup = outstandingCMO.find(pkt->id);
+ if (cmo_lookup != outstandingCMO.end()) {
+ // the cache clean request has already reached this xbar
+ respond_directly = true;
+ if (pkt->isWrite()) {
+ rsp_pkt = cmo_lookup->second;
+ assert(rsp_pkt);
+
+ // determine the destination
+ const auto route_lookup = routeTo.find(rsp_pkt->req);
+ assert(route_lookup != routeTo.end());
+ rsp_port_id = route_lookup->second;
+ assert(rsp_port_id != InvalidPortID);
+ assert(rsp_port_id < respLayers.size());
+ // remove the request from the routing table
+ routeTo.erase(route_lookup);
+ }
+ outstandingCMO.erase(cmo_lookup);
+ } else {
+ respond_directly = false;
+ outstandingCMO.emplace(pkt->id, deferred_rsp);
+ if (!pkt->isWrite()) {
+ assert(routeTo.find(pkt->req) == routeTo.end());
+ routeTo[pkt->req] = slave_port_id;
+
+ panic_if(routeTo.size() > 512,
+ "Routing table exceeds 512 packets\n");
+ }
+ }
+ }
+
+
if (respond_directly) {
- assert(pkt->needsResponse());
+ assert(rsp_pkt->needsResponse());
assert(success);
- pkt->makeResponse();
+ rsp_pkt->makeResponse();
if (snoopFilter && !system->bypassCaches()) {
// let the snoop filter inspect the response and update its state
- snoopFilter->updateResponse(pkt, *slavePorts[slave_port_id]);
+ snoopFilter->updateResponse(rsp_pkt, *slavePorts[rsp_port_id]);
}
+ // we send the response after the current packet, even if the
+ // response is not for this packet (e.g. cache clean operation
+ // where both the request and the write packet have to cross
+ // the destination xbar before the response is sent.)
Tick response_time = clockEdge() + pkt->headerDelay;
- pkt->headerDelay = 0;
+ rsp_pkt->headerDelay = 0;
- slavePorts[slave_port_id]->schedTimingResp(pkt, response_time);
+ slavePorts[rsp_port_id]->schedTimingResp(rsp_pkt, response_time);
}
return success;
@@ -754,6 +826,30 @@ CoherentXBar::recvAtomic(PacketPtr pkt, PortID slave_port_id)
response_latency = snoop_response_latency;
}
+ // If this is the destination of the cache clean operation the
+ // crossbar is responsible for responding. This crossbar will
+ // respond when the cache clean is complete. An atomic cache clean
+ // is complete when the crossbar receives the cache clean
+ // request (CleanSharedReq, CleanInvalidReq), as either:
+ // * no cache above had a dirty copy of the block as indicated by
+ // the satisfied flag of the packet, or
+ // * the crossbar has already seen the corresponding write
+ // (WriteClean) which updates the block in the memory below.
+ if (pkt->isClean() && isDestination(pkt) && pkt->satisfied()) {
+ auto it = outstandingCMO.find(pkt->id);
+ assert(it != outstandingCMO.end());
+ // we are responding right away
+ outstandingCMO.erase(it);
+ } else if (pkt->cmd == MemCmd::WriteClean && isDestination(pkt)) {
+ // if this is the destination of the operation, the xbar
+ // sends the response to the cache clean operation only
+ // after having encountered the cache clean request
+ auto M5_VAR_USED ret = outstandingCMO.emplace(pkt->id, nullptr);
+ // in atomic mode we know that the WriteClean packet should
+ // precede the clean request
+ assert(ret.second);
+ }
+
// add the response data
if (pkt->isResponse()) {
pkt_size = pkt->hasData() ? pkt->getSize() : 0;
@@ -988,8 +1084,13 @@ bool
CoherentXBar::forwardPacket(const PacketPtr pkt)
{
// we are forwarding the packet if:
- // 1) this is a read or a write
- // 2) this crossbar is above the point of coherency
+ // 1) this is a cache clean request to the PoU/PoC and this
+ // crossbar is above the PoU/PoC
+ // 2) this is a read or a write
+ // 3) this crossbar is above the point of coherency
+ if (pkt->isClean()) {
+ return !isDestination(pkt);
+ }
return pkt->isRead() || pkt->isWrite() || !pointOfCoherency;
}
diff --git a/src/mem/coherent_xbar.hh b/src/mem/coherent_xbar.hh
index 0c2907fa0..79777b998 100644
--- a/src/mem/coherent_xbar.hh
+++ b/src/mem/coherent_xbar.hh
@@ -51,6 +51,7 @@
#ifndef __MEM_COHERENT_XBAR_HH__
#define __MEM_COHERENT_XBAR_HH__
+#include <unordered_map>
#include <unordered_set>
#include "mem/snoop_filter.hh"
@@ -263,6 +264,13 @@ class CoherentXBar : public BaseXBar
std::unordered_set<RequestPtr> outstandingSnoop;
/**
+ * Store the outstanding cache maintenance that we are expecting
+ * snoop responses from so we can determine when we received all
+ * snoop responses and if any of the agents satisfied the request.
+ */
+ std::unordered_map<PacketId, PacketPtr> outstandingCMO;
+
+ /**
* Keep a pointer to the system to be allow to querying memory system
* properties.
*/
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index 88829d358..66625b382 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -70,6 +70,7 @@ class Packet;
typedef Packet *PacketPtr;
typedef uint8_t* PacketDataPtr;
typedef std::list<PacketPtr> PacketList;
+typedef uint64_t PacketId;
class MemCmd
{
@@ -316,6 +317,8 @@ class Packet : public Printable
/// The command field of the packet.
MemCmd cmd;
+ const PacketId id;
+
/// A pointer to the original request.
const RequestPtr req;
@@ -743,9 +746,9 @@ class Packet : public Printable
* not be valid. The command must be supplied.
*/
Packet(const RequestPtr _req, MemCmd _cmd)
- : cmd(_cmd), req(_req), data(nullptr), addr(0), _isSecure(false),
- size(0), headerDelay(0), snoopDelay(0), payloadDelay(0),
- senderState(NULL)
+ : cmd(_cmd), id((PacketId)_req), req(_req), data(nullptr), addr(0),
+ _isSecure(false), size(0), headerDelay(0), snoopDelay(0),
+ payloadDelay(0), senderState(NULL)
{
if (req->hasPaddr()) {
addr = req->getPaddr();
@@ -763,10 +766,10 @@ class Packet : public Printable
* a request that is for a whole block, not the address from the
* req. this allows for overriding the size/addr of the req.
*/
- Packet(const RequestPtr _req, MemCmd _cmd, int _blkSize)
- : cmd(_cmd), req(_req), data(nullptr), addr(0), _isSecure(false),
- headerDelay(0), snoopDelay(0), payloadDelay(0),
- senderState(NULL)
+ Packet(const RequestPtr _req, MemCmd _cmd, int _blkSize, PacketId _id = 0)
+ : cmd(_cmd), id(_id ? _id : (PacketId)_req), req(_req), data(nullptr),
+ addr(0), _isSecure(false), headerDelay(0), snoopDelay(0),
+ payloadDelay(0), senderState(NULL)
{
if (req->hasPaddr()) {
addr = req->getPaddr() & ~(_blkSize - 1);
@@ -785,7 +788,7 @@ class Packet : public Printable
* packet should allocate its own data.
*/
Packet(const PacketPtr pkt, bool clear_flags, bool alloc_data)
- : cmd(pkt->cmd), req(pkt->req),
+ : cmd(pkt->cmd), id(pkt->id), req(pkt->req),
data(nullptr),
addr(pkt->addr), _isSecure(pkt->_isSecure), size(pkt->size),
bytesValid(pkt->bytesValid),