diff options
-rw-r--r-- | src/mem/cache/cache.cc | 11 | ||||
-rw-r--r-- | src/mem/cache/cache.hh | 2 | ||||
-rw-r--r-- | src/mem/cache/mshr.cc | 2 | ||||
-rw-r--r-- | src/mem/coherent_xbar.cc | 115 | ||||
-rw-r--r-- | src/mem/coherent_xbar.hh | 8 | ||||
-rw-r--r-- | src/mem/packet.hh | 19 |
6 files changed, 135 insertions, 22 deletions
diff --git a/src/mem/cache/cache.cc b/src/mem/cache/cache.cc index b0e904d39..a83f8ab12 100644 --- a/src/mem/cache/cache.cc +++ b/src/mem/cache/cache.cc @@ -1092,7 +1092,7 @@ Cache::recvAtomic(PacketPtr pkt) // until the point of reference. DPRINTF(CacheVerbose, "%s: packet %s found block: %s\n", __func__, pkt->print(), blk->print()); - PacketPtr wb_pkt = writecleanBlk(blk, pkt->req->getDest()); + PacketPtr wb_pkt = writecleanBlk(blk, pkt->req->getDest(), pkt->id); writebacks.push_back(wb_pkt); pkt->setSatisfied(); } @@ -1679,7 +1679,7 @@ Cache::writebackBlk(CacheBlk *blk) } PacketPtr -Cache::writecleanBlk(CacheBlk *blk, Request::Flags dest) +Cache::writecleanBlk(CacheBlk *blk, Request::Flags dest, PacketId id) { Request *req = new Request(tags->regenerateBlkAddr(blk->tag, blk->set), blkSize, 0, Request::wbMasterId); @@ -1688,7 +1688,7 @@ Cache::writecleanBlk(CacheBlk *blk, Request::Flags dest) } req->taskId(blk->task_id); blk->task_id = ContextSwitchTaskId::Unknown; - PacketPtr pkt = new Packet(req, MemCmd::WriteClean); + PacketPtr pkt = new Packet(req, MemCmd::WriteClean, blkSize, id); DPRINTF(Cache, "Create %s writable: %d, dirty: %d\n", pkt->print(), blk->isWritable(), blk->isDirty()); // make sure the block is not marked dirty @@ -2093,7 +2093,7 @@ Cache::handleSnoop(PacketPtr pkt, CacheBlk *blk, bool is_timing, if (blk_valid && blk->isDirty()) { DPRINTF(CacheVerbose, "%s: packet (snoop) %s found block: %s\n", __func__, pkt->print(), blk->print()); - PacketPtr wb_pkt = writecleanBlk(blk, pkt->req->getDest()); + PacketPtr wb_pkt = writecleanBlk(blk, pkt->req->getDest(), pkt->id); PacketList writebacks; writebacks.push_back(wb_pkt); @@ -2643,7 +2643,8 @@ Cache::sendMSHRQueuePacket(MSHR* mshr) // until the point of reference. 
DPRINTF(CacheVerbose, "%s: packet %s found block: %s\n", __func__, pkt->print(), blk->print()); - PacketPtr wb_pkt = writecleanBlk(blk, pkt->req->getDest()); + PacketPtr wb_pkt = writecleanBlk(blk, pkt->req->getDest(), + pkt->id); PacketList writebacks; writebacks.push_back(wb_pkt); doWritebacks(writebacks, 0); diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh index cd3a1d8e5..4d840be27 100644 --- a/src/mem/cache/cache.hh +++ b/src/mem/cache/cache.hh @@ -458,7 +458,7 @@ class Cache : public BaseCache * @param dest The destination of this clean operation * @return The write clean packet for the block. */ - PacketPtr writecleanBlk(CacheBlk *blk, Request::Flags dest = 0); + PacketPtr writecleanBlk(CacheBlk *blk, Request::Flags dest, PacketId id); /** * Create a CleanEvict request for the given block. diff --git a/src/mem/cache/mshr.cc b/src/mem/cache/mshr.cc index d89adef19..f1a9b985e 100644 --- a/src/mem/cache/mshr.cc +++ b/src/mem/cache/mshr.cc @@ -426,7 +426,7 @@ MSHR::handleSnoop(PacketPtr pkt, Counter _order) // the packet and the request as part of handling the deferred // snoop. PacketPtr cp_pkt = will_respond ? 
new Packet(pkt, true, true) : - new Packet(new Request(*pkt->req), pkt->cmd, blkSize); + new Packet(new Request(*pkt->req), pkt->cmd, blkSize, pkt->id); if (will_respond) { // we are the ordering point, and will consequently diff --git a/src/mem/coherent_xbar.cc b/src/mem/coherent_xbar.cc index e946134d3..02b3122d9 100644 --- a/src/mem/coherent_xbar.cc +++ b/src/mem/coherent_xbar.cc @@ -40,6 +40,7 @@ * Authors: Ali Saidi * Andreas Hansson * William Wang + * Nikos Nikoleris */ /** @@ -194,6 +195,22 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id) if (snoop_caches) { assert(pkt->snoopDelay == 0); + if (pkt->isClean() && !is_destination) { + // before snooping we need to make sure that the memory + // below is not busy and the cache clean request can be + // forwarded to it + if (!masterPorts[master_port_id]->tryTiming(pkt)) { + DPRINTF(CoherentXBar, "%s: src %s packet %s RETRY\n", __func__, + src_port->name(), pkt->print()); + + // update the layer state and schedule an idle event + reqLayers[master_port_id]->failedTiming(src_port, + clockEdge(Cycles(1))); + return false; + } + } + + // the packet is a memory-mapped request and should be // broadcasted to our snoopers but the source if (snoopFilter) { @@ -342,21 +359,76 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id) // queue the packet for deletion pendingDelete.reset(pkt); + // normally we respond to the packet we just received if we need to + PacketPtr rsp_pkt = pkt; + PortID rsp_port_id = slave_port_id; + + // If this is the destination of the cache clean operation the + // crossbar is responsible for responding. This crossbar will + // respond when the cache clean is complete. 
A cache clean + // is complete either: + // * directly, if no cache above had a dirty copy of the block + // as indicated by the satisfied flag of the packet, or + // * when the crossbar has seen both the cache clean request + // (CleanSharedReq, CleanInvalidReq) and the corresponding + // write (WriteClean) which updates the block in the memory + // below. + if (success && + ((pkt->isClean() && pkt->satisfied()) || + pkt->cmd == MemCmd::WriteClean) && + is_destination) { + PacketPtr deferred_rsp = pkt->isWrite() ? nullptr : pkt; + auto cmo_lookup = outstandingCMO.find(pkt->id); + if (cmo_lookup != outstandingCMO.end()) { + // the cache clean request has already reached this xbar + respond_directly = true; + if (pkt->isWrite()) { + rsp_pkt = cmo_lookup->second; + assert(rsp_pkt); + + // determine the destination + const auto route_lookup = routeTo.find(rsp_pkt->req); + assert(route_lookup != routeTo.end()); + rsp_port_id = route_lookup->second; + assert(rsp_port_id != InvalidPortID); + assert(rsp_port_id < respLayers.size()); + // remove the request from the routing table + routeTo.erase(route_lookup); + } + outstandingCMO.erase(cmo_lookup); + } else { + respond_directly = false; + outstandingCMO.emplace(pkt->id, deferred_rsp); + if (!pkt->isWrite()) { + assert(routeTo.find(pkt->req) == routeTo.end()); + routeTo[pkt->req] = slave_port_id; + + panic_if(routeTo.size() > 512, + "Routing table exceeds 512 packets\n"); + } + } + } + + if (respond_directly) { - assert(pkt->needsResponse()); + assert(rsp_pkt->needsResponse()); assert(success); - pkt->makeResponse(); + rsp_pkt->makeResponse(); if (snoopFilter && !system->bypassCaches()) { // let the snoop filter inspect the response and update its state - snoopFilter->updateResponse(pkt, *slavePorts[slave_port_id]); + snoopFilter->updateResponse(rsp_pkt, *slavePorts[rsp_port_id]); } + // we send the response after the current packet, even if the + // response is not for this packet (e.g. 
cache clean operation + // where both the request and the write packet have to cross + // the destination xbar before the response is sent.) Tick response_time = clockEdge() + pkt->headerDelay; - pkt->headerDelay = 0; + rsp_pkt->headerDelay = 0; - slavePorts[slave_port_id]->schedTimingResp(pkt, response_time); + slavePorts[rsp_port_id]->schedTimingResp(rsp_pkt, response_time); } return success; @@ -754,6 +826,30 @@ CoherentXBar::recvAtomic(PacketPtr pkt, PortID slave_port_id) response_latency = snoop_response_latency; } + // If this is the destination of the cache clean operation the + // crossbar is responsible for responding. This crossbar will + // respond when the cache clean is complete. An atomic cache clean + // is complete when the crossbar receives the cache clean + // request (CleanSharedReq, CleanInvalidReq), as either: + // * no cache above had a dirty copy of the block as indicated by + // the satisfied flag of the packet, or + // * the crossbar has already seen the corresponding write + // (WriteClean) which updates the block in the memory below. + if (pkt->isClean() && isDestination(pkt) && pkt->satisfied()) { + auto it = outstandingCMO.find(pkt->id); + assert(it != outstandingCMO.end()); + // we are responding right away + outstandingCMO.erase(it); + } else if (pkt->cmd == MemCmd::WriteClean && isDestination(pkt)) { + // if this is the destination of the operation, the xbar + // sends the response to the cache clean operation only + // after having encountered the cache clean request + auto M5_VAR_USED ret = outstandingCMO.emplace(pkt->id, nullptr); + // in atomic mode we know that the WriteClean packet should + // precede the clean request + assert(ret.second); + } + // add the response data if (pkt->isResponse()) { pkt_size = pkt->hasData() ? 
pkt->getSize() : 0; @@ -988,8 +1084,13 @@ bool CoherentXBar::forwardPacket(const PacketPtr pkt) { // we are forwarding the packet if: - // 1) this is a read or a write - // 2) this crossbar is above the point of coherency + // 1) this is a cache clean request to the PoU/PoC and this + // crossbar is above the PoU/PoC + // 2) this is a read or a write + // 3) this crossbar is above the point of coherency + if (pkt->isClean()) { + return !isDestination(pkt); + } return pkt->isRead() || pkt->isWrite() || !pointOfCoherency; } diff --git a/src/mem/coherent_xbar.hh b/src/mem/coherent_xbar.hh index 0c2907fa0..79777b998 100644 --- a/src/mem/coherent_xbar.hh +++ b/src/mem/coherent_xbar.hh @@ -51,6 +51,7 @@ #ifndef __MEM_COHERENT_XBAR_HH__ #define __MEM_COHERENT_XBAR_HH__ +#include <unordered_map> #include <unordered_set> #include "mem/snoop_filter.hh" @@ -263,6 +264,13 @@ class CoherentXBar : public BaseXBar std::unordered_set<RequestPtr> outstandingSnoop; /** + * Store the outstanding cache maintenance operations, keyed by + * packet id, for which this crossbar has so far seen only one of + * the cache clean request and the corresponding WriteClean. + */ + std::unordered_map<PacketId, PacketPtr> outstandingCMO; + + /** * Keep a pointer to the system to be allow to querying memory system * properties. */ diff --git a/src/mem/packet.hh b/src/mem/packet.hh index 88829d358..66625b382 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -70,6 +70,7 @@ class Packet; typedef Packet *PacketPtr; typedef uint8_t* PacketDataPtr; typedef std::list<PacketPtr> PacketList; +typedef uint64_t PacketId; class MemCmd { @@ -316,6 +317,8 @@ class Packet : public Printable /// The command field of the packet. MemCmd cmd; + const PacketId id; + /// A pointer to the original request. const RequestPtr req; @@ -743,9 +746,9 @@ class Packet : public Printable * not be valid. The command must be supplied. 
*/ Packet(const RequestPtr _req, MemCmd _cmd) - : cmd(_cmd), req(_req), data(nullptr), addr(0), _isSecure(false), - size(0), headerDelay(0), snoopDelay(0), payloadDelay(0), - senderState(NULL) + : cmd(_cmd), id((PacketId)_req), req(_req), data(nullptr), addr(0), + _isSecure(false), size(0), headerDelay(0), snoopDelay(0), + payloadDelay(0), senderState(NULL) { if (req->hasPaddr()) { addr = req->getPaddr(); @@ -763,10 +766,10 @@ class Packet : public Printable * a request that is for a whole block, not the address from the * req. this allows for overriding the size/addr of the req. */ - Packet(const RequestPtr _req, MemCmd _cmd, int _blkSize) - : cmd(_cmd), req(_req), data(nullptr), addr(0), _isSecure(false), - headerDelay(0), snoopDelay(0), payloadDelay(0), - senderState(NULL) + Packet(const RequestPtr _req, MemCmd _cmd, int _blkSize, PacketId _id = 0) + : cmd(_cmd), id(_id ? _id : (PacketId)_req), req(_req), data(nullptr), + addr(0), _isSecure(false), headerDelay(0), snoopDelay(0), + payloadDelay(0), senderState(NULL) { if (req->hasPaddr()) { addr = req->getPaddr() & ~(_blkSize - 1); @@ -785,7 +788,7 @@ class Packet : public Printable * packet should allocate its own data. */ Packet(const PacketPtr pkt, bool clear_flags, bool alloc_data) - : cmd(pkt->cmd), req(pkt->req), + : cmd(pkt->cmd), id(pkt->id), req(pkt->req), data(nullptr), addr(pkt->addr), _isSecure(pkt->_isSecure), size(pkt->size), bytesValid(pkt->bytesValid), |