summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Curtis Dunham <Curtis.Dunham@arm.com> 2014-06-27 12:29:00 -0500
committer Curtis Dunham <Curtis.Dunham@arm.com> 2014-06-27 12:29:00 -0500
commit f6f63ec0aa68f631691d9eccc18739722a0a9f17 (patch)
tree 6d3cd4d0f43381980412ade2ed8266e3d3ba472d
parent 3be4f4b846f991c98fe1909631996c5b58d52437 (diff)
download gem5-f6f63ec0aa68f631691d9eccc18739722a0a9f17.tar.xz
mem: write streaming support via WriteInvalidate promotion
Support full-block writes directly rather than requiring a read-modify-write (RMW):

 * a cache line is allocated in the cache upon receipt of a
   WriteInvalidateReq, not the WriteInvalidateResp.
 * only top-level caches allocate the line; the others just pass
   the request along and invalidate as necessary.
 * to close a timing window between the *Req and the *Resp, a new
   metadata bit tracks whether another cache has read a copy of
   the new line before the writeback to memory.
-rw-r--r-- src/mem/cache/base.cc 6
-rw-r--r-- src/mem/cache/base.hh 3
-rw-r--r-- src/mem/cache/blk.hh 5
-rw-r--r-- src/mem/cache/cache.hh 5
-rw-r--r-- src/mem/cache/cache_impl.hh 238
-rw-r--r-- src/mem/packet.cc 4
-rw-r--r-- src/mem/packet.hh 8
7 files changed, 190 insertions, 79 deletions
diff --git a/src/mem/cache/base.cc b/src/mem/cache/base.cc
index 2a285bf2f..70d1b4167 100644
--- a/src/mem/cache/base.cc
+++ b/src/mem/cache/base.cc
@@ -93,9 +93,9 @@ BaseCache::CacheSlavePort::setBlocked()
// if we already scheduled a retry in this cycle, but it has not yet
// happened, cancel it
if (sendRetryEvent.scheduled()) {
- owner.deschedule(sendRetryEvent);
- DPRINTF(CachePort, "Cache port %s deschedule retry\n", name());
- mustSendRetry = true;
+ owner.deschedule(sendRetryEvent);
+ DPRINTF(CachePort, "Cache port %s deschedule retry\n", name());
+ mustSendRetry = true;
}
}
diff --git a/src/mem/cache/base.hh b/src/mem/cache/base.hh
index 1567aaa62..297b80180 100644
--- a/src/mem/cache/base.hh
+++ b/src/mem/cache/base.hh
@@ -94,6 +94,7 @@ class BaseCache : public MemObject
Blocked_NoMSHRs = MSHRQueue_MSHRs,
Blocked_NoWBBuffers = MSHRQueue_WriteBuffer,
Blocked_NoTargets,
+ Blocked_PendingWriteInvalidate,
NUM_BLOCKED_CAUSES
};
@@ -168,6 +169,8 @@ class BaseCache : public MemObject
/** Return to normal operation and accept new requests. */
void clearBlocked();
+ bool isBlocked() const { return blocked; }
+
protected:
CacheSlavePort(const std::string &_name, BaseCache *_cache,
diff --git a/src/mem/cache/blk.hh b/src/mem/cache/blk.hh
index 626b4818d..ff09b42c4 100644
--- a/src/mem/cache/blk.hh
+++ b/src/mem/cache/blk.hh
@@ -72,7 +72,10 @@ enum CacheBlkStatusBits {
/** block was a hardware prefetch yet unaccessed*/
BlkHWPrefetched = 0x20,
/** block holds data from the secure memory space */
- BlkSecure = 0x40
+ BlkSecure = 0x40,
+ /** can the block transition to E? (hasn't been shared with another cache)
+ * used to close a timing gap when handling WriteInvalidate packets */
+ BlkCanGoExclusive = 0x80
};
/**
diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh
index 0ee1e353a..12fb3b0f0 100644
--- a/src/mem/cache/cache.hh
+++ b/src/mem/cache/cache.hh
@@ -181,6 +181,11 @@ class Cache : public BaseCache
const bool doFastWrites;
/**
+ * Turn line-sized writes into WriteInvalidate transactions.
+ */
+ void promoteWholeLineWrites(PacketPtr pkt);
+
+ /**
* Notify the prefetcher on every access, not just misses.
*/
const bool prefetchOnAccess;
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 1a72f285f..a792de19d 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -312,30 +312,20 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk,
pkt->getAddr(), pkt->isSecure() ? "s" : "ns",
blk ? "hit" : "miss", blk ? blk->print() : "");
- if (blk != NULL) {
-
- if (pkt->needsExclusive() ? blk->isWritable() : blk->isReadable()) {
- // OK to satisfy access
- incHitCount(pkt);
- satisfyCpuSideRequest(pkt, blk);
- return true;
- }
- }
-
- // Can't satisfy access normally... either no block (blk == NULL)
- // or have block but need exclusive & only have shared.
-
// Writeback handling is special case. We can write the block
// into the cache without having a writeable copy (or any copy at
- // all).
- if (pkt->cmd == MemCmd::Writeback) {
+ // all). Like writebacks, we write into the cache upon initial
+ // receipt of a write-invalidate packet as well.
+ if ((pkt->cmd == MemCmd::Writeback) ||
+ ((pkt->cmd == MemCmd::WriteInvalidateReq) && isTopLevel)) {
assert(blkSize == pkt->getSize());
if (blk == NULL) {
// need to do a replacement
blk = allocateBlock(pkt->getAddr(), pkt->isSecure(), writebacks);
if (blk == NULL) {
// no replaceable block available, give up.
- // writeback will be forwarded to next level.
+ // Writeback will be forwarded to next level,
+ // WriteInvalidate will be retried.
incMissCount(pkt);
return false;
}
@@ -347,17 +337,41 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk,
}
}
std::memcpy(blk->data, pkt->getPtr<uint8_t>(), blkSize);
- blk->status |= BlkDirty;
- if (pkt->isSupplyExclusive()) {
- blk->status |= BlkWritable;
+ if (pkt->cmd == MemCmd::Writeback) {
+ blk->status |= BlkDirty;
+ if (pkt->isSupplyExclusive()) {
+ blk->status |= BlkWritable;
+ }
+ // nothing else to do; writeback doesn't expect response
+ assert(!pkt->needsResponse());
+ } else if (pkt->cmd == MemCmd::WriteInvalidateReq) {
+ assert(blk->isReadable()); // implicitly checks for Valid bit also
+ blk->status |= (BlkDirty | BlkCanGoExclusive);
+ blk->status &= ~BlkWritable;
+ ++fastWrites;
}
- // nothing else to do; writeback doesn't expect response
- assert(!pkt->needsResponse());
DPRINTF(Cache, "%s new state is %s\n", __func__, blk->print());
incHitCount(pkt);
return true;
+ } else if ((pkt->cmd == MemCmd::WriteInvalidateReq) && !isTopLevel) {
+ if (blk != NULL) {
+ assert(blk != tempBlock);
+ tags->invalidate(blk);
+ blk->invalidate();
+ }
+ return true;
+ } else if ((blk != NULL) &&
+ (pkt->needsExclusive() ? blk->isWritable()
+ : blk->isReadable())) {
+ // OK to satisfy access
+ incHitCount(pkt);
+ satisfyCpuSideRequest(pkt, blk);
+ return true;
}
+ // Can't satisfy access normally... either no block (blk == NULL)
+ // or have block but need exclusive & only have shared.
+
incMissCount(pkt);
if (blk == NULL && pkt->isLLSC() && pkt->isWrite()) {
@@ -414,6 +428,19 @@ Cache<TagStore>::recvTimingSnoopResp(PacketPtr pkt)
}
template<class TagStore>
+void
+Cache<TagStore>::promoteWholeLineWrites(PacketPtr pkt)
+{
+ // Cache line clearing instructions
+ if (doFastWrites && (pkt->cmd == MemCmd::WriteReq) &&
+ (pkt->getSize() == blkSize) && (pkt->getOffset(blkSize) == 0)) {
+ pkt->cmd = MemCmd::WriteInvalidateReq;
+ DPRINTF(Cache, "packet promoted from Write to WriteInvalidate\n");
+ assert(isTopLevel); // should only happen at L1 or I/O cache
+ }
+}
+
+template<class TagStore>
bool
Cache<TagStore>::recvTimingReq(PacketPtr pkt)
{
@@ -439,6 +466,8 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)
return true;
}
+ promoteWholeLineWrites(pkt);
+
if (pkt->memInhibitAsserted()) {
DPRINTF(Cache, "mem inhibited on 0x%x (%s): not responding\n",
pkt->getAddr(), pkt->isSecure() ? "s" : "ns");
@@ -496,35 +525,26 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)
bool satisfied = access(pkt, blk, lat, writebacks);
-#if 0
- /** @todo make the fast write alloc (wh64) work with coherence. */
-
- // If this is a block size write/hint (WH64) allocate the block here
- // if the coherence protocol allows it.
- if (!blk && pkt->getSize() >= blkSize && coherence->allowFastWrites() &&
- (pkt->cmd == MemCmd::WriteReq
- || pkt->cmd == MemCmd::WriteInvalidateReq) ) {
- // not outstanding misses, can do this
- MSHR *outstanding_miss = mshrQueue.findMatch(pkt->getAddr(),
- pkt->isSecure());
- if (pkt->cmd == MemCmd::WriteInvalidateReq || !outstanding_miss) {
- if (outstanding_miss) {
- warn("WriteInv doing a fastallocate"
- "with an outstanding miss to the same address\n");
- }
- blk = handleFill(NULL, pkt, BlkValid | BlkWritable,
- writebacks);
- ++fastWrites;
- }
- }
-#endif
-
// track time of availability of next prefetch, if any
Tick next_pf_time = 0;
bool needsResponse = pkt->needsResponse();
+ if (pkt->cmd == MemCmd::WriteInvalidateReq) {
+ if (!satisfied && isTopLevel) {
+ // access() tried to allocate a block but it could not; abort.
+ setBlocked(Blocked_PendingWriteInvalidate);
+ return false;
+ }
+ satisfied = false;
+ // we need to take the miss path (allocate MSHR, etc.) for
+ // WriteInvalidates because they always need to propagate
+ // throughout the memory system
+ }
+
if (satisfied) {
+ // hit (for all other request types)
+
if (prefetcher && (prefetchOnAccess || (blk && blk->wasPrefetched()))) {
if (blk)
blk->status &= ~BlkHWPrefetched;
@@ -551,6 +571,16 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)
// @todo: Make someone pay for this
pkt->busFirstWordDelay = pkt->busLastWordDelay = 0;
+ if (blk && blk->isValid() && (blk->status & BlkCanGoExclusive) &&
+ pkt->isWrite() && (pkt->cmd != MemCmd::WriteInvalidateReq)) {
+ // Packet is a Write (needs exclusive) and should be delayed because
+ // a WriteInvalidate is pending. Instead of going the MSHR route,
+ // the Packet should be replayed, since if the block transitions
+ // to Exclusive the write can complete immediately.
+ setBlocked(Blocked_PendingWriteInvalidate);
+ return false;
+ }
+
Addr blk_addr = blockAlign(pkt->getAddr());
MSHR *mshr = mshrQueue.findMatch(blk_addr, pkt->isSecure());
@@ -639,7 +669,10 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)
if (pkt->cmd == MemCmd::Writeback) {
allocateWriteBuffer(pkt, time, true);
} else {
- if (blk && blk->isValid()) {
+ if (pkt->cmd == MemCmd::WriteInvalidateReq) {
+ // a WriteInvalidate is not a normal write miss;
+ // the assertions below are not applicable.
+ } else if (blk && blk->isValid()) {
// If we have a write miss to a valid block, we
// need to mark the block non-readable. Otherwise
// if we allow reads while there's an outstanding
@@ -655,7 +688,8 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)
// internally, and have a sufficiently weak memory
// model, this is probably unnecessary, but at some
// point it must have seemed like we needed it...
- assert(pkt->needsExclusive() && !blk->isWritable());
+ assert(pkt->needsExclusive());
+ assert(!blk->isWritable());
blk->status &= ~BlkReadable;
}
@@ -697,6 +731,12 @@ Cache<TagStore>::getBusPacket(PacketPtr cpu_pkt, BlkType *blk,
return NULL;
}
+ // WriteInvalidates for cache line clearing instructions don't
+ // require a read; just send directly to the bus.
+ if (cpu_pkt->cmd == MemCmd::WriteInvalidateReq) {
+ return NULL;
+ }
+
if (!blkValid &&
(cpu_pkt->cmd == MemCmd::Writeback || cpu_pkt->isUpgrade())) {
// Writebacks that weren't allocated in access() and upgrades
@@ -716,7 +756,8 @@ Cache<TagStore>::getBusPacket(PacketPtr cpu_pkt, BlkType *blk,
if (blkValid && useUpgrades) {
// only reason to be here is that blk is shared
// (read-only) and we need exclusive
- assert(needsExclusive && !blk->isWritable());
+ assert(needsExclusive);
+ assert(!blk->isWritable());
cmd = cpu_pkt->isLLSC() ? MemCmd::SCUpgradeReq : MemCmd::UpgradeReq;
} else if (cpu_pkt->cmd == MemCmd::SCUpgradeFailReq ||
cpu_pkt->cmd == MemCmd::StoreCondFailReq) {
@@ -751,6 +792,8 @@ Cache<TagStore>::recvAtomic(PacketPtr pkt)
if (system->bypassCaches())
return ticksToCycles(memSidePort->sendAtomic(pkt));
+ promoteWholeLineWrites(pkt);
+
if (pkt->memInhibitAsserted()) {
assert(!pkt->req->isUncacheable());
// have to invalidate ourselves and any lower caches even if
@@ -788,6 +831,10 @@ Cache<TagStore>::recvAtomic(PacketPtr pkt)
if (!access(pkt, blk, lat, writebacks)) {
// MISS
+
+ // WriteInvalidates should never fail an access() in Atomic mode
+ assert(pkt->cmd != MemCmd::WriteInvalidateReq);
+
PacketPtr bus_pkt = getBusPacket(pkt, blk, pkt->needsExclusive());
bool is_forward = (bus_pkt == NULL);
@@ -858,7 +905,25 @@ Cache<TagStore>::recvAtomic(PacketPtr pkt)
delete wbPkt;
}
- // We now have the block one way or another (hit or completed miss)
+ // We now have the block one way or another (hit or completed miss),
+ // except for Request types that perform an invalidate, where the point
+ // is to make sure there is no block.
+
+ if (pkt->cmd == MemCmd::WriteInvalidateReq) {
+ memSidePort->sendAtomic(pkt); // complete writeback
+ if (isTopLevel) {
+ // top level caches allocate and write the data
+ assert(blk->isDirty());
+ assert(!blk->isWritable());
+ assert(blk->status & BlkCanGoExclusive);
+ blk->status &= ~(BlkDirty | BlkCanGoExclusive); // and mark clean
+ blk->status |= BlkWritable; // i.e. O(+cgE) -> E
+ } else {
+ // other caches invalidate.
+ // if the block was found, it was invalidated.
+ assert(!blk || !blk->isValid());
+ }
+ }
if (pkt->needsResponse()) {
pkt->makeAtomicResponse();
@@ -1064,6 +1129,38 @@ Cache<TagStore>::recvTimingResp(PacketPtr pkt)
completion_time = clockEdge(responseLatency) +
pkt->busLastWordDelay;
target->pkt->req->setExtraData(0);
+ } else if (pkt->cmd == MemCmd::WriteInvalidateResp) {
+ if (blk) {
+ assert(blk->isDirty() && !blk->isWritable());
+ // The block, having been written back, is no longer dirty,
+ // nor do we have any reason to see if it was snooped in the
+ // meantime (which CanGoExclusive tracks). If it can go
+ // exclusive, we put it in that state, and otherwise S.
+ // In short: O(+cgE) -> E, O(-cgE) -> S
+ if (blk->status & BlkCanGoExclusive) {
+ blk->status |= BlkWritable;
+ }
+ blk->status &= ~(BlkDirty | BlkCanGoExclusive);
+ }
+ if (isTopLevel) {
+ // makeTimingResponse() will turn it into a WriteResp
+ target->pkt->cmd = MemCmd::WriteReq;
+ // Writes may have been blocked - quite rare case, but
+ // it does happen. Prevent deadlock by telling the core
+ if (isBlocked()) { // to retry.
+ clearBlocked(Blocked_PendingWriteInvalidate);
+ }
+ }
+ // If the block managed to get evicted before its own
+ // writeback (e.g. by a Read/Upgrade (from O(-cgE)/S to
+ // I/E) or ReadExclusive (direct to I/E); either way a
+ // cache-to-cache ownership transfer) completed, that's
+ // OK, we just ignore this response. If the new owner
+ // doesn't actually modify it, a superfluous writeback
+ // will occur for its impatience (since it will think it
+ // has dirty data), but it really can't be helped.
+ completion_time = clockEdge(responseLatency) +
+ pkt->busLastWordDelay;
} else {
// not a cache fill, just forwarding response
// responseLatency is the latency of the return path
@@ -1291,9 +1388,10 @@ Cache<TagStore>::allocateBlock(Addr addr, bool is_secure,
Addr repl_addr = tags->regenerateBlkAddr(blk->tag, blk->set);
MSHR *repl_mshr = mshrQueue.findMatch(repl_addr, blk->isSecure());
if (repl_mshr) {
- // must be an outstanding upgrade request on block
- // we're about to replace...
- assert(!blk->isWritable());
+ // must be an outstanding upgrade request (common case)
+ // or WriteInvalidate pending writeback (very uncommon case)
+ // on a block we're about to replace...
+ assert(!blk->isWritable() || blk->isDirty());
assert(repl_mshr->needsExclusive());
// too hard to replace block with transient state
// allocation failed, block not inserted
@@ -1378,6 +1476,11 @@ Cache<TagStore>::handleFill(PacketPtr pkt, BlkType *blk,
blk->status |= BlkDirty;
}
+ if (pkt->cmd == MemCmd::WriteInvalidateReq) {
+ // a block written immediately, all at once, pre-writeback is dirty
+ blk->status |= BlkDirty;
+ }
+
DPRINTF(Cache, "Block addr %x (%s) moving from state %x to %s\n",
addr, is_secure ? "s" : "ns", old_state, blk->print());
@@ -1492,20 +1595,25 @@ Cache<TagStore>::handleSnoop(PacketPtr pkt, BlkType *blk,
}
}
- if (!blk || !blk->isValid()) {
- DPRINTF(Cache, "%s snoop miss for %s address %x size %d\n",
- __func__, pkt->cmdString(), pkt->getAddr(), pkt->getSize());
- return;
- } else {
- DPRINTF(Cache, "%s snoop hit for %s for address %x size %d, "
- "old state is %s\n", __func__, pkt->cmdString(),
- pkt->getAddr(), pkt->getSize(), blk->print());
- }
+ if (!blk || !blk->isValid()) {
+ DPRINTF(Cache, "%s snoop miss for %s address %x size %d\n",
+ __func__, pkt->cmdString(), pkt->getAddr(), pkt->getSize());
+ return;
+ } else {
+ DPRINTF(Cache, "%s snoop hit for %s for address %x size %d, "
+ "old state is %s\n", __func__, pkt->cmdString(),
+ pkt->getAddr(), pkt->getSize(), blk->print());
+ }
// we may end up modifying both the block state and the packet (if
// we respond in atomic mode), so just figure out what to do now
- // and then do it later
- bool respond = blk->isDirty() && pkt->needsResponse();
+ // and then do it later. If we find dirty data while snooping for a
+ // WriteInvalidate, we don't care, since no merging needs to take place.
+ // We need the eviction to happen as normal, but the data needn't be
+ // sent anywhere, nor should the writeback be inhibited at the memory
+ // controller for any reason.
+ bool respond = blk->isDirty() && pkt->needsResponse()
+ && (pkt->cmd != MemCmd::WriteInvalidateReq);
bool have_exclusive = blk->isWritable();
// Invalidate any prefetch's from below that would strip write permissions
@@ -1522,7 +1630,7 @@ Cache<TagStore>::handleSnoop(PacketPtr pkt, BlkType *blk,
if (pkt->isRead() && !invalidate) {
assert(!needs_exclusive);
pkt->assertShared();
- int bits_to_clear = BlkWritable;
+ int bits_to_clear = BlkWritable | BlkCanGoExclusive;
const bool haveOwnershipState = true; // for now
if (!haveOwnershipState) {
// if we don't support pure ownership (dirty && !writable),
@@ -1950,9 +2058,9 @@ Cache<TagStore>::CpuSidePort::recvTimingReq(PacketPtr pkt)
// either already committed to send a retry, or blocked
success = false;
} else {
- // for now this should always succeed
+ // pass it on to the cache, and let the cache decide if we
+ // have to retry or not
success = cache->recvTimingReq(pkt);
- assert(success);
}
// remember if we have to retry
diff --git a/src/mem/packet.cc b/src/mem/packet.cc
index 4ff531e80..5b0834285 100644
--- a/src/mem/packet.cc
+++ b/src/mem/packet.cc
@@ -98,11 +98,11 @@ MemCmd::commandInfo[] =
/* HardPFResp */
{ SET4(IsRead, IsResponse, IsHWPrefetch, HasData),
InvalidCmd, "HardPFResp" },
- /* WriteInvalidateReq (currently unused, see packet.hh) */
+ /* WriteInvalidateReq */
{ SET6(IsWrite, NeedsExclusive, IsInvalidate,
IsRequest, HasData, NeedsResponse),
WriteInvalidateResp, "WriteInvalidateReq" },
- /* WriteInvalidateResp (currently unused, see packet.hh) */
+ /* WriteInvalidateResp */
{ SET3(IsWrite, NeedsExclusive, IsResponse),
InvalidCmd, "WriteInvalidateResp" },
/* UpgradeReq */
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index f93725fcb..155a7ff82 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -91,14 +91,6 @@ class MemCmd
HardPFReq,
SoftPFResp,
HardPFResp,
- // WriteInvalidateReq transactions used to be generated by the
- // DMA ports when writing full blocks to memory, however, it
- // is not used anymore since we put the I/O cache in place to
- // deal with partial block writes. Hence, WriteInvalidateReq
- // and WriteInvalidateResp are currently unused. The
- // implication is that the I/O cache does read-exclusive
- // operations on every full-cache-block DMA, and ultimately
- // this needs to be fixed.
WriteInvalidateReq,
WriteInvalidateResp,
UpgradeReq,