From 884807a68ad7e4f390660b3becfe4ee094334e95 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Sun, 15 Jul 2007 20:11:06 -0700 Subject: Fix up a bunch of multilevel coherence issues. Atomic mode seems to work. Timing is closer but not there yet. --HG-- extra : convert_revision : 0dea5c3d4b973d009e9d4a4c21b9cad15961d56f --- configs/example/memtest.py | 2 +- src/cpu/memtest/memtest.cc | 4 +- src/cpu/o3/lsq_impl.hh | 7 ++-- src/mem/bus.cc | 5 ++- src/mem/cache/cache_impl.hh | 94 ++++++++++++++++++++++++++++++++++++++++----- src/mem/packet.hh | 12 ++++-- 6 files changed, 105 insertions(+), 19 deletions(-) diff --git a/configs/example/memtest.py b/configs/example/memtest.py index af100c9a9..5bb874e85 100644 --- a/configs/example/memtest.py +++ b/configs/example/memtest.py @@ -144,7 +144,7 @@ system = System(funcmem = PhysicalMemory(), def make_level(spec, prototypes, attach_obj, attach_port): fanout = spec[0] parent = attach_obj # use attach obj as config parent too - if fanout > 1 or options.force_bus: + if len(spec) > 1 and (fanout > 1 or options.force_bus): new_bus = Bus(clock="500MHz", width=16) new_bus.port = getattr(attach_obj, attach_port) parent.cpu_side_bus = new_bus diff --git a/src/cpu/memtest/memtest.cc b/src/cpu/memtest/memtest.cc index db3ca282a..f5c8bb93b 100644 --- a/src/cpu/memtest/memtest.cc +++ b/src/cpu/memtest/memtest.cc @@ -64,7 +64,9 @@ MemTest::CpuPort::recvTiming(PacketPtr pkt) Tick MemTest::CpuPort::recvAtomic(PacketPtr pkt) { - panic("MemTest doesn't expect recvAtomic callback!"); + // must be snoop upcall + assert(pkt->isRequest()); + assert(pkt->getDest() == Packet::Broadcast); return curTick; } diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh index b4a6a02da..10c0afd38 100644 --- a/src/cpu/o3/lsq_impl.hh +++ b/src/cpu/o3/lsq_impl.hh @@ -84,9 +84,10 @@ LSQ::DcachePort::recvTiming(PacketPtr pkt) lsq->thread[pkt->req->getThreadNum()].completeDataAccess(pkt); } else { - //else it is a coherence request, maybe you need to do something - warn("Recieved a coherence request (Invalidate?), 03CPU doesn't" - "update LSQ for these\n"); + // must be a snoop + + // @TODO someday may need to process invalidations in LSQ here + // to provide stronger consistency model } return true; } diff --git a/src/mem/bus.cc b/src/mem/bus.cc index 24a0c6f02..e70558bd6 100644 --- a/src/mem/bus.cc +++ b/src/mem/bus.cc @@ -183,8 +183,9 @@ Bus::recvTiming(PacketPtr pkt) // If the bus is busy, or other devices are in line ahead of the current // one, put this device on the retry list. - if (tickNextIdle > curTick || - (retryList.size() && (!inRetry || pktPort != retryList.front()))) + if (!pkt->isExpressSnoop() && + (tickNextIdle > curTick || + (retryList.size() && (!inRetry || pktPort != retryList.front())))) { addToRetryList(pktPort); DPRINTF(Bus, "recvTiming: Bus is busy, returning false\n"); diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index b159df84a..59571dd6f 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -165,11 +165,25 @@ Cache::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk) blk->trackLoadLocked(pkt); } pkt->setDataFromBlock(blk->data, blkSize); + if (pkt->getSize() == blkSize) { + // special handling for coherent block requests from + // upper-level caches + if (pkt->needsExclusive()) { + // on ReadExReq we give up our copy + tags->invalidateBlk(blk); + } else { + // on ReadReq we create shareable copies here and in + // the requester + pkt->assertShared(); + blk->status &= ~BlkWritable; + } + } } else { // Not a read or write... must be an upgrade. it's OK // to just ack those as long as we have an exclusive // copy at this level. assert(pkt->cmd == MemCmd::UpgradeReq); + tags->invalidateBlk(blk); } } @@ -269,6 +283,18 @@ Cache::access(PacketPtr pkt, BlkType *&blk, int &lat) hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++; satisfied = true; satisfyCpuSideRequest(pkt, blk); + } else if (pkt->cmd == MemCmd::Writeback) { + // special case: writeback to read-only block (e.g., from + // L1 into L2). since we're really just passing ownership + // from one cache to another, we can update this cache to + // be the owner without making the block writeable + assert(!blk->isWritable() /* && !blk->isDirty() */); + assert(blkSize == pkt->getSize()); + std::memcpy(blk->data, pkt->getPtr(), blkSize); + blk->status |= BlkDirty; + satisfied = true; + // nothing else to do; writeback doesn't expect response + assert(!pkt->needsResponse()); } else { // permission violation... nothing to do here, leave unsatisfied // for statistics purposes this counts like a complete miss @@ -363,9 +389,10 @@ Cache::timingAccess(PacketPtr pkt) bool needsResponse = pkt->needsResponse(); if (satisfied) { - assert(needsResponse); - pkt->makeTimingResponse(); - cpuSidePort->respond(pkt, curTick+lat); + if (needsResponse) { + pkt->makeTimingResponse(); + cpuSidePort->respond(pkt, curTick+lat); + } } else { // miss if (prefetchMiss) @@ -456,10 +483,30 @@ Cache::atomicAccess(PacketPtr pkt) { int lat = hitLatency; + // @TODO: make this a parameter + bool last_level_cache = false; + if (pkt->memInhibitAsserted()) { - DPRINTF(Cache, "mem inhibited on 0x%x: not responding\n", - pkt->getAddr()); assert(!pkt->req->isUncacheable()); + // have to invalidate ourselves and any lower caches even if + // upper cache will be responding + if (pkt->isInvalidate()) { + BlkType *blk = tags->findBlock(pkt->getAddr()); + if (blk && blk->isValid()) { + tags->invalidateBlk(blk); + DPRINTF(Cache, "rcvd mem-inhibited %s on 0x%x: invalidating\n", + pkt->cmdString(), pkt->getAddr()); + } + if (!last_level_cache) { + DPRINTF(Cache, "forwarding mem-inhibited %s on 0x%x\n", + pkt->cmdString(), pkt->getAddr()); + lat += memSidePort->sendAtomic(pkt); + } + } else { + DPRINTF(Cache, "rcvd mem-inhibited %s on 0x%x: not responding\n", + pkt->cmdString(), pkt->getAddr()); + } + return lat; } @@ -791,9 +838,7 @@ Cache::handleFill(PacketPtr pkt, BlkType *blk, assert(pkt->isRead() || blk->isValid()); } - if (pkt->needsExclusive()) { - blk->status = BlkValid | BlkWritable | BlkDirty; - } else if (!pkt->sharedAsserted()) { + if (pkt->needsExclusive() || !pkt->sharedAsserted()) { blk->status = BlkValid | BlkWritable; } else { blk->status = BlkValid; @@ -839,6 +884,37 @@ void Cache::handleSnoop(PacketPtr pkt, BlkType *blk, bool is_timing, bool is_deferred) { + assert(pkt->isRequest()); + + // first propagate snoop upward to see if anyone above us wants to + // handle it. save & restore packet src since it will get + // rewritten to be relative to cpu-side bus (if any) + bool alreadySupplied = pkt->memInhibitAsserted(); + bool upperSupply = false; + if (is_timing) { + Packet *snoopPkt = new Packet(pkt, true); // clear flags + snoopPkt->setExpressSnoop(); + cpuSidePort->sendTiming(snoopPkt); + if (snoopPkt->memInhibitAsserted()) { + // cache-to-cache response from some upper cache + assert(!alreadySupplied); + pkt->assertMemInhibit(); + } + if (snoopPkt->sharedAsserted()) { + pkt->assertShared(); + } + delete snoopPkt; + } else { + int origSrc = pkt->getSrc(); + cpuSidePort->sendAtomic(pkt); + if (!alreadySupplied && pkt->memInhibitAsserted()) { + // cache-to-cache response from some upper cache: + // forward response to original requester + assert(pkt->isResponse()); + } + pkt->setSrc(origSrc); + } + if (!blk || !blk->isValid()) { return; } @@ -846,7 +922,7 @@ Cache::handleSnoop(PacketPtr pkt, BlkType *blk, // we may end up modifying both the block state and the packet (if // we respond in atomic mode), so just figure out what to do now // and then do it later - bool supply = blk->isDirty() && pkt->isRead(); + bool supply = blk->isDirty() && pkt->isRead() && !upperSupply; bool invalidate = pkt->isInvalidate(); if (pkt->isRead() && !pkt->isInvalidate()) { diff --git a/src/mem/packet.hh b/src/mem/packet.hh index c90842dee..036bd3fd7 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -252,9 +252,11 @@ class Packet : public FastAlloc bool destValid; enum Flag { - // Snoop flags + // Snoop response flags MemInhibit, Shared, + // Special control flags + ExpressSnoop, NUM_PACKET_FLAGS }; @@ -317,6 +319,10 @@ class Packet : public FastAlloc bool memInhibitAsserted() { return flags[MemInhibit]; } bool sharedAsserted() { return flags[Shared]; } + // Special control flags + void setExpressSnoop() { flags[ExpressSnoop] = true; } + bool isExpressSnoop() { return flags[ExpressSnoop]; } + // Network error conditions... encapsulate them as methods since // their encoding keeps changing (from result field to command // field, etc.) @@ -372,7 +378,7 @@ class Packet : public FastAlloc * that, as we can't guarantee that the new packet's lifetime is * less than that of the original packet. In this case the new * packet should allocate its own data. */ - Packet(Packet *origPkt) + Packet(Packet *origPkt, bool clearFlags = false) : cmd(origPkt->cmd), req(origPkt->req), data(origPkt->staticData ? origPkt->data : NULL), staticData(origPkt->staticData), @@ -381,7 +387,7 @@ class Packet : public FastAlloc src(origPkt->src), dest(origPkt->dest), addrSizeValid(origPkt->addrSizeValid), srcValid(origPkt->srcValid), destValid(origPkt->destValid), - flags(origPkt->flags), + flags(clearFlags ? 0 : origPkt->flags), time(curTick), senderState(origPkt->senderState) { } -- cgit v1.2.3