mem: Clean up packet data allocation

This patch attempts to make the rules for data allocation in the packet explicit, understandable, and easy to verify. The constructor that copies a packet is extended with an additional flag "alloc_data" to enable the call site to explicitly say whether the newly created packet is short-lived (a zero-time snoop), or has an unknown life-time and therefore should allocate its own data (or copy a static pointer in the case of static data). The tricky case is the static data. In essence this is a copy-avoidance scheme where the original source of the request (DMA, CPU etc) does not ask the memory system to return data as part of the packet, but instead provides a pointer, and then the memory system carries this pointer around, and copies the appropriate data to the location itself. Thus any derived packet actually never copies any data. As the original source does not copy any data from the response packet when arriving back at the source, we must maintain the copy of the original pointer to not break the system. We might want to revisit this one day and pay the price for a few extra memcpy invocations. All in all this patch should make it easier to grok what is going on in the memory system and how data is actually copied (or not).
author: Andreas Hansson <andreas.hansson@arm.com> 2014-12-02 06:07:54 -0500
committer: Andreas Hansson <andreas.hansson@arm.com> 2014-12-02 06:07:54 -0500
commit: ea5ccc70417db08379027ca7344e50cba53063dd (patch)
tree: db523845787bb9104a8619ececb814156b0054a8 /src/mem/cache/cache_impl.hh
parent: f012166bb600ebaeefa48e74f7dd7fdfc9742506 (diff)
download: gem5-ea5ccc70417db08379027ca7344e50cba53063dd.tar.xz
1 files changed, 26 insertions, 12 deletions
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 296f31ebd..a161f8085 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -487,17 +487,24 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)
         // that have shared copies.  Not necessary if we know that
         // supplier had exclusive copy to begin with.
         if (pkt->needsExclusive() && !pkt->isSupplyExclusive()) {
-            Packet *snoopPkt = new Packet(pkt, true);  // clear flags
+            // create a downstream express snoop with cleared packet
+            // flags, there is no need to allocate any data as the
+            // packet is merely used to co-ordinate state transitions
+            Packet *snoop_pkt = new Packet(pkt, true, false);
+
             // also reset the bus time that the original packet has
             // not yet paid for
-            snoopPkt->firstWordDelay = snoopPkt->lastWordDelay = 0;
-            snoopPkt->setExpressSnoop();
-            snoopPkt->assertMemInhibit();
-            bool M5_VAR_USED success = memSidePort->sendTimingReq(snoopPkt);
-            // the packet is marked inhibited and will thus bypass any
-            // flow control
+            snoop_pkt->firstWordDelay = snoop_pkt->lastWordDelay = 0;
+
+            // make this an instantaneous express snoop, and let the
+            // other caches in the system know that the packet is
+            // inhibited
+            snoop_pkt->setExpressSnoop();
+            snoop_pkt->assertMemInhibit();
+            bool M5_VAR_USED success = memSidePort->sendTimingReq(snoop_pkt);
+            // express snoops always succeed
             assert(success);
-            // main memory will delete snoopPkt
+            // main memory will delete the packet
         }
         // since we're the official target but we aren't responding,
         // delete the packet now.
@@ -1570,12 +1577,19 @@ doTimingSupplyResponse(PacketPtr req_pkt, const uint8_t *blk_data,
 {
     // sanity check
     assert(req_pkt->isRequest());
+    assert(req_pkt->needsResponse());
 
     DPRINTF(Cache, "%s for %s address %x size %d\n", __func__,
             req_pkt->cmdString(), req_pkt->getAddr(), req_pkt->getSize());
     // timing-mode snoop responses require a new packet, unless we
     // already made a copy...
-    PacketPtr pkt = already_copied ? req_pkt : new Packet(req_pkt);
+    PacketPtr pkt = req_pkt;
+    if (!already_copied)
+        // do not clear flags, and allocate space for data if the
+        // packet needs it (the only packets that carry data are read
+        // responses)
+        pkt = new Packet(req_pkt, false, req_pkt->isRead());
+
     assert(req_pkt->isInvalidate() || pkt->sharedAsserted());
     pkt->makeTimingResponse();
     // @todo Make someone pay for this
@@ -1624,7 +1638,7 @@ Cache<TagStore>::handleSnoop(PacketPtr pkt, BlkType *blk,
         // rewritten to be relative to cpu-side bus (if any)
         bool alreadyResponded = pkt->memInhibitAsserted();
         if (is_timing) {
-            Packet snoopPkt(pkt, true);  // clear flags
+            Packet snoopPkt(pkt, true, false);  // clear flags, no allocation
             snoopPkt.setExpressSnoop();
             snoopPkt.pushSenderState(new ForwardResponseRecord(pkt->getSrc()));
             // the snoop packet does not need to wait any additional
@@ -1996,7 +2010,7 @@ Cache<TagStore>::getTimingPacket()
             // at the moment. Without this check we could get a stale
             // copy from memory that might get used in place of the
             // dirty one.
-            Packet snoop_pkt(tgt_pkt, true);
+            Packet snoop_pkt(tgt_pkt, true, false);
             snoop_pkt.setExpressSnoop();
             snoop_pkt.senderState = mshr;
             cpuSidePort->sendTimingSnoopReq(&snoop_pkt);
@@ -2032,7 +2046,7 @@ Cache<TagStore>::getTimingPacket()
             // not a cache block request, but a response is expected
             // make copy of current packet to forward, keep current
             // copy for response handling
-            pkt = new Packet(tgt_pkt);
+            pkt = new Packet(tgt_pkt, false, true);
             if (pkt->isWrite()) {
                 pkt->setData(tgt_pkt->getConstPtr<uint8_t>());
             }
author	Andreas Hansson <andreas.hansson@arm.com>	2014-12-02 06:07:54 -0500
committer	Andreas Hansson <andreas.hansson@arm.com>	2014-12-02 06:07:54 -0500
commit	ea5ccc70417db08379027ca7344e50cba53063dd (patch)
tree	db523845787bb9104a8619ececb814156b0054a8 /src/mem/cache/cache_impl.hh
parent	f012166bb600ebaeefa48e74f7dd7fdfc9742506 (diff)
download	gem5-ea5ccc70417db08379027ca7344e50cba53063dd.tar.xz