6 files changed, 75 insertions, 50 deletions
diff --git a/src/mem/bus.cc b/src/mem/bus.cc
index 4d9cdbe88..690d85373 100644
--- a/src/mem/bus.cc
+++ b/src/mem/bus.cc
@@ -129,30 +129,24 @@ BaseBus::getSlavePort(const std::string &if_name, PortID idx)
     }
 }
 
-Tick
+void
 BaseBus::calcPacketTiming(PacketPtr pkt)
 {
-    // determine the header time rounded to the closest following
-    // clock edge
-    Tick headerTime = clockEdge(headerCycles);
-
-    // The packet will be sent. Figure out how long it occupies the bus, and
-    // how much of that time is for the first "word", aka bus width.
-    Cycles numCycles(0);
-    if (pkt->hasData()) {
-        // If a packet has data, it needs ceil(size/width) cycles to send it
-        unsigned dataSize = pkt->getSize();
-        numCycles = Cycles(divCeil(dataSize, width));
-    }
+    // the bus may be called at a time that does not necessarily
+    // coincide with its own clock edge, so start by determining how
+    // long it is until the next clock edge (could be zero)
+    Tick offset = nextCycle() - curTick();
 
-    // The first word will be delivered on the cycle after the header.
-    pkt->firstWordTime = headerTime + clockPeriod();
+    // determine how many cycles are needed to send the data
+    unsigned dataCycles = pkt->hasData() ? divCeil(pkt->getSize(), width) : 0;
 
-    // Note that currently finishTime can be smaller than
-    // firstWordTime if the packet has no data
-    pkt->finishTime = headerTime + numCycles * clockPeriod();
+    // The first word will be delivered on the cycle after the header.
+    pkt->busFirstWordDelay = (headerCycles + 1) * clockPeriod() + offset;
 
-    return headerTime;
+    // Note that currently busLastWordDelay can be smaller than
+    // busFirstWordDelay if the packet has no data
+    pkt->busLastWordDelay = (headerCycles + dataCycles) * clockPeriod() +
+        offset;
 }
 
 template <typename PortClass>
diff --git a/src/mem/bus.hh b/src/mem/bus.hh
index 015bb51a0..35c206fa9 100644
--- a/src/mem/bus.hh
+++ b/src/mem/bus.hh
@@ -302,12 +302,13 @@ class BaseBus : public MemObject
      */
     AddrRangeList getAddrRanges() const;
 
-    /** Calculate the timing parameters for the packet.  Updates the
-     * firstWordTime and finishTime fields of the packet object.
-     * Returns the tick at which the packet header is completed (which
-     * will be all that is sent if the target rejects the packet).
+    /**
+     * Calculate the timing parameters for the packet. Updates the
+     * busFirstWordDelay and busLastWordDelay fields of the packet with
+     * the number of ticks, relative to the current tick, until the
+     * first word and the last word are transmitted, respectively.
      */
-    Tick calcPacketTiming(PacketPtr pkt);
+    void calcPacketTiming(PacketPtr pkt);
 
     /**
      * Ask everyone on the bus what their size is and determine the
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index a7e6a6186..7aa922055 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -898,8 +898,9 @@ Cache<TagStore>::handleResponse(PacketPtr pkt)
                 // responseLatency is the latency of the return path
                 // from lower level caches/memory to an upper level cache or
                 // the core.
-                completion_time = responseLatency * clockPeriod() +
-                    (transfer_offset ? pkt->finishTime : pkt->firstWordTime);
+                completion_time = curTick() + responseLatency * clockPeriod() +
+                    (transfer_offset ? pkt->busLastWordDelay :
+                     pkt->busFirstWordDelay);
 
                 assert(!target->pkt->req->isUncacheable());
 
@@ -914,15 +915,15 @@ Cache<TagStore>::handleResponse(PacketPtr pkt)
                 // responseLatency is the latency of the return path
                 // from lower level caches/memory to an upper level cache or
                 // the core.
-                completion_time = responseLatency * clockPeriod() +
-                    pkt->finishTime;
+                completion_time = curTick() + responseLatency * clockPeriod() +
+                    pkt->busLastWordDelay;
                 target->pkt->req->setExtraData(0);
             } else {
                 // not a cache fill, just forwarding response
                 // responseLatency is the latency of the return path
                 // from lower level cahces/memory to the core.
-                completion_time = responseLatency * clockPeriod() +
-                    pkt->finishTime;
+                completion_time = curTick() + responseLatency * clockPeriod() +
+                    pkt->busLastWordDelay;
                 if (pkt->isRead() && !is_error) {
                     target->pkt->setData(pkt->getPtr<uint8_t>());
                 }
@@ -984,7 +985,8 @@ Cache<TagStore>::handleResponse(PacketPtr pkt)
         }
         MSHRQueue *mq = mshr->queue;
         mq->markPending(mshr);
-        requestMemSideBus((RequestCause)mq->index, pkt->finishTime);
+        requestMemSideBus((RequestCause)mq->index, curTick() +
+                          pkt->busLastWordDelay);
     } else {
         mq->deallocate(mshr);
         if (wasFull && !mq->isFull()) {
@@ -1217,7 +1219,7 @@ Cache<TagStore>::handleFill(PacketPtr pkt, BlkType *blk,
         std::memcpy(blk->data, pkt->getPtr<uint8_t>(), blkSize);
     }
 
-    blk->whenReady = pkt->finishTime;
+    blk->whenReady = curTick() + pkt->busLastWordDelay;
 
     return blk;
 }
@@ -1575,7 +1577,7 @@ Cache<TagStore>::getTimingPacket()
         pkt = new Packet(tgt_pkt);
         pkt->cmd = MemCmd::UpgradeFailResp;
         pkt->senderState = mshr;
-        pkt->firstWordTime = pkt->finishTime = curTick();
+        pkt->busFirstWordDelay = pkt->busLastWordDelay = 0;
         handleResponse(pkt);
         return NULL;
     } else if (mshr->isForwardNoResponse()) {
diff --git a/src/mem/coherent_bus.cc b/src/mem/coherent_bus.cc
index 409f69229..b57484ab3 100644
--- a/src/mem/coherent_bus.cc
+++ b/src/mem/coherent_bus.cc
@@ -135,8 +135,8 @@ CoherentBus::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
     // set the source port for routing of the response
     pkt->setSrc(slave_port_id);
 
-    Tick headerFinishTime = is_express_snoop ? 0 : calcPacketTiming(pkt);
-    Tick packetFinishTime = is_express_snoop ? 0 : pkt->finishTime;
+    calcPacketTiming(pkt);
+    Tick packetFinishTime = pkt->busLastWordDelay + curTick();
 
     // uncacheable requests need never be snooped
     if (!pkt->req->isUncacheable() && !system->bypassCaches()) {
@@ -183,7 +183,7 @@ CoherentBus::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
                     src_port->name(), pkt->cmdString(), pkt->getAddr());
 
             // update the bus state and schedule an idle event
-            reqLayer.failedTiming(src_port, headerFinishTime);
+            reqLayer.failedTiming(src_port, clockEdge(Cycles(headerCycles)));
         } else {
             // update the bus state and schedule an idle event
             reqLayer.succeededTiming(packetFinishTime);
@@ -211,7 +211,7 @@ CoherentBus::recvTimingResp(PacketPtr pkt, PortID master_port_id)
             src_port->name(), pkt->cmdString(), pkt->getAddr());
 
     calcPacketTiming(pkt);
-    Tick packetFinishTime = pkt->finishTime;
+    Tick packetFinishTime = pkt->busLastWordDelay + curTick();
 
     // the packet is a normal response to a request that we should
     // have seen passing through the bus
@@ -281,7 +281,7 @@ CoherentBus::recvTimingSnoopResp(PacketPtr pkt, PortID slave_port_id)
     assert(!pkt->isExpressSnoop());
 
     calcPacketTiming(pkt);
-    Tick packetFinishTime = pkt->finishTime;
+    Tick packetFinishTime = pkt->busLastWordDelay + curTick();
 
     // determine if the response is from a snoop request we
     // created as the result of a normal request (in which case it
@@ -385,7 +385,8 @@ CoherentBus::recvAtomic(PacketPtr pkt, PortID slave_port_id)
         response_latency = snoop_response_latency;
     }
 
-    pkt->finishTime = curTick() + response_latency;
+    // @todo: Not setting busFirstWordDelay
+    pkt->busLastWordDelay = response_latency;
     return response_latency;
 }
 
@@ -405,7 +406,8 @@ CoherentBus::recvAtomicSnoop(PacketPtr pkt, PortID master_port_id)
     if (snoop_response_cmd != MemCmd::InvalidCmd)
         pkt->cmd = snoop_response_cmd;
 
-    pkt->finishTime = curTick() + snoop_response_latency;
+    // @todo: Not setting busFirstWordDelay
+    pkt->busLastWordDelay = snoop_response_latency;
     return snoop_response_latency;
 }
 
diff --git a/src/mem/noncoherent_bus.cc b/src/mem/noncoherent_bus.cc
index ae5344425..4f6751512 100644
--- a/src/mem/noncoherent_bus.cc
+++ b/src/mem/noncoherent_bus.cc
@@ -110,8 +110,8 @@ NoncoherentBus::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
     // set the source port for routing of the response
     pkt->setSrc(slave_port_id);
 
-    Tick headerFinishTime = calcPacketTiming(pkt);
-    Tick packetFinishTime = pkt->finishTime;
+    calcPacketTiming(pkt);
+    Tick packetFinishTime = pkt->busLastWordDelay + curTick();
 
     // since it is a normal request, determine the destination
     // based on the address and attempt to send the packet
@@ -124,7 +124,8 @@ NoncoherentBus::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
         DPRINTF(NoncoherentBus, "recvTimingReq: src %s %s 0x%x RETRY\n",
                 src_port->name(), pkt->cmdString(), pkt->getAddr());
 
-        reqLayer.failedTiming(src_port, headerFinishTime);
+        // occupy until the header is sent
+        reqLayer.failedTiming(src_port, clockEdge(Cycles(headerCycles)));
 
         return false;
     }
@@ -152,7 +153,7 @@ NoncoherentBus::recvTimingResp(PacketPtr pkt, PortID master_port_id)
             src_port->name(), pkt->cmdString(), pkt->getAddr());
 
     calcPacketTiming(pkt);
-    Tick packetFinishTime = pkt->finishTime;
+    Tick packetFinishTime = pkt->busLastWordDelay + curTick();
 
     // send the packet to the destination through one of our slave
     // ports, as determined by the destination field
@@ -189,7 +190,8 @@ NoncoherentBus::recvAtomic(PacketPtr pkt, PortID slave_port_id)
     // forward the request to the appropriate destination
     Tick response_latency = masterPorts[dest_id]->sendAtomic(pkt);
 
-    pkt->finishTime = curTick() + response_latency;
+    // @todo: Not setting busFirstWordDelay
+    pkt->busLastWordDelay = response_latency;
     return response_latency;
 }
 
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index 181320850..92fb2a31c 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -330,11 +330,23 @@ class Packet : public Printable
 
   public:
 
-    /// The time at which the packet will be fully transmitted
-    Tick finishTime;
+    /**
+     * The extra delay from seeing the packet until the first word is
+     * transmitted by the bus that provided it (if any). This delay is
+     * used to communicate the bus waiting time to the neighbouring
+     * object (e.g. a cache) that actually makes the packet wait. As
+     * the delay is relative, a 32-bit unsigned should be sufficient.
+     */
+    uint32_t busFirstWordDelay;
 
-    /// The time at which the first chunk of the packet will be transmitted
-    Tick firstWordTime;
+    /**
+     * The extra delay from seeing the packet until the last word is
+     * transmitted by the bus that provided it (if any). Similar to
+     * the first word delay, this is used to make up for the fact that
+     * the bus does not make the packet wait. As the delay is relative,
+     * a 32-bit unsigned should be sufficient.
+     */
+    uint32_t busLastWordDelay;
 
     /**
      * A virtual base opaque structure used to hold state associated
@@ -583,6 +595,7 @@ class Packet : public Printable
         :  cmd(_cmd), req(_req), data(NULL),
            src(InvalidPortID), dest(InvalidPortID),
            bytesValidStart(0), bytesValidEnd(0),
+           busFirstWordDelay(0), busLastWordDelay(0),
            senderState(NULL)
     {
         if (req->hasPaddr()) {
@@ -604,6 +617,7 @@ class Packet : public Printable
         :  cmd(_cmd), req(_req), data(NULL),
            src(InvalidPortID), dest(InvalidPortID),
            bytesValidStart(0), bytesValidEnd(0),
+           busFirstWordDelay(0), busLastWordDelay(0),
            senderState(NULL)
     {
         if (req->hasPaddr()) {
@@ -625,7 +639,10 @@ class Packet : public Printable
         :  cmd(pkt->cmd), req(pkt->req),
            data(pkt->flags.isSet(STATIC_DATA) ? pkt->data : NULL),
            addr(pkt->addr), size(pkt->size), src(pkt->src), dest(pkt->dest),
-           bytesValidStart(pkt->bytesValidStart), bytesValidEnd(pkt->bytesValidEnd),
+           bytesValidStart(pkt->bytesValidStart),
+           bytesValidEnd(pkt->bytesValidEnd),
+           busFirstWordDelay(pkt->busFirstWordDelay),
+           busLastWordDelay(pkt->busLastWordDelay),
            senderState(pkt->senderState)
     {
         if (!clearFlags)
@@ -664,6 +681,13 @@ class Packet : public Printable
         addr = req->getPaddr();
         size = req->getSize();
 
+        src = InvalidPortID;
+        dest = InvalidPortID;
+        bytesValidStart = 0;
+        bytesValidEnd = 0;
+        busFirstWordDelay = 0;
+        busLastWordDelay = 0;
+
         flags.set(VALID_ADDR|VALID_SIZE);
         deleteData();
     }