From 6b70afd0d4ec8821105e506d7a20f9af01b8eafb Mon Sep 17 00:00:00 2001 From: Andreas Hansson Date: Fri, 6 Nov 2015 03:26:36 -0500 Subject: mem: Use the packet delays and do not just zero them out This patch updates the I/O devices, bridge and simple memory to take the packet header and payload delay into account in their latency calculations. In all cases we add the header delay, i.e. the accumulated pipeline delay of any crossbars, and the payload delay needed for deserialisation of any payload. Due to the additional unknown latency contribution, the packet queue of the simple memory is changed to use insertion sorting based on the time stamp. Moreover, since the memory hands out exclusive (non shared) responses, we also need to ensure ordering for reads to the same address. --- src/mem/bridge.cc | 17 +++++++++++++---- src/mem/simple_mem.cc | 29 +++++++++++++++++++++++------ src/mem/simple_mem.hh | 4 ++-- 3 files changed, 38 insertions(+), 12 deletions(-) (limited to 'src/mem') diff --git a/src/mem/bridge.cc b/src/mem/bridge.cc index 855f39de3..1f7d1d43a 100644 --- a/src/mem/bridge.cc +++ b/src/mem/bridge.cc @@ -136,10 +136,14 @@ Bridge::BridgeMasterPort::recvTimingResp(PacketPtr pkt) DPRINTF(Bridge, "Request queue size: %d\n", transmitList.size()); - // @todo: We need to pay for this and not just zero it out + // technically the packet only reaches us after the header delay, + // and typically we also need to deserialise any payload (unless + // the two sides of the bridge are synchronous) + Tick receive_delay = pkt->headerDelay + pkt->payloadDelay; pkt->headerDelay = pkt->payloadDelay = 0; - slavePort.schedTimingResp(pkt, bridge.clockEdge(delay)); + slavePort.schedTimingResp(pkt, bridge.clockEdge(delay) + + receive_delay); return true; } @@ -191,10 +195,15 @@ Bridge::BridgeSlavePort::recvTimingReq(PacketPtr pkt) } if (!retryReq) { - // @todo: We need to pay for this and not just zero it out + // technically the packet only reaches us after the header + // delay, 
and typically we also need to deserialise any + // payload (unless the two sides of the bridge are + // synchronous) + Tick receive_delay = pkt->headerDelay + pkt->payloadDelay; pkt->headerDelay = pkt->payloadDelay = 0; - masterPort.schedTimingReq(pkt, bridge.clockEdge(delay)); + masterPort.schedTimingReq(pkt, bridge.clockEdge(delay) + + receive_delay); } } diff --git a/src/mem/simple_mem.cc b/src/mem/simple_mem.cc index 639ccbe31..d4dbe1946 100644 --- a/src/mem/simple_mem.cc +++ b/src/mem/simple_mem.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2013 ARM Limited + * Copyright (c) 2010-2013, 2015 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -116,7 +116,10 @@ SimpleMemory::recvTimingReq(PacketPtr pkt) return false; } - // @todo someone should pay for this + // technically the packet only reaches us after the header delay, + // and since this is a memory controller we also need to + // deserialise the payload before performing any write operation + Tick receive_delay = pkt->headerDelay + pkt->payloadDelay; pkt->headerDelay = pkt->payloadDelay = 0; // update the release time according to the bandwidth limit, and @@ -150,10 +153,24 @@ SimpleMemory::recvTimingReq(PacketPtr pkt) // recvAtomic() should already have turned packet into // atomic response assert(pkt->isResponse()); - // to keep things simple (and in order), we put the packet at - // the end even if the latency suggests it should be sent - // before the packet(s) before it - packetQueue.emplace_back(pkt, curTick() + getLatency()); + + Tick when_to_send = curTick() + receive_delay + getLatency(); + + // typically this should be added at the end, so start the + // insertion sort with the last element, also make sure not to + // re-order in front of some existing packet with the same + // address, the latter is important as this memory effectively + // hands out exclusive copies (shared is not asserted) + auto i = packetQueue.end(); + --i; + while 
(i != packetQueue.begin() && when_to_send < i->tick && + i->pkt->getAddr() != pkt->getAddr()) + --i; + + // emplace inserts the element before the position pointed to by + // the iterator, so advance it one step + packetQueue.emplace(++i, pkt, when_to_send); + if (!retryResp && !dequeueEvent.scheduled()) schedule(dequeueEvent, packetQueue.back().tick); } else { diff --git a/src/mem/simple_mem.hh b/src/mem/simple_mem.hh index d19de7608..23cd3c80d 100644 --- a/src/mem/simple_mem.hh +++ b/src/mem/simple_mem.hh @@ -49,7 +49,7 @@ #ifndef __SIMPLE_MEMORY_HH__ #define __SIMPLE_MEMORY_HH__ -#include <deque> +#include <list> #include "mem/abstract_mem.hh" #include "mem/port.hh" @@ -125,7 +125,7 @@ class SimpleMemory : public AbstractMemory * actual memory access. Note that this is where the packet spends * the memory latency. */ - std::deque<DeferredPacket> packetQueue; + std::list<DeferredPacket> packetQueue; /** * Bandwidth in ticks per byte. The regulation affects the -- cgit v1.2.3