diff options
author | Andreas Hansson <andreas.hansson@arm.com> | 2013-02-19 05:56:06 -0500 |
---|---|---|
committer | Andreas Hansson <andreas.hansson@arm.com> | 2013-02-19 05:56:06 -0500 |
commit | b3fc8839c4727da575ed916cbd6a76d8ad5fc644 (patch) | |
tree | 4a200b41d9d2c2222ca88d85af82dd17c330ea7f /src | |
parent | 362160c8aeeb5b655158061ad57404124b4618f3 (diff) | |
download | gem5-b3fc8839c4727da575ed916cbd6a76d8ad5fc644.tar.xz |
mem: Make packet bus-related time accounting relative
This patch changes the bus-related time accounting done in the packet
to be relative. Besides making it easier to align the cache timing to
cache clock cycles, it also makes it possible to connect a Last-Level
Cache (LLC) directly to a memory controller without a bus in between.
The bus is unique in that it does not ever make the packets wait to
reflect the time spent forwarding them. Instead, the cache is
currently responsible for making the packets wait. Thus, the bus
annotates the packets with the time needed for the first word to
appear, and also the last word. The cache then delays the packets in
its queues before passing them on. It is worth noting that every
object attached to a bus (devices, memories, bridges, etc.) should be
doing this if we opt for keeping this way of accounting for the bus
timing.
Diffstat (limited to 'src')
-rw-r--r-- | src/mem/bus.cc | 32 | ||||
-rw-r--r-- | src/mem/bus.hh | 11 | ||||
-rw-r--r-- | src/mem/cache/cache_impl.hh | 20 | ||||
-rw-r--r-- | src/mem/coherent_bus.cc | 16 | ||||
-rw-r--r-- | src/mem/noncoherent_bus.cc | 12 | ||||
-rw-r--r-- | src/mem/packet.hh | 34 |
6 files changed, 75 insertions, 50 deletions
diff --git a/src/mem/bus.cc b/src/mem/bus.cc index 4d9cdbe88..690d85373 100644 --- a/src/mem/bus.cc +++ b/src/mem/bus.cc @@ -129,30 +129,24 @@ BaseBus::getSlavePort(const std::string &if_name, PortID idx) } } -Tick +void BaseBus::calcPacketTiming(PacketPtr pkt) { - // determine the header time rounded to the closest following - // clock edge - Tick headerTime = clockEdge(headerCycles); - - // The packet will be sent. Figure out how long it occupies the bus, and - // how much of that time is for the first "word", aka bus width. - Cycles numCycles(0); - if (pkt->hasData()) { - // If a packet has data, it needs ceil(size/width) cycles to send it - unsigned dataSize = pkt->getSize(); - numCycles = Cycles(divCeil(dataSize, width)); - } + // the bus will be called at a time that is not necessarily + // coinciding with its own clock, so start by determining how long + // until the next clock edge (could be zero) + Tick offset = nextCycle() - curTick(); - // The first word will be delivered on the cycle after the header. - pkt->firstWordTime = headerTime + clockPeriod(); + // determine how many cycles are needed to send the data + unsigned dataCycles = pkt->hasData() ? divCeil(pkt->getSize(), width) : 0; - // Note that currently finishTime can be smaller than - // firstWordTime if the packet has no data - pkt->finishTime = headerTime + numCycles * clockPeriod(); + // The first word will be delivered on the cycle after the header. 
+ pkt->busFirstWordDelay = (headerCycles + 1) * clockPeriod() + offset; - return headerTime; + // Note that currently busLastWordDelay can be smaller than + // busFirstWordDelay if the packet has no data + pkt->busLastWordDelay = (headerCycles + dataCycles) * clockPeriod() + + offset; } template <typename PortClass> diff --git a/src/mem/bus.hh b/src/mem/bus.hh index 015bb51a0..35c206fa9 100644 --- a/src/mem/bus.hh +++ b/src/mem/bus.hh @@ -302,12 +302,13 @@ class BaseBus : public MemObject */ AddrRangeList getAddrRanges() const; - /** Calculate the timing parameters for the packet. Updates the - * firstWordTime and finishTime fields of the packet object. - * Returns the tick at which the packet header is completed (which - * will be all that is sent if the target rejects the packet). + /** + * Calculate the timing parameters for the packet. Updates the + * busFirstWordDelay and busLastWordDelay fields of the packet + * object with the relative number of ticks required to transmit + * the header and the first word, and the last word, respectively. */ - Tick calcPacketTiming(PacketPtr pkt); + void calcPacketTiming(PacketPtr pkt); /** * Ask everyone on the bus what their size is and determine the diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index a7e6a6186..7aa922055 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -898,8 +898,9 @@ Cache<TagStore>::handleResponse(PacketPtr pkt) // responseLatency is the latency of the return path // from lower level caches/memory to an upper level cache or // the core. - completion_time = responseLatency * clockPeriod() + - (transfer_offset ? pkt->finishTime : pkt->firstWordTime); + completion_time = curTick() + responseLatency * clockPeriod() + + (transfer_offset ? 
pkt->busLastWordDelay : + pkt->busFirstWordDelay); assert(!target->pkt->req->isUncacheable()); @@ -914,15 +915,15 @@ Cache<TagStore>::handleResponse(PacketPtr pkt) // responseLatency is the latency of the return path // from lower level caches/memory to an upper level cache or // the core. - completion_time = responseLatency * clockPeriod() + - pkt->finishTime; + completion_time = curTick() + responseLatency * clockPeriod() + + pkt->busLastWordDelay; target->pkt->req->setExtraData(0); } else { // not a cache fill, just forwarding response // responseLatency is the latency of the return path // from lower level cahces/memory to the core. - completion_time = responseLatency * clockPeriod() + - pkt->finishTime; + completion_time = curTick() + responseLatency * clockPeriod() + + pkt->busLastWordDelay; if (pkt->isRead() && !is_error) { target->pkt->setData(pkt->getPtr<uint8_t>()); } @@ -984,7 +985,8 @@ Cache<TagStore>::handleResponse(PacketPtr pkt) } MSHRQueue *mq = mshr->queue; mq->markPending(mshr); - requestMemSideBus((RequestCause)mq->index, pkt->finishTime); + requestMemSideBus((RequestCause)mq->index, curTick() + + pkt->busLastWordDelay); } else { mq->deallocate(mshr); if (wasFull && !mq->isFull()) { @@ -1217,7 +1219,7 @@ Cache<TagStore>::handleFill(PacketPtr pkt, BlkType *blk, std::memcpy(blk->data, pkt->getPtr<uint8_t>(), blkSize); } - blk->whenReady = pkt->finishTime; + blk->whenReady = curTick() + pkt->busLastWordDelay; return blk; } @@ -1575,7 +1577,7 @@ Cache<TagStore>::getTimingPacket() pkt = new Packet(tgt_pkt); pkt->cmd = MemCmd::UpgradeFailResp; pkt->senderState = mshr; - pkt->firstWordTime = pkt->finishTime = curTick(); + pkt->busFirstWordDelay = pkt->busLastWordDelay = 0; handleResponse(pkt); return NULL; } else if (mshr->isForwardNoResponse()) { diff --git a/src/mem/coherent_bus.cc b/src/mem/coherent_bus.cc index 409f69229..b57484ab3 100644 --- a/src/mem/coherent_bus.cc +++ b/src/mem/coherent_bus.cc @@ -135,8 +135,8 @@ 
CoherentBus::recvTimingReq(PacketPtr pkt, PortID slave_port_id) // set the source port for routing of the response pkt->setSrc(slave_port_id); - Tick headerFinishTime = is_express_snoop ? 0 : calcPacketTiming(pkt); - Tick packetFinishTime = is_express_snoop ? 0 : pkt->finishTime; + calcPacketTiming(pkt); + Tick packetFinishTime = pkt->busLastWordDelay + curTick(); // uncacheable requests need never be snooped if (!pkt->req->isUncacheable() && !system->bypassCaches()) { @@ -183,7 +183,7 @@ CoherentBus::recvTimingReq(PacketPtr pkt, PortID slave_port_id) src_port->name(), pkt->cmdString(), pkt->getAddr()); // update the bus state and schedule an idle event - reqLayer.failedTiming(src_port, headerFinishTime); + reqLayer.failedTiming(src_port, clockEdge(Cycles(headerCycles))); } else { // update the bus state and schedule an idle event reqLayer.succeededTiming(packetFinishTime); @@ -211,7 +211,7 @@ CoherentBus::recvTimingResp(PacketPtr pkt, PortID master_port_id) src_port->name(), pkt->cmdString(), pkt->getAddr()); calcPacketTiming(pkt); - Tick packetFinishTime = pkt->finishTime; + Tick packetFinishTime = pkt->busLastWordDelay + curTick(); // the packet is a normal response to a request that we should // have seen passing through the bus @@ -281,7 +281,7 @@ CoherentBus::recvTimingSnoopResp(PacketPtr pkt, PortID slave_port_id) assert(!pkt->isExpressSnoop()); calcPacketTiming(pkt); - Tick packetFinishTime = pkt->finishTime; + Tick packetFinishTime = pkt->busLastWordDelay + curTick(); // determine if the response is from a snoop request we // created as the result of a normal request (in which case it @@ -385,7 +385,8 @@ CoherentBus::recvAtomic(PacketPtr pkt, PortID slave_port_id) response_latency = snoop_response_latency; } - pkt->finishTime = curTick() + response_latency; + // @todo: Not setting first-word time + pkt->busLastWordDelay = response_latency; return response_latency; } @@ -405,7 +406,8 @@ CoherentBus::recvAtomicSnoop(PacketPtr pkt, PortID master_port_id) if 
(snoop_response_cmd != MemCmd::InvalidCmd) pkt->cmd = snoop_response_cmd; - pkt->finishTime = curTick() + snoop_response_latency; + // @todo: Not setting first-word time + pkt->busLastWordDelay = snoop_response_latency; return snoop_response_latency; } diff --git a/src/mem/noncoherent_bus.cc b/src/mem/noncoherent_bus.cc index ae5344425..4f6751512 100644 --- a/src/mem/noncoherent_bus.cc +++ b/src/mem/noncoherent_bus.cc @@ -110,8 +110,8 @@ NoncoherentBus::recvTimingReq(PacketPtr pkt, PortID slave_port_id) // set the source port for routing of the response pkt->setSrc(slave_port_id); - Tick headerFinishTime = calcPacketTiming(pkt); - Tick packetFinishTime = pkt->finishTime; + calcPacketTiming(pkt); + Tick packetFinishTime = pkt->busLastWordDelay + curTick(); // since it is a normal request, determine the destination // based on the address and attempt to send the packet @@ -124,7 +124,8 @@ NoncoherentBus::recvTimingReq(PacketPtr pkt, PortID slave_port_id) DPRINTF(NoncoherentBus, "recvTimingReq: src %s %s 0x%x RETRY\n", src_port->name(), pkt->cmdString(), pkt->getAddr()); - reqLayer.failedTiming(src_port, headerFinishTime); + // occupy until the header is sent + reqLayer.failedTiming(src_port, clockEdge(Cycles(headerCycles))); return false; } @@ -152,7 +153,7 @@ NoncoherentBus::recvTimingResp(PacketPtr pkt, PortID master_port_id) src_port->name(), pkt->cmdString(), pkt->getAddr()); calcPacketTiming(pkt); - Tick packetFinishTime = pkt->finishTime; + Tick packetFinishTime = pkt->busLastWordDelay + curTick(); // send the packet to the destination through one of our slave // ports, as determined by the destination field @@ -189,7 +190,8 @@ NoncoherentBus::recvAtomic(PacketPtr pkt, PortID slave_port_id) // forward the request to the appropriate destination Tick response_latency = masterPorts[dest_id]->sendAtomic(pkt); - pkt->finishTime = curTick() + response_latency; + // @todo: Not setting first-word time + pkt->busLastWordDelay = response_latency; return response_latency; 
} diff --git a/src/mem/packet.hh b/src/mem/packet.hh index 181320850..92fb2a31c 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -330,11 +330,23 @@ class Packet : public Printable public: - /// The time at which the packet will be fully transmitted - Tick finishTime; + /** + * The extra delay from seeing the packet until the first word is + * transmitted by the bus that provided it (if any). This delay is + * used to communicate the bus waiting time to the neighbouring + * object (e.g. a cache) that actually makes the packet wait. As + * the delay is relative, a 32-bit unsigned should be sufficient. + */ + uint32_t busFirstWordDelay; - /// The time at which the first chunk of the packet will be transmitted - Tick firstWordTime; + /** + * The extra delay from seeing the packet until the last word is + * transmitted by the bus that provided it (if any). Similar to + * the first word time, this is used to make up for the fact that + * the bus does not make the packet wait. As the delay is relative, + * a 32-bit unsigned should be sufficient. + */ + uint32_t busLastWordDelay; /** * A virtual base opaque structure used to hold state associated @@ -583,6 +595,7 @@ class Packet : public Printable : cmd(_cmd), req(_req), data(NULL), src(InvalidPortID), dest(InvalidPortID), bytesValidStart(0), bytesValidEnd(0), + busFirstWordDelay(0), busLastWordDelay(0), senderState(NULL) { if (req->hasPaddr()) { @@ -604,6 +617,7 @@ class Packet : public Printable : cmd(_cmd), req(_req), data(NULL), src(InvalidPortID), dest(InvalidPortID), bytesValidStart(0), bytesValidEnd(0), + busFirstWordDelay(0), busLastWordDelay(0), senderState(NULL) { if (req->hasPaddr()) { @@ -625,7 +639,10 @@ class Packet : public Printable : cmd(pkt->cmd), req(pkt->req), data(pkt->flags.isSet(STATIC_DATA) ? 
pkt->data : NULL), addr(pkt->addr), size(pkt->size), src(pkt->src), dest(pkt->dest), - bytesValidStart(pkt->bytesValidStart), bytesValidEnd(pkt->bytesValidEnd), + bytesValidStart(pkt->bytesValidStart), + bytesValidEnd(pkt->bytesValidEnd), + busFirstWordDelay(pkt->busFirstWordDelay), + busLastWordDelay(pkt->busLastWordDelay), senderState(pkt->senderState) { if (!clearFlags) @@ -664,6 +681,13 @@ class Packet : public Printable addr = req->getPaddr(); size = req->getSize(); + src = InvalidPortID; + dest = InvalidPortID; + bytesValidStart = 0; + bytesValidEnd = 0; + busFirstWordDelay = 0; + busLastWordDelay = 0; + flags.set(VALID_ADDR|VALID_SIZE); deleteData(); } |