summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Marco Balboni <Marco.Balboni@ARM.com> 2015-03-02 04:00:46 -0500
committer Marco Balboni <Marco.Balboni@ARM.com> 2015-03-02 04:00:46 -0500
commit d35dd71ab4ac44a79ac22dca82277a43cd59f3c6 (patch)
tree eb42b8079289e3cef8f265556944941dc012e66c
parent 7be9d4eb673b9d9b45eabfd40a56718569a2a1be (diff)
download gem5-d35dd71ab4ac44a79ac22dca82277a43cd59f3c6.tar.xz
mem: Add crossbar latencies
This patch introduces latencies in the crossbar that were neglected before. In particular, it adds three parameters to the crossbar model: frontend_latency, forward_latency, and response_latency. Along with these parameters, three corresponding members are added: frontendLatency, forwardLatency, and responseLatency. The coherent crossbar has an additional snoop_response_latency. The latency of the request path through the xbar is set as --> frontendLatency + forwardLatency. In case the snoop filter is enabled, the request path latency also includes the look-up latency of the snoop filter --> frontendLatency + SF(lookupLatency) + forwardLatency. The latency of the response path through the xbar is instead set as --> responseLatency. In case of a snoop response, if the response is treated as a normal response the associated latency is again --> responseLatency; if instead it is forwarded as a snoop response, an additional member snoopResponseLatency is used and the associated latency is --> snoopResponseLatency. Furthermore, this patch lets the crossbar progress on the next clock edge after an unused retry, changing the time the crossbar considers itself busy after sending a retry that was not acted upon.
-rw-r--r-- src/mem/XBar.py 42
-rw-r--r-- src/mem/coherent_xbar.cc 51
-rw-r--r-- src/mem/coherent_xbar.hh 5
-rw-r--r-- src/mem/noncoherent_xbar.cc 31
-rw-r--r-- src/mem/noncoherent_xbar.hh 2
-rw-r--r-- src/mem/xbar.cc 63
-rw-r--r-- src/mem/xbar.hh 16
7 files changed, 153 insertions, 57 deletions
diff --git a/src/mem/XBar.py b/src/mem/XBar.py
index 2aeefe132..64910ed72 100644
--- a/src/mem/XBar.py
+++ b/src/mem/XBar.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2012 ARM Limited
+# Copyright (c) 2012, 2015 ARM Limited
# All rights reserved.
#
# The license below extends only to copyright in the software and shall
@@ -49,10 +49,29 @@ class BaseXBar(MemObject):
type = 'BaseXBar'
abstract = True
cxx_header = "mem/xbar.hh"
- slave = VectorSlavePort("vector port for connecting masters")
- master = VectorMasterPort("vector port for connecting slaves")
- header_cycles = Param.Cycles(1, "cycles of overhead per transaction")
- width = Param.Unsigned(8, "xbar width (bytes)")
+
+ slave = VectorSlavePort("Vector port for connecting masters")
+ master = VectorMasterPort("Vector port for connecting slaves")
+
+ # Latencies governing the time taken for the various paths a
+ # packet has through the crossbar. Note that the crossbar itself
+ # does not add the latency due to assumptions in the coherency
+ # mechanism. Instead the latency is annotated on the packet and
+ # left to the neighbouring modules.
+ #
+ # A request incurs the frontend latency, possibly snoop filter
+ # lookup latency, and forward latency. A response incurs the
+ # response latency. Frontend latency encompasses arbitration and
+ # deciding what to do when a request arrives. The forward latency
+ # is the latency involved once a decision is made to forward the
+ # request. The response latency is similar to the forward
+ # latency, but for responses rather than requests.
+ frontend_latency = Param.Cycles(3, "Frontend latency")
+ forward_latency = Param.Cycles(4, "Forward latency")
+ response_latency = Param.Cycles(2, "Response latency")
+
+ # Width governing the throughput of the crossbar
+ width = Param.Unsigned(8, "Datapath width per port (bytes)")
# The default port can be left unconnected, or be used to connect
# a default slave port
@@ -74,12 +93,21 @@ class CoherentXBar(BaseXBar):
type = 'CoherentXBar'
cxx_header = "mem/coherent_xbar.hh"
+ # The coherent crossbar additionally has snoop responses that are
+ # forwarded after a specific latency.
+ snoop_response_latency = Param.Cycles(4, "Snoop response latency")
+
+ # An optional snoop filter
+ snoop_filter = Param.SnoopFilter(NULL, "Selected snoop filter")
+
system = Param.System(Parent.any, "System that the crossbar belongs to.")
- snoop_filter = Param.SnoopFilter(NULL, "Selected snoop filter.")
class SnoopFilter(SimObject):
type = 'SnoopFilter'
cxx_header = "mem/snoop_filter.hh"
- lookup_latency = Param.Cycles(3, "lookup latency (cycles)")
+
+ # Lookup latency of the snoop filter, added to requests that pass
+ # through a coherent crossbar.
+ lookup_latency = Param.Cycles(1, "Lookup latency")
system = Param.System(Parent.any, "System that the crossbar belongs to.")
diff --git a/src/mem/coherent_xbar.cc b/src/mem/coherent_xbar.cc
index 667ff96f9..d4188f0f2 100644
--- a/src/mem/coherent_xbar.cc
+++ b/src/mem/coherent_xbar.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2011-2014 ARM Limited
+ * Copyright (c) 2011-2015 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -55,7 +55,8 @@
#include "sim/system.hh"
CoherentXBar::CoherentXBar(const CoherentXBarParams *p)
- : BaseXBar(p), system(p->system), snoopFilter(p->snoop_filter)
+ : BaseXBar(p), system(p->system), snoopFilter(p->snoop_filter),
+ snoopResponseLatency(p->snoop_response_latency)
{
// create the ports based on the size of the master and slave
// vector ports, and the presence of the default port, the ports
@@ -167,8 +168,17 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
unsigned int pkt_size = pkt->hasData() ? pkt->getSize() : 0;
unsigned int pkt_cmd = pkt->cmdToIndex();
- calcPacketTiming(pkt);
- Tick packetFinishTime = curTick() + pkt->payloadDelay;
+ // store the old header delay so we can restore it if needed
+ Tick old_header_delay = pkt->headerDelay;
+
+ // a request sees the frontend and forward latency
+ Tick xbar_delay = (frontendLatency + forwardLatency) * clockPeriod();
+
+ // set the packet header and payload delay
+ calcPacketTiming(pkt, xbar_delay);
+
+ // determine how long the crossbar layer is busy
+ Tick packetFinishTime = clockEdge(Cycles(1)) + pkt->payloadDelay;
// uncacheable requests need never be snooped
if (!pkt->req->isUncacheable() && !system->bypassCaches()) {
@@ -177,6 +187,10 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
if (snoopFilter) {
// check with the snoop filter where to forward this packet
auto sf_res = snoopFilter->lookupRequest(pkt, *src_port);
+ // If SnoopFilter is enabled, the total time required by a packet
+ // to be delivered through the xbar also has to be charged with
+ // the lookup latency of the snoop filter (sf_res.second).
+ pkt->headerDelay += sf_res.second * clockPeriod();
packetFinishTime += sf_res.second * clockPeriod();
DPRINTF(CoherentXBar, "recvTimingReq: src %s %s 0x%x"\
" SF size: %i lat: %i\n", src_port->name(),
@@ -221,15 +235,15 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
assert(!is_express_snoop);
assert(!pkt->memInhibitAsserted());
- // undo the calculation so we can check for 0 again
- pkt->headerDelay = pkt->payloadDelay = 0;
+ // restore the header delay
+ pkt->headerDelay = old_header_delay;
DPRINTF(CoherentXBar, "recvTimingReq: src %s %s 0x%x RETRY\n",
src_port->name(), pkt->cmdString(), pkt->getAddr());
// update the layer state and schedule an idle event
reqLayers[master_port_id]->failedTiming(src_port,
- clockEdge(headerCycles));
+ clockEdge(Cycles(1)));
} else {
// express snoops currently bypass the crossbar state entirely
if (!is_express_snoop) {
@@ -300,8 +314,14 @@ CoherentXBar::recvTimingResp(PacketPtr pkt, PortID master_port_id)
unsigned int pkt_size = pkt->hasData() ? pkt->getSize() : 0;
unsigned int pkt_cmd = pkt->cmdToIndex();
- calcPacketTiming(pkt);
- Tick packetFinishTime = curTick() + pkt->payloadDelay;
+ // a response sees the response latency
+ Tick xbar_delay = responseLatency * clockPeriod();
+
+ // set the packet header and payload delay
+ calcPacketTiming(pkt, xbar_delay);
+
+ // determine how long the crossbar layer is busy
+ Tick packetFinishTime = clockEdge(Cycles(1)) + pkt->payloadDelay;
if (snoopFilter && !pkt->req->isUncacheable() && !system->bypassCaches()) {
// let the snoop filter inspect the response and update its state
@@ -426,8 +446,17 @@ CoherentXBar::recvTimingSnoopResp(PacketPtr pkt, PortID slave_port_id)
// responses are never express snoops
assert(!pkt->isExpressSnoop());
- calcPacketTiming(pkt);
- Tick packetFinishTime = curTick() + pkt->payloadDelay;
+ // a snoop response sees the snoop response latency, and if it is
+ // forwarded as a normal response, the response latency
+ Tick xbar_delay =
+ (forwardAsSnoop ? snoopResponseLatency : responseLatency) *
+ clockPeriod();
+
+ // set the packet header and payload delay
+ calcPacketTiming(pkt, xbar_delay);
+
+ // determine how long the crossbar layer is busy
+ Tick packetFinishTime = clockEdge(Cycles(1)) + pkt->payloadDelay;
// forward it either as a snoop response or a normal response
if (forwardAsSnoop) {
diff --git a/src/mem/coherent_xbar.hh b/src/mem/coherent_xbar.hh
index ffe4a066b..3cf10689c 100644
--- a/src/mem/coherent_xbar.hh
+++ b/src/mem/coherent_xbar.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2011-2014 ARM Limited
+ * Copyright (c) 2011-2015 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -272,6 +272,9 @@ class CoherentXBar : public BaseXBar
* broadcast needed for probes. NULL denotes an absent filter. */
SnoopFilter *snoopFilter;
+ /** Cycles of snoop response latency.*/
+ const Cycles snoopResponseLatency;
+
/** Function called by the port when the crossbar is recieving a Timing
request packet.*/
bool recvTimingReq(PacketPtr pkt, PortID slave_port_id);
diff --git a/src/mem/noncoherent_xbar.cc b/src/mem/noncoherent_xbar.cc
index db33f0f70..e2bc85cad 100644
--- a/src/mem/noncoherent_xbar.cc
+++ b/src/mem/noncoherent_xbar.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2011-2014 ARM Limited
+ * Copyright (c) 2011-2015 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -127,8 +127,17 @@ NoncoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
unsigned int pkt_size = pkt->hasData() ? pkt->getSize() : 0;
unsigned int pkt_cmd = pkt->cmdToIndex();
- calcPacketTiming(pkt);
- Tick packetFinishTime = curTick() + pkt->payloadDelay;
+ // store the old header delay so we can restore it if needed
+ Tick old_header_delay = pkt->headerDelay;
+
+ // a request sees the frontend and forward latency
+ Tick xbar_delay = (frontendLatency + forwardLatency) * clockPeriod();
+
+ // set the packet header and payload delay
+ calcPacketTiming(pkt, xbar_delay);
+
+ // determine how long the crossbar layer is busy
+ Tick packetFinishTime = clockEdge(Cycles(1)) + pkt->payloadDelay;
// before forwarding the packet (and possibly altering it),
// remember if we are expecting a response
@@ -145,12 +154,12 @@ NoncoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
DPRINTF(NoncoherentXBar, "recvTimingReq: src %s %s 0x%x RETRY\n",
src_port->name(), pkt->cmdString(), pkt->getAddr());
- // undo the calculation so we can check for 0 again
- pkt->headerDelay = pkt->payloadDelay = 0;
+ // restore the header delay as it is additive
+ pkt->headerDelay = old_header_delay;
// occupy until the header is sent
reqLayers[master_port_id]->failedTiming(src_port,
- clockEdge(headerCycles));
+ clockEdge(Cycles(1)));
return false;
}
@@ -200,8 +209,14 @@ NoncoherentXBar::recvTimingResp(PacketPtr pkt, PortID master_port_id)
unsigned int pkt_size = pkt->hasData() ? pkt->getSize() : 0;
unsigned int pkt_cmd = pkt->cmdToIndex();
- calcPacketTiming(pkt);
- Tick packetFinishTime = curTick() + pkt->payloadDelay;
+ // a response sees the response latency
+ Tick xbar_delay = responseLatency * clockPeriod();
+
+ // set the packet header and payload delay
+ calcPacketTiming(pkt, xbar_delay);
+
+ // determine how long the crossbar layer is busy
+ Tick packetFinishTime = clockEdge(Cycles(1)) + pkt->payloadDelay;
// send the packet through the destination slave port
bool success M5_VAR_USED = slavePorts[slave_port_id]->sendTimingResp(pkt);
diff --git a/src/mem/noncoherent_xbar.hh b/src/mem/noncoherent_xbar.hh
index ba99d9be8..64a1064ab 100644
--- a/src/mem/noncoherent_xbar.hh
+++ b/src/mem/noncoherent_xbar.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2011-2014 ARM Limited
+ * Copyright (c) 2011-2015 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
diff --git a/src/mem/xbar.cc b/src/mem/xbar.cc
index 7ac937177..bc649581b 100644
--- a/src/mem/xbar.cc
+++ b/src/mem/xbar.cc
@@ -56,7 +56,10 @@
BaseXBar::BaseXBar(const BaseXBarParams *p)
: MemObject(p),
- headerCycles(p->header_cycles), width(p->width),
+ frontendLatency(p->frontend_latency),
+ forwardLatency(p->forward_latency),
+ responseLatency(p->response_latency),
+ width(p->width),
gotAddrRanges(p->port_default_connection_count +
p->port_master_connection_count, false),
gotAllAddrRanges(false), defaultPortID(InvalidPortID),
@@ -102,34 +105,41 @@ BaseXBar::getSlavePort(const std::string &if_name, PortID idx)
}
void
-BaseXBar::calcPacketTiming(PacketPtr pkt)
+BaseXBar::calcPacketTiming(PacketPtr pkt, Tick header_delay)
{
// the crossbar will be called at a time that is not necessarily
// coinciding with its own clock, so start by determining how long
// until the next clock edge (could be zero)
Tick offset = clockEdge() - curTick();
- // Determine how many cycles are needed to send the data
- // If the packet has no data we take into account just the cycle to send
- // the header.
- unsigned dataCycles = pkt->hasData() ? divCeil(pkt->getSize(), width) : 0;
-
- // before setting the bus delay fields of the packet, ensure that
- // the delay from any previous crossbar has been accounted for
- if (pkt->headerDelay != 0 || pkt->payloadDelay != 0)
- panic("Packet %s already has delay (%d, %d) that should be "
- "accounted for.\n", pkt->cmdString(), pkt->headerDelay,
- pkt->payloadDelay);
-
- // The headerDelay takes into account the relative time to deliver the
- // header of the packet. It will be charged of the additional delay of
- // the xbar if the packet goes through it.
- pkt->headerDelay = (headerCycles + 1) * clockPeriod() + offset;
-
- // The payloadDelay takes into account the relative time to deliver the
- // payload of the packet. If the packet has no data its value is just one
- // tick (due to header) plus the offset value.
- pkt->payloadDelay = (headerCycles + dataCycles) * clockPeriod() + offset;
+ // the header delay depends on the path through the crossbar, and
+ // we therefore rely on the caller to provide the actual
+ // value
+ pkt->headerDelay += offset + header_delay;
+
+ // note that we add the header delay to the existing value, and
+ // align it to the crossbar clock
+
+ // do a quick sanity check to ensure the timings are not being
+ // ignored, note that this specific value may cause problems for
+ // slower interconnects
+ panic_if(pkt->headerDelay > SimClock::Int::us,
+ "Encountered header delay exceeding 1 us\n");
+
+ if (pkt->hasData()) {
+ // the payloadDelay takes into account the relative time to
+ // deliver the payload of the packet, after the header delay,
+ // we take the maximum since the payload delay could already
+ // be longer than what this particular crossbar enforces.
+ pkt->payloadDelay = std::max<Tick>(pkt->payloadDelay,
+ divCeil(pkt->getSize(), width) *
+ clockPeriod());
+ }
+
+ // the payload delay is not paying for the clock offset as that is
+ // already done using the header delay, and the payload delay is
+ // also used to determine how long the crossbar layer is busy and
+ // thus regulates throughput
}
template <typename SrcType, typename DstType>
@@ -274,14 +284,15 @@ BaseXBar::Layer<SrcType,DstType>::retryWaiting()
sendRetry(retryingPort);
// If the layer is still in the retry state, sendTiming wasn't
- // called in zero time (e.g. the cache does this), burn a cycle
+ // called in zero time (e.g. the cache does this when a writeback
+ // is squashed)
if (state == RETRY) {
// update the state to busy and reset the retrying port, we
// have done our bit and sent the retry
state = BUSY;
- // occupy the crossbar layer until the next cycle ends
- occupyLayer(xbar.clockEdge(Cycles(1)));
+ // occupy the crossbar layer until the next clock edge
+ occupyLayer(xbar.clockEdge());
}
}
diff --git a/src/mem/xbar.hh b/src/mem/xbar.hh
index f51b08da2..ed678d9d0 100644
--- a/src/mem/xbar.hh
+++ b/src/mem/xbar.hh
@@ -309,8 +309,15 @@ class BaseXBar : public MemObject
{ retry_port->sendRetrySnoopResp(); }
};
- /** cycles of overhead per transaction */
- const Cycles headerCycles;
+ /**
+ * Cycles of front-end pipeline including the delay to accept the request
+ * and to decode the address.
+ */
+ const Cycles frontendLatency;
+ /** Cycles of forward latency */
+ const Cycles forwardLatency;
+ /** Cycles of response latency */
+ const Cycles responseLatency;
/** the width of the xbar in bytes */
const uint32_t width;
@@ -404,8 +411,11 @@ class BaseXBar : public MemObject
* headerDelay and payloadDelay fields of the packet
* object with the relative number of ticks required to transmit
* the header and the payload, respectively.
+ *
+ * @param pkt Packet to populate with timings
+ * @param header_delay Header delay to be added
*/
- void calcPacketTiming(PacketPtr pkt);
+ void calcPacketTiming(PacketPtr pkt, Tick header_delay);
/**
* Remember for each of the master ports of the crossbar if we got