summaryrefslogtreecommitdiff
path: root/src/cpu/o3/lsq_unit.hh
diff options
context:
space:
mode:
authorArthur Perais <arthur.perais@inria.fr>2016-12-21 15:04:06 -0600
committerArthur Perais <arthur.perais@inria.fr>2016-12-21 15:04:06 -0600
commite5fb6752d613a6f85e2f93b4c01836ac59a8c90c (patch)
tree7bec60d7645ed4a1d7e20dc8071c0dafd288b786 /src/cpu/o3/lsq_unit.hh
parent3a656da1a64f08d5e4c755e94cefda5a4e985a50 (diff)
downloadgem5-e5fb6752d613a6f85e2f93b4c01836ac59a8c90c.tar.xz
cpu: Clarify meaning of cachePorts variable in lsq_unit.hh of O3
cachePorts currently constrains the number of store packets written to the D-Cache each cycle, but loads currently affect this variable as well. This leads to unexpected congestion (e.g., setting cachePorts to a realistic 1 will in fact allow a store to write back only if no loads have accessed the D-Cache this cycle). In the absence of arbitration, this patch decouples how many loads can be done per cycle from how many stores can be done per cycle. Signed-off-by: Jason Lowe-Power <jason@lowepower.com>
Diffstat (limited to 'src/cpu/o3/lsq_unit.hh')
-rw-r--r-- src/cpu/o3/lsq_unit.hh 20
1 files changed, 12 insertions, 8 deletions
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index b1b0aae3a..10d4966e8 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -113,7 +113,7 @@ class LSQUnit {
* @todo: Move the number of used ports up to the LSQ level so it can
* be shared by all LSQ units.
*/
- void tick() { usedPorts = 0; }
+ void tick() { usedStorePorts = 0; }
/** Inserts an instruction. */
void insert(DynInstPtr &inst);
@@ -429,11 +429,11 @@ class LSQUnit {
int storeTail;
/// @todo Consider moving to a more advanced model with write vs read ports
- /** The number of cache ports available each cycle. */
- int cachePorts;
+ /** The number of cache ports available each cycle (stores only). */
+ int cacheStorePorts;
- /** The number of used cache ports in this cycle. */
- int usedPorts;
+ /** The number of used cache ports in this cycle by stores. */
+ int usedStorePorts;
//list<InstSeqNum> mshrSeqNums;
@@ -765,8 +765,6 @@ LSQUnit<Impl>::read(Request *req, Request *sreqLow, Request *sreqHigh,
load_inst->memData = new uint8_t[req->getSize()];
}
- ++usedPorts;
-
// if we the cache is not blocked, do cache access
bool completedFirst = false;
PacketPtr data_pkt = Packet::createRead(req);
@@ -800,6 +798,11 @@ LSQUnit<Impl>::read(Request *req, Request *sreqLow, Request *sreqHigh,
state->mainPkt = data_pkt;
}
+ // For now, load throughput is constrained by the number of
+ // load FUs only, and loads do not consume a cache port (only
+ // stores do).
+ // @todo We should account for cache port contention
+ // and arbitrate between loads and stores.
bool successful_load = true;
if (!dcachePort->sendTimingReq(fst_data_pkt)) {
successful_load = false;
@@ -811,7 +814,8 @@ LSQUnit<Impl>::read(Request *req, Request *sreqLow, Request *sreqHigh,
// load will be squashed, so indicate this to the state object.
// The first packet will return in completeDataAccess and be
// handled there.
- ++usedPorts;
+ // @todo We should also account for cache port contention
+ // here.
if (!dcachePort->sendTimingReq(snd_data_pkt)) {
// The main packet will be deleted in completeDataAccess.
state->complete();