Diffstat (limited to 'src/mem')
-rw-r--r--  src/mem/DRAMCtrl.py   |  38
-rw-r--r--  src/mem/dram_ctrl.cc  | 127
-rw-r--r--  src/mem/dram_ctrl.hh  |   7
3 files changed, 143 insertions, 29 deletions
diff --git a/src/mem/DRAMCtrl.py b/src/mem/DRAMCtrl.py
index b06b8e7eb..02fa4fa46 100644
--- a/src/mem/DRAMCtrl.py
+++ b/src/mem/DRAMCtrl.py
@@ -111,6 +111,11 @@ class DRAMCtrl(AbstractMemory):
"device/chip")
devices_per_rank = Param.Unsigned("Number of devices/chips per rank")
ranks_per_channel = Param.Unsigned("Number of ranks per channel")
+
+ # default to 0 bank groups per rank, indicating bank group architecture
+ # is not used
+ # update per memory class when bank group architecture is supported
+ bank_groups_per_rank = Param.Unsigned(0, "Number of bank groups per rank")
banks_per_rank = Param.Unsigned("Number of banks per rank")
# only used for the address mapping as the controller by
# construction is a single channel and multiple controllers have
@@ -147,8 +152,17 @@ class DRAMCtrl(AbstractMemory):
# This parameter has to account for burst length.
# Read/Write requests with data size larger than one full burst are broken
# down into multiple requests in the controller
+ # tBURST is equivalent to the CAS-to-CAS delay (tCCD)
+ # With bank group architectures, tBURST represents the CAS-to-CAS
+ # delay for bursts to different bank groups (tCCD_S)
tBURST = Param.Latency("Burst duration (for DDR burst length / 2 cycles)")
+ # CAS-to-CAS delay for bursts to the same bank group
+ # only utilized with bank group architectures; set to 0 for default case
+ # tBURST is equivalent to tCCD_S; no explicit parameter required
+ # for CAS-to-CAS delay for bursts to different bank groups
+ tCCD_L = Param.Latency("0ns", "Same bank group CAS to CAS delay")
+
# time taken to complete one refresh cycle (N rows in all banks)
tRFC = Param.Latency("Refresh cycle time")
@@ -171,6 +185,9 @@ class DRAMCtrl(AbstractMemory):
# minimum row activate to row activate delay time
tRRD = Param.Latency("ACT to ACT delay")
+ # only utilized with bank group architectures; set to 0 for default case
+ tRRD_L = Param.Latency("0ns", "Same bank group ACT to ACT delay")
+
# time window in which a maximum number of activates are allowed
# to take place, set to 0 to disable
tXAW = Param.Latency("X activation window")
@@ -274,6 +291,10 @@ class DDR4_2400_x64(DRAMCtrl):
# Use a single rank
ranks_per_channel = 1
+ # DDR4 has 2 (x16) or 4 (x4 and x8) bank groups
+ # Set to 4 for x4, x8 case
+ bank_groups_per_rank = 4
+
# DDR4 has 16 banks (4 bank groups) in all
# configurations. Currently we do not capture the additional
# constraints incurred by the bank groups
@@ -283,16 +304,29 @@ class DDR4_2400_x64(DRAMCtrl):
tCK = '0.833ns'
# 8 beats across an x64 interface translates to 4 clocks @ 1200 MHz
+ # tBURST is equivalent to the CAS-to-CAS delay (tCCD)
+ # With bank group architectures, tBURST represents the CAS-to-CAS
+ # delay for bursts to different bank groups (tCCD_S)
tBURST = '3.333ns'
+ # CAS-to-CAS delay for bursts to the same bank group
+ # tBURST is equivalent to tCCD_S; no explicit parameter required
+ # for CAS-to-CAS delay for bursts to different bank groups
+ # @2400 data rate, tCCD_L is 6 CK = 5ns
+ tCCD_L = '5ns'
+
# DDR4-2400 17-17-17
tRCD = '14.16ns'
tCL = '14.16ns'
tRP = '14.16ns'
tRAS = '32ns'
- # Here using the average of RRD_S and RRD_L
- tRRD = '4.1ns'
+ # RRD_S (different bank group) for 1K page is MAX(4 CK, 3.3ns)
+ tRRD = '3.3ns'
+
+ # RRD_L (same bank group) for 1K page is MAX(4 CK, 4.9ns)
+ tRRD_L = '4.9ns'
+
tXAW = '21ns'
activation_limit = 4
tRFC = '260ns'
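As a quick cross-check of the DDR4-2400 values above: at tCK = 0.833ns, a BL8 burst occupies 4 clocks, so tBURST is roughly 3.333ns, and the 6 CK tCCD_L comes out at roughly 5ns. A minimal standalone sketch (plain C++ with nanosecond doubles in place of gem5's Tick type; not part of this patch) verifying those derivations and the orderings the controller's constructor enforces:

// Standalone cross-check of the DDR4-2400 timings above; illustrative
// only, using nanoseconds as doubles rather than gem5 Ticks.
#include <cassert>
#include <cmath>

int main()
{
    const double tCK    = 0.833;    // ns, @ 1200 MHz clock
    const double tBURST = 4 * tCK;  // BL8 / 2 = 4 CK ~ 3.333 ns (tCCD_S)
    const double tCCD_L = 6 * tCK;  // 6 CK ~ 5 ns, same bank group
    const double tRRD   = 3.3;      // ns, different bank group ACT-to-ACT
    const double tRRD_L = 4.9;      // ns, same bank group ACT-to-ACT

    // derivations quoted in the comments above
    assert(std::abs(tBURST - 3.333) < 0.01);
    assert(std::abs(tCCD_L - 5.0) < 0.01);

    // orderings enforced by the constructor checks in dram_ctrl.cc
    assert(tCCD_L > tBURST);
    assert(tRRD_L > tRRD);
    return 0;
}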
diff --git a/src/mem/dram_ctrl.cc b/src/mem/dram_ctrl.cc
index ca562f4f7..38c240fcf 100644
--- a/src/mem/dram_ctrl.cc
+++ b/src/mem/dram_ctrl.cc
@@ -69,6 +69,8 @@ DRAMCtrl::DRAMCtrl(const DRAMCtrlParams* p) :
columnsPerRowBuffer(rowBufferSize / burstSize),
columnsPerStripe(range.granularity() / burstSize),
ranksPerChannel(p->ranks_per_channel),
+ bankGroupsPerRank(p->bank_groups_per_rank),
+ bankGroupArch(p->bank_groups_per_rank > 0),
banksPerRank(p->banks_per_rank), channels(p->channels), rowsPerBank(0),
readBufferSize(p->read_buffer_size),
writeBufferSize(p->write_buffer_size),
@@ -77,9 +79,9 @@ DRAMCtrl::DRAMCtrl(const DRAMCtrlParams* p) :
minWritesPerSwitch(p->min_writes_per_switch),
writesThisTime(0), readsThisTime(0),
tCK(p->tCK), tWTR(p->tWTR), tRTW(p->tRTW), tCS(p->tCS), tBURST(p->tBURST),
- tRCD(p->tRCD), tCL(p->tCL), tRP(p->tRP), tRAS(p->tRAS), tWR(p->tWR),
- tRTP(p->tRTP), tRFC(p->tRFC), tREFI(p->tREFI), tRRD(p->tRRD),
- tXAW(p->tXAW), activationLimit(p->activation_limit),
+ tCCD_L(p->tCCD_L), tRCD(p->tRCD), tCL(p->tCL), tRP(p->tRP), tRAS(p->tRAS),
+ tWR(p->tWR), tRTP(p->tRTP), tRFC(p->tRFC), tREFI(p->tREFI), tRRD(p->tRRD),
+ tRRD_L(p->tRRD_L), tXAW(p->tXAW), activationLimit(p->activation_limit),
memSchedPolicy(p->mem_sched_policy), addrMapping(p->addr_mapping),
pageMgmt(p->page_policy),
maxAccessesPerRow(p->max_accesses_per_row),
@@ -104,6 +106,19 @@ DRAMCtrl::DRAMCtrl(const DRAMCtrlParams* p) :
for (int b = 0; b < banksPerRank; b++) {
banks[r][b].rank = r;
banks[r][b].bank = b;
+ if (bankGroupArch) {
+ // Simply assign lower bits to bank group in order to
+ // rotate across bank groups as banks are incremented
+ // e.g. with 4 banks per bank group and 16 banks total:
+ // banks 0,4,8,12 are in bank group 0
+ // banks 1,5,9,13 are in bank group 1
+ // banks 2,6,10,14 are in bank group 2
+ // banks 3,7,11,15 are in bank group 3
+ banks[r][b].bankgr = b % bankGroupsPerRank;
+ } else {
+ // No bank groups; simply assign to bank number
+ banks[r][b].bankgr = b;
+ }
}
}
@@ -168,6 +183,35 @@ DRAMCtrl::DRAMCtrl(const DRAMCtrlParams* p) :
fatal("tREFI (%d) must be larger than tRP (%d) and tRFC (%d)\n",
tREFI, tRP, tRFC);
}
+
+ // basic bank group architecture checks
+ if (bankGroupArch) {
+ // must have at least one bank per bank group
+ if (bankGroupsPerRank > banksPerRank) {
+ fatal("banks per rank (%d) must be equal to or larger than "
+ "banks groups per rank (%d)\n",
+ banksPerRank, bankGroupsPerRank);
+ }
+ // must have same number of banks in each bank group
+ if ((banksPerRank % bankGroupsPerRank) != 0) {
+ fatal("Banks per rank (%d) must be evenly divisible by bank groups "
+ "per rank (%d) for equal banks per bank group\n",
+ banksPerRank, bankGroupsPerRank);
+ }
+ // tCCD_L should be greater than the minimal back-to-back burst delay (tBURST)
+ if (tCCD_L <= tBURST) {
+ fatal("tCCD_L (%d) should be larger than tBURST (%d) when "
+ "bank groups per rank (%d) is greater than 1\n",
+ tCCD_L, tBURST, bankGroupsPerRank);
+ }
+ // tRRD_L, the same bank group ACT-to-ACT delay, should be greater than tRRD
+ if (tRRD_L <= tRRD) {
+ fatal("tRRD_L (%d) should be larger than tRRD (%d) when "
+ "bank groups per rank (%d) is greater than 1\n",
+ tRRD_L, tRRD, bankGroupsPerRank);
+ }
+ }
+
}
void
@@ -824,14 +868,25 @@ DRAMCtrl::activateBank(Bank& bank, Tick act_tick, uint32_t row)
bank.preAllowedAt = act_tick + tRAS;
// Respect the row-to-column command delay
- bank.colAllowedAt = act_tick + tRCD;
+ bank.colAllowedAt = std::max(act_tick + tRCD, bank.colAllowedAt);
// start by enforcing tRRD
for(int i = 0; i < banksPerRank; i++) {
// next activate to any bank in this rank must not happen
// before tRRD
- banks[rank][i].actAllowedAt = std::max(act_tick + tRRD,
- banks[rank][i].actAllowedAt);
+ if (bankGroupArch && (bank.bankgr == banks[rank][i].bankgr)) {
+ // bank group architecture requires longer delays between
+ // ACT commands within the same bank group. Use tRRD_L
+ // in this case
+ banks[rank][i].actAllowedAt = std::max(act_tick + tRRD_L,
+ banks[rank][i].actAllowedAt);
+ } else {
+ // use the shorter tRRD value when either:
+ // 1) bank group architecture is not supported
+ // 2) bank is in a different bank group
+ banks[rank][i].actAllowedAt = std::max(act_tick + tRRD,
+ banks[rank][i].actAllowedAt);
+ }
}
// next, we deal with tXAW, if the activation limit is disabled
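The loop above reduces to a per-bank choice between two ACT-to-ACT spacings. A hedged sketch of that rule as a free-standing helper (hypothetical function and names, not part of dram_ctrl.cc):

#include <algorithm>
#include <cstdint>

using Tick = uint64_t;

// Earliest tick at which a peer bank may accept an ACT after an ACT
// issued at act_tick; mirrors the loop above (illustrative only)
Tick nextActAllowedAt(Tick act_tick, Tick cur_allowed_at,
                      bool bank_group_arch, bool same_bank_group,
                      Tick tRRD, Tick tRRD_L)
{
    // same bank group pays the longer tRRD_L penalty; no bank groups,
    // or a different bank group, pays the shorter tRRD
    Tick delay = (bank_group_arch && same_bank_group) ? tRRD_L : tRRD;
    return std::max(act_tick + delay, cur_allowed_at);
}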
@@ -986,9 +1041,38 @@ DRAMCtrl::doDRAMAccess(DRAMPacket* dram_pkt)
// only one burst can use the bus at any one point in time
assert(dram_pkt->readyTime - busBusyUntil >= tBURST);
- // not strictly necessary, but update the time for the next
- // read/write (add a max with tCCD here)
- bank.colAllowedAt = cmd_at + tBURST;
+ // update the time for the next read/write burst for each
+ // bank (add a max with tCCD/tCCD_L here)
+ Tick cmd_dly;
+ for (int j = 0; j < ranksPerChannel; j++) {
+ for (int i = 0; i < banksPerRank; i++) {
+ // next burst to same bank group in this rank must not happen
+ // before tCCD_L. Different bank group timing requirement is
+ // tBURST; Add tCS for different ranks
+ if (dram_pkt->rank == j) {
+ if (bankGroupArch && (bank.bankgr == banks[j][i].bankgr)) {
+ // bank group architecture requires longer delays between
+ // RD/WR burst commands to the same bank group.
+ // Use tCCD_L in this case
+ cmd_dly = tCCD_L;
+ } else {
+ // use tBURST (equivalent to tCCD_S), the shorter
+ // CAS-to-CAS delay value, when either:
+ // 1) bank group architecture is not supported
+ // 2) bank is in a different bank group
+ cmd_dly = tBURST;
+ }
+ } else {
+ // a different rank is by definition a different bank group:
+ // use tBURST (equivalent to tCCD_S), which is the shorter
+ // CAS-to-CAS delay in this case
+ // Add tCS to account for rank-to-rank bus delay requirements
+ cmd_dly = tBURST + tCS;
+ }
+ banks[j][i].colAllowedAt = std::max(cmd_at + cmd_dly,
+ banks[j][i].colAllowedAt);
+ }
+ }
// Save rank of current access
activeRank = dram_pkt->rank;
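The per-bank colAllowedAt update amounts to a three-way choice of CAS-to-CAS delay. A compact sketch of the decision (hypothetical helper, same parameter names as the patch):

#include <cstdint>

using Tick = uint64_t;

// CAS-to-CAS delay applied from the current burst to a given bank;
// mirrors the cmd_dly selection in the nested loop above
Tick burstToBurstDelay(bool same_rank, bool bank_group_arch,
                       bool same_bank_group,
                       Tick tBURST, Tick tCCD_L, Tick tCS)
{
    if (!same_rank)
        return tBURST + tCS;   // rank switch: add the tCS bus penalty
    if (bank_group_arch && same_bank_group)
        return tCCD_L;         // same bank group: long CAS-to-CAS delay
    return tBURST;             // tCCD_S case: short CAS-to-CAS delay
}

For the DDR4-2400 configuration above this yields 5ns to the same bank group, 3.333ns to a different bank group, and 3.333ns + tCS to a different rank.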
@@ -1184,15 +1268,8 @@ DRAMCtrl::processNextReqEvent()
// that we are allowed to prepare a new bank, but not issue a
// read command until after tWTR, in essence we capture a
// bubble on the data bus that is tWTR + tCL
- if (switched_cmd_type) {
- // add a bubble to the data bus for write-to-read turn around
- // or tCS (different rank bus delay).
- busBusyUntil += (dram_pkt->rank == activeRank) ? tWTR + tCL :
- tCS;
- } else if (dram_pkt->rank != activeRank) {
- // add a bubble to the data bus, as defined by the
- // tCS parameter for rank-to-rank delay
- busBusyUntil += tCS;
+ if (switched_cmd_type && dram_pkt->rank == activeRank) {
+ busBusyUntil += tWTR + tCL;
}
doDRAMAccess(dram_pkt);
@@ -1235,14 +1312,12 @@ DRAMCtrl::processNextReqEvent()
// sanity check
assert(dram_pkt->size <= burstSize);
- if (switched_cmd_type) {
- // add a bubble to the data bus, as defined by the
- // tRTW or tCS parameter, depending on whether changing ranks
- busBusyUntil += (dram_pkt->rank == activeRank) ? tRTW : tCS;
- } else if (dram_pkt->rank != activeRank) {
- // add a bubble to the data bus, as defined by the
- // tCS parameter for rank-to-rank delay
- busBusyUntil += tCS;
+ // add a bubble to the data bus, as defined by tRTW, when
+ // the access is to the same rank as the previous burst
+ // Different rank timing is handled with tCS, which is
+ // applied to colAllowedAt
+ if (switched_cmd_type && dram_pkt->rank == activeRank) {
+ busBusyUntil += tRTW;
}
doDRAMAccess(dram_pkt);
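Taken together, the two processNextReqEvent hunks narrow the explicit bus bubble to same-rank command-type switches: tWTR + tCL on write-to-read and tRTW on read-to-write, with rank-to-rank spacing now enforced by the tCS term folded into colAllowedAt. A hedged sketch of the resulting rule (hypothetical helper, not in the patch):

#include <cstdint>

using Tick = uint64_t;

// Extra data-bus bubble added before the next burst; rank switches
// return 0 here because tCS is already applied to colAllowedAt
Tick busTurnaroundBubble(bool switched_cmd_type, bool same_rank,
                         bool next_is_read, Tick tWTR, Tick tCL, Tick tRTW)
{
    if (switched_cmd_type && same_rank)
        return next_is_read ? tWTR + tCL : tRTW;
    return 0;
}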
diff --git a/src/mem/dram_ctrl.hh b/src/mem/dram_ctrl.hh
index 8dcbe817b..cc2bd13fd 100644
--- a/src/mem/dram_ctrl.hh
+++ b/src/mem/dram_ctrl.hh
@@ -158,6 +158,7 @@ class DRAMCtrl : public AbstractMemory
uint32_t openRow;
uint8_t rank;
uint8_t bank;
+ uint8_t bankgr;
Tick colAllowedAt;
Tick preAllowedAt;
@@ -167,7 +168,7 @@ class DRAMCtrl : public AbstractMemory
uint32_t bytesAccessed;
Bank() :
- openRow(NO_ROW), rank(0), bank(0),
+ openRow(NO_ROW), rank(0), bank(0), bankgr(0),
colAllowedAt(0), preAllowedAt(0), actAllowedAt(0),
rowAccesses(0), bytesAccessed(0)
{ }
@@ -470,6 +471,8 @@ class DRAMCtrl : public AbstractMemory
const uint32_t columnsPerRowBuffer;
const uint32_t columnsPerStripe;
const uint32_t ranksPerChannel;
+ const uint32_t bankGroupsPerRank;
+ const bool bankGroupArch;
const uint32_t banksPerRank;
const uint32_t channels;
uint32_t rowsPerBank;
@@ -490,6 +493,7 @@ class DRAMCtrl : public AbstractMemory
const Tick tRTW;
const Tick tCS;
const Tick tBURST;
+ const Tick tCCD_L;
const Tick tRCD;
const Tick tCL;
const Tick tRP;
@@ -499,6 +503,7 @@ class DRAMCtrl : public AbstractMemory
const Tick tRFC;
const Tick tREFI;
const Tick tRRD;
+ const Tick tRRD_L;
const Tick tXAW;
const uint32_t activationLimit;