Diffstat (limited to 'src/mem')
-rw-r--r--   src/mem/DRAMCtrl.py    38
-rw-r--r--   src/mem/dram_ctrl.cc  127
-rw-r--r--   src/mem/dram_ctrl.hh     7
3 files changed, 143 insertions, 29 deletions
diff --git a/src/mem/DRAMCtrl.py b/src/mem/DRAMCtrl.py
index b06b8e7eb..02fa4fa46 100644
--- a/src/mem/DRAMCtrl.py
+++ b/src/mem/DRAMCtrl.py
@@ -111,6 +111,11 @@ class DRAMCtrl(AbstractMemory):
                                       "device/chip")
     devices_per_rank = Param.Unsigned("Number of devices/chips per rank")
     ranks_per_channel = Param.Unsigned("Number of ranks per channel")
+
+    # default to 0 bank groups per rank, indicating bank group architecture
+    # is not used
+    # update per memory class when bank group architecture is supported
+    bank_groups_per_rank = Param.Unsigned(0, "Number of bank groups per rank")
     banks_per_rank = Param.Unsigned("Number of banks per rank")
     # only used for the address mapping as the controller by
     # construction is a single channel and multiple controllers have
@@ -147,8 +152,17 @@ class DRAMCtrl(AbstractMemory):
     # This parameter has to account for burst length.
     # Read/Write requests with data size larger than one full burst are broken
     # down into multiple requests in the controller
+    # tBURST is equivalent to the CAS-to-CAS delay (tCCD)
+    # With bank group architectures, tBURST represents the CAS-to-CAS
+    # delay for bursts to different bank groups (tCCD_S)
     tBURST = Param.Latency("Burst duration (for DDR burst length / 2 cycles)")

+    # CAS-to-CAS delay for bursts to the same bank group
+    # only utilized with bank group architectures; set to 0 for default case
+    # tBURST is equivalent to tCCD_S; no explicit parameter required
+    # for CAS-to-CAS delay for bursts to different bank groups
+    tCCD_L = Param.Latency("0ns", "Same bank group CAS to CAS delay")
+
     # time taken to complete one refresh cycle (N rows in all banks)
     tRFC = Param.Latency("Refresh cycle time")

@@ -171,6 +185,9 @@ class DRAMCtrl(AbstractMemory):
     # minimum row activate to row activate delay time
     tRRD = Param.Latency("ACT to ACT delay")

+    # only utilized with bank group architectures; set to 0 for default case
+    tRRD_L = Param.Latency("0ns", "Same bank group ACT to ACT delay")
+
     # time window in which a maximum number of activates are allowed
     # to take place, set to 0 to disable
     tXAW = Param.Latency("X activation window")
@@ -274,6 +291,10 @@ class DDR4_2400_x64(DRAMCtrl):
     # Use a single rank
     ranks_per_channel = 1

+    # DDR4 has 2 (x16) or 4 (x4 and x8) bank groups
+    # Set to 4 for x4, x8 case
+    bank_groups_per_rank = 4
+
     # DDR4 has 16 banks (4 bank groups) in all
     # configurations. Currently we do not capture the additional
     # constraints incurred by the bank groups
@@ -283,16 +304,29 @@ class DDR4_2400_x64(DRAMCtrl):
     tCK = '0.833ns'

     # 8 beats across an x64 interface translates to 4 clocks @ 1200 MHz
+    # tBURST is equivalent to the CAS-to-CAS delay (tCCD)
+    # With bank group architectures, tBURST represents the CAS-to-CAS
+    # delay for bursts to different bank groups (tCCD_S)
     tBURST = '3.333ns'

+    # @2400 data rate, tCCD_L is 6 CK
+    # CAS-to-CAS delay for bursts to the same bank group
+    # tBURST is equivalent to tCCD_S; no explicit parameter required
+    # for CAS-to-CAS delay for bursts to different bank groups
+    tCCD_L = '5ns'
+
     # DDR4-2400 17-17-17
     tRCD = '14.16ns'
     tCL = '14.16ns'
     tRP = '14.16ns'
     tRAS = '32ns'

-    # Here using the average of RRD_S and RRD_L
-    tRRD = '4.1ns'
+    # RRD_S (different bank group) for 1K page is MAX(4 CK, 3.3ns)
+    tRRD = '3.3ns'
+
+    # RRD_L (same bank group) for 1K page is MAX(4 CK, 4.9ns)
+    tRRD_L = '4.9ns'
+
     tXAW = '21ns'
     activation_limit = 4
     tRFC = '260ns'
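The DDR4-2400 values above follow directly from the 0.833ns clock: an 8-beat burst occupies 4 clocks (tBURST = 3.333ns) and the 6 CK same-bank-group CAS-to-CAS delay rounds to 5ns. A minimal sketch of that arithmetic, using only the numbers quoted in the config change (the variable names below are illustrative, not gem5 parameters):

    #include <cstdio>

    int main()
    {
        // DDR4-2400 numbers quoted in the DRAMCtrl.py change above
        const double tCK = 0.833;         // ns, 1200 MHz clock
        const double tBURST = 4 * tCK;    // 8 beats -> 4 clocks, ~3.333 ns
        const double tCCD_L = 6 * tCK;    // same bank group CAS-to-CAS, ~5 ns

        std::printf("tBURST = %.3f ns, tCCD_L = %.3f ns\n", tBURST, tCCD_L);
        return 0;
    }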
diff --git a/src/mem/dram_ctrl.cc b/src/mem/dram_ctrl.cc
index ca562f4f7..38c240fcf 100644
--- a/src/mem/dram_ctrl.cc
+++ b/src/mem/dram_ctrl.cc
@@ -69,6 +69,8 @@ DRAMCtrl::DRAMCtrl(const DRAMCtrlParams* p) :
     columnsPerRowBuffer(rowBufferSize / burstSize),
     columnsPerStripe(range.granularity() / burstSize),
     ranksPerChannel(p->ranks_per_channel),
+    bankGroupsPerRank(p->bank_groups_per_rank),
+    bankGroupArch(p->bank_groups_per_rank > 0),
     banksPerRank(p->banks_per_rank), channels(p->channels), rowsPerBank(0),
     readBufferSize(p->read_buffer_size),
     writeBufferSize(p->write_buffer_size),
@@ -77,9 +79,9 @@ DRAMCtrl::DRAMCtrl(const DRAMCtrlParams* p) :
     minWritesPerSwitch(p->min_writes_per_switch),
     writesThisTime(0), readsThisTime(0),
     tCK(p->tCK), tWTR(p->tWTR), tRTW(p->tRTW), tCS(p->tCS), tBURST(p->tBURST),
-    tRCD(p->tRCD), tCL(p->tCL), tRP(p->tRP), tRAS(p->tRAS), tWR(p->tWR),
-    tRTP(p->tRTP), tRFC(p->tRFC), tREFI(p->tREFI), tRRD(p->tRRD),
-    tXAW(p->tXAW), activationLimit(p->activation_limit),
+    tCCD_L(p->tCCD_L), tRCD(p->tRCD), tCL(p->tCL), tRP(p->tRP), tRAS(p->tRAS),
+    tWR(p->tWR), tRTP(p->tRTP), tRFC(p->tRFC), tREFI(p->tREFI), tRRD(p->tRRD),
+    tRRD_L(p->tRRD_L), tXAW(p->tXAW), activationLimit(p->activation_limit),
     memSchedPolicy(p->mem_sched_policy), addrMapping(p->addr_mapping),
     pageMgmt(p->page_policy),
     maxAccessesPerRow(p->max_accesses_per_row),
@@ -104,6 +106,19 @@ DRAMCtrl::DRAMCtrl(const DRAMCtrlParams* p) :
         for (int b = 0; b < banksPerRank; b++) {
             banks[r][b].rank = r;
             banks[r][b].bank = b;
+            if (bankGroupArch) {
+                // Simply assign lower bits to bank group in order to
+                // rotate across bank groups as banks are incremented
+                // e.g. with 4 banks per bank group and 16 banks total:
+                //    banks 0,4,8,12 are in bank group 0
+                //    banks 1,5,9,13 are in bank group 1
+                //    banks 2,6,10,14 are in bank group 2
+                //    banks 3,7,11,15 are in bank group 3
+                banks[r][b].bankgr = b % bankGroupsPerRank;
+            } else {
+                // No bank groups; simply assign to bank number
+                banks[r][b].bankgr = b;
+            }
         }
     }

@@ -168,6 +183,35 @@ DRAMCtrl::DRAMCtrl(const DRAMCtrlParams* p) :
         fatal("tREFI (%d) must be larger than tRP (%d) and tRFC (%d)\n",
               tREFI, tRP, tRFC);
     }
+
+    // basic bank group architecture checks ->
+    if (bankGroupArch) {
+        // must have at least one bank per bank group
+        if (bankGroupsPerRank > banksPerRank) {
+            fatal("banks per rank (%d) must be equal to or larger than "
+                  "bank groups per rank (%d)\n",
+                  banksPerRank, bankGroupsPerRank);
+        }
+        // must have same number of banks in each bank group
+        if ((banksPerRank % bankGroupsPerRank) != 0) {
+            fatal("Banks per rank (%d) must be evenly divisible by bank groups "
+                  "per rank (%d) for equal banks per bank group\n",
+                  banksPerRank, bankGroupsPerRank);
+        }
+        // tCCD_L should be greater than minimal, back-to-back burst delay
+        if (tCCD_L <= tBURST) {
+            fatal("tCCD_L (%d) should be larger than tBURST (%d) when "
+                  "bank groups per rank (%d) is greater than 1\n",
+                  tCCD_L, tBURST, bankGroupsPerRank);
+        }
+        // tRRD_L is greater than minimal, same bank group ACT-to-ACT delay
+        if (tRRD_L <= tRRD) {
+            fatal("tRRD_L (%d) should be larger than tRRD (%d) when "
+                  "bank groups per rank (%d) is greater than 1\n",
+                  tRRD_L, tRRD, bankGroupsPerRank);
+        }
+    }
+
 }

 void
@@ -824,14 +868,25 @@ DRAMCtrl::activateBank(Bank& bank, Tick act_tick, uint32_t row)
     bank.preAllowedAt = act_tick + tRAS;

     // Respect the row-to-column command delay
-    bank.colAllowedAt = act_tick + tRCD;
+    bank.colAllowedAt = std::max(act_tick + tRCD, bank.colAllowedAt);

     // start by enforcing tRRD
     for(int i = 0; i < banksPerRank; i++) {
         // next activate to any bank in this rank must not happen
         // before tRRD
-        banks[rank][i].actAllowedAt = std::max(act_tick + tRRD,
-                                               banks[rank][i].actAllowedAt);
+        if (bankGroupArch && (bank.bankgr == banks[rank][i].bankgr)) {
+            // bank group architecture requires longer delays between
+            // ACT commands within the same bank group. Use tRRD_L
+            // in this case
+            banks[rank][i].actAllowedAt = std::max(act_tick + tRRD_L,
+                                                   banks[rank][i].actAllowedAt);
+        } else {
+            // use shorter tRRD value when either
+            // 1) bank group architecture is not supported
+            // 2) bank is in a different bank group
+            banks[rank][i].actAllowedAt = std::max(act_tick + tRRD,
+                                                   banks[rank][i].actAllowedAt);
+        }
     }

     // next, we deal with tXAW, if the activation limit is disabled
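The activateBank() hunk above selects between the two activate-to-activate constraints per bank: tRRD_L when the bank shares a bank group with the one being activated, tRRD otherwise. A simplified standalone sketch of that selection, assuming illustrative names rather than the controller's actual interface:

    #include <algorithm>
    #include <cstdint>

    using Tick = uint64_t;

    // Sketch of the ACT-to-ACT constraint chosen in activateBank() above:
    // same bank group -> tRRD_L, different group (or no bank groups) -> tRRD.
    Tick nextActAllowedAt(Tick act_tick, Tick cur_allowed_at,
                          bool bank_group_arch, bool same_bank_group,
                          Tick tRRD, Tick tRRD_L)
    {
        const Tick delay = (bank_group_arch && same_bank_group) ? tRRD_L : tRRD;
        return std::max(act_tick + delay, cur_allowed_at);
    }

With the DDR4-2400 values above, back-to-back activates to the same bank group are kept at least 4.9ns apart, while activates to different bank groups only need 3.3ns.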
@@ -986,9 +1041,38 @@ DRAMCtrl::doDRAMAccess(DRAMPacket* dram_pkt)
     // only one burst can use the bus at any one point in time
     assert(dram_pkt->readyTime - busBusyUntil >= tBURST);

-    // not strictly necessary, but update the time for the next
-    // read/write (add a max with tCCD here)
-    bank.colAllowedAt = cmd_at + tBURST;
+    // update the time for the next read/write burst for each
+    // bank (add a max with tCCD/tCCD_L here)
+    Tick cmd_dly;
+    for(int j = 0; j < ranksPerChannel; j++) {
+        for(int i = 0; i < banksPerRank; i++) {
+            // next burst to same bank group in this rank must not happen
+            // before tCCD_L. Different bank group timing requirement is
+            // tBURST; Add tCS for different ranks
+            if (dram_pkt->rank == j) {
+                if (bankGroupArch && (bank.bankgr == banks[j][i].bankgr)) {
+                    // bank group architecture requires longer delays between
+                    // RD/WR burst commands to the same bank group.
+                    // Use tCCD_L in this case
+                    cmd_dly = tCCD_L;
+                } else {
+                    // use tBURST (equivalent to tCCD_S), the shorter
+                    // cas-to-cas delay value, when either:
+                    // 1) bank group architecture is not supported
+                    // 2) bank is in a different bank group
+                    cmd_dly = tBURST;
+                }
+            } else {
+                // different rank is by default in a different bank group
+                // use tBURST (equivalent to tCCD_S), which is the shorter
+                // cas-to-cas delay in this case
+                // Add tCS to account for rank-to-rank bus delay requirements
+                cmd_dly = tBURST + tCS;
+            }
+            banks[j][i].colAllowedAt = std::max(cmd_at + cmd_dly,
+                                                banks[j][i].colAllowedAt);
+        }
+    }

     // Save rank of current access
     activeRank = dram_pkt->rank;
@@ -1184,15 +1268,8 @@ DRAMCtrl::processNextReqEvent()
             // that we are allowed to prepare a new bank, but not issue a
             // read command until after tWTR, in essence we capture a
             // bubble on the data bus that is tWTR + tCL
-            if (switched_cmd_type) {
-                // add a bubble to the data bus for write-to-read turn around
-                // or tCS (different rank bus delay).
-                busBusyUntil += (dram_pkt->rank == activeRank) ? tWTR + tCL :
-                    tCS;
-            } else if (dram_pkt->rank != activeRank) {
-                // add a bubble to the data bus, as defined by the
-                // tCS parameter for rank-to-rank delay
-                busBusyUntil += tCS;
+            if (switched_cmd_type && dram_pkt->rank == activeRank) {
+                busBusyUntil += tWTR + tCL;
             }

             doDRAMAccess(dram_pkt);
@@ -1235,14 +1312,12 @@ DRAMCtrl::processNextReqEvent()
             // sanity check
             assert(dram_pkt->size <= burstSize);

-            if (switched_cmd_type) {
-                // add a bubble to the data bus, as defined by the
-                // tRTW or tCS parameter, depending on whether changing ranks
-                busBusyUntil += (dram_pkt->rank == activeRank) ? tRTW : tCS;
-            } else if (dram_pkt->rank != activeRank) {
-                // add a bubble to the data bus, as defined by the
-                // tCS parameter for rank-to-rank delay
-                busBusyUntil += tCS;
+            // add a bubble to the data bus, as defined by
+            // tRTW when access is to the same rank as previous burst.
+            // Different rank timing is handled with tCS, which is
+            // applied to colAllowedAt
+            if (switched_cmd_type && dram_pkt->rank == activeRank) {
+                busBusyUntil += tRTW;
             }

             doDRAMAccess(dram_pkt);
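The doDRAMAccess() hunk above applies one of three delays to each bank's colAllowedAt after a burst: tCCD_L for banks in the same rank and same bank group, tBURST (i.e. tCCD_S) for other banks in the same rank, and tBURST + tCS for banks in other ranks, which is also why the explicit rank-to-rank bus bubbles in processNextReqEvent() are dropped. A condensed sketch of that decision, with hypothetical parameters standing in for the packet and bank state:

    #include <cstdint>

    using Tick = uint64_t;

    // Sketch of the burst-to-burst delay chosen in doDRAMAccess() above.
    Tick burstToBurstDelay(bool same_rank, bool bank_group_arch,
                           bool same_bank_group,
                           Tick tBURST, Tick tCCD_L, Tick tCS)
    {
        if (!same_rank) {
            // a different rank is by definition a different bank group;
            // add tCS for the rank-to-rank bus turnaround
            return tBURST + tCS;
        }
        // same rank: only same-bank-group bursts pay the longer tCCD_L
        return (bank_group_arch && same_bank_group) ? tCCD_L : tBURST;
    }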
diff --git a/src/mem/dram_ctrl.hh b/src/mem/dram_ctrl.hh
index 8dcbe817b..cc2bd13fd 100644
--- a/src/mem/dram_ctrl.hh
+++ b/src/mem/dram_ctrl.hh
@@ -158,6 +158,7 @@ class DRAMCtrl : public AbstractMemory
         uint32_t openRow;
         uint8_t rank;
         uint8_t bank;
+        uint8_t bankgr;

         Tick colAllowedAt;
         Tick preAllowedAt;
@@ -167,7 +168,7 @@ class DRAMCtrl : public AbstractMemory
         uint32_t bytesAccessed;

         Bank() :
-            openRow(NO_ROW), rank(0), bank(0),
+            openRow(NO_ROW), rank(0), bank(0), bankgr(0),
             colAllowedAt(0), preAllowedAt(0), actAllowedAt(0),
             rowAccesses(0), bytesAccessed(0)
         { }
@@ -470,6 +471,8 @@ class DRAMCtrl : public AbstractMemory
     const uint32_t columnsPerRowBuffer;
     const uint32_t columnsPerStripe;
     const uint32_t ranksPerChannel;
+    const uint32_t bankGroupsPerRank;
+    const bool bankGroupArch;
     const uint32_t banksPerRank;
     const uint32_t channels;
     uint32_t rowsPerBank;
@@ -490,6 +493,7 @@ class DRAMCtrl : public AbstractMemory
     const Tick tRTW;
     const Tick tCS;
     const Tick tBURST;
+    const Tick tCCD_L;
     const Tick tRCD;
     const Tick tCL;
     const Tick tRP;
@@ -499,6 +503,7 @@ class DRAMCtrl : public AbstractMemory
     const Tick tRFC;
     const Tick tREFI;
     const Tick tRRD;
+    const Tick tRRD_L;
     const Tick tXAW;
     const uint32_t activationLimit;
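Taken together, the constructor changes map bank b to bank group b % bankGroupsPerRank and reject configurations where the groups do not divide the banks evenly or where the long timings do not exceed the short ones. A self-contained sketch of that setup logic, assuming a plain vector in place of the controller's Bank array:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Sketch of the bank-to-bank-group assignment and the sanity checks
    // added to the DRAMCtrl constructor above.
    std::vector<uint8_t> assignBankGroups(unsigned banks_per_rank,
                                          unsigned bank_groups_per_rank)
    {
        const bool bank_group_arch = bank_groups_per_rank > 0;
        if (bank_group_arch) {
            // at least one bank per group, and the same count in every group
            assert(bank_groups_per_rank <= banks_per_rank);
            assert(banks_per_rank % bank_groups_per_rank == 0);
        }

        std::vector<uint8_t> bankgr(banks_per_rank);
        for (unsigned b = 0; b < banks_per_rank; b++) {
            // rotate across groups as the bank index increments, e.g. with
            // 16 banks and 4 groups, banks 0,4,8,12 land in group 0
            bankgr[b] = bank_group_arch ? b % bank_groups_per_rank : b;
        }
        return bankgr;
    }

    int main()
    {
        auto groups = assignBankGroups(16, 4);
        assert(groups[0] == 0 && groups[4] == 0 && groups[5] == 1);
        return 0;
    }

Leaving bank_groups_per_rank at its default of 0 disables the feature entirely, so memory classes without bank groups keep the original single tRRD/tBURST behaviour.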