summaryrefslogtreecommitdiff
path: root/src/northbridge/amd/amdmct
diff options
context:
space:
mode:
authorTimothy Pearson <tpearson@raptorengineeringinc.com>2015-08-07 19:05:29 -0500
committerStefan Reinauer <stefan.reinauer@coreboot.org>2015-11-23 18:36:44 +0100
commit845b00ce3344d1483d98cddcf59f317a1b96da64 (patch)
tree032d4c7d733897f213655420124946da785ed775 /src/northbridge/amd/amdmct
parent7e1465431a7869656026f435d7ad2b30f4e8e359 (diff)
downloadcoreboot-845b00ce3344d1483d98cddcf59f317a1b96da64.tar.xz
amd/amdmct/mct_ddr3: Fix poor performance on Family 15h CPUs
Change-Id: Ib6bc197e43e40ba2b923b1eb1229bacafc8be360
Signed-off-by: Timothy Pearson <tpearson@raptorengineeringinc.com>
Reviewed-on: http://review.coreboot.org/12029
Tested-by: build bot (Jenkins)
Reviewed-by: Stefan Reinauer <stefan.reinauer@coreboot.org>
Diffstat (limited to 'src/northbridge/amd/amdmct')
-rw-r--r--src/northbridge/amd/amdmct/mct_ddr3/mct_d.c371
-rw-r--r--src/northbridge/amd/amdmct/mct_ddr3/mct_d.h1
-rw-r--r--src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c65
-rw-r--r--src/northbridge/amd/amdmct/mct_ddr3/mctproc.c49
-rw-r--r--src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c216
-rw-r--r--src/northbridge/amd/amdmct/mct_ddr3/mctwl.c4
6 files changed, 614 insertions, 92 deletions
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c
index d76eea0e2a..aad813a056 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c
@@ -32,6 +32,8 @@
* supported.
*/
+// #define DEBUG_DIMM_SPD 1
+
static u8 ReconfigureDIMMspare_D(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstatA);
static void DQSTiming_D(struct MCTStatStruc *pMCTstat,
@@ -168,7 +170,8 @@ static u32 mct_MR1Odt_RDimm(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstat, u8 dct, u32 MrsChipSel);
static u32 mct_DramTermDyn_RDimm(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstat, u8 dimm);
-static u32 mct_SetDramConfigMisc2(struct DCTStatStruc *pDCTstat, u8 dct, u32 misc2);
+static u32 mct_SetDramConfigMisc2(struct DCTStatStruc *pDCTstat,
+ uint8_t dct, uint32_t misc2, uint32_t DramControl);
static void mct_BeforeDQSTrainSamp(struct DCTStatStruc *pDCTstat);
static void mct_WriteLevelization_HW(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstatA, uint8_t Pass);
@@ -1366,6 +1369,8 @@ static uint8_t fam15h_slow_access_mode(struct DCTStatStruc *pDCTstat, uint8_t dc
static void set_2t_configuration(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstat, u8 dct)
{
+ printk(BIOS_DEBUG, "%s: Start\n", __func__);
+
uint32_t dev;
uint32_t reg;
uint32_t dword;
@@ -1388,6 +1393,8 @@ static void set_2t_configuration(struct MCTStatStruc *pMCTstat,
else
dword &= ~(0x1 << 20); /* Clear 2T CMD mode */
Set_NB32_DCT(dev, dct, reg, dword);
+
+ printk(BIOS_DEBUG, "%s: Done\n", __func__);
}
static void precise_ndelay_fam15(struct MCTStatStruc *pMCTstat, uint32_t nanoseconds) {
@@ -2019,6 +2026,8 @@ static void fam15EnableTrainingMode(struct MCTStatStruc *pMCTstat,
/* Disable training mode */
uint8_t lane;
uint8_t dimm;
+ int16_t max_cdd_we_delta; /* signed: the value is clamped with "< 0" after it is computed */
+ int16_t cdd_trwtto_we_delta; /* signed: the value is clamped with "< 0" after it is computed */
uint8_t receiver;
uint8_t max_lane;
uint8_t ecc_enabled;
@@ -2033,21 +2042,37 @@ static void fam15EnableTrainingMode(struct MCTStatStruc *pMCTstat,
uint16_t twrwrdd;
uint16_t cdd_twrwrdd;
uint16_t twrrd;
+ uint16_t cdd_twrrd;
+ uint16_t cdd_trwtto;
uint16_t trwtto;
uint8_t first_dimm;
uint16_t delay;
uint16_t delay2;
+ uint8_t min_value;
+ uint8_t write_early;
uint8_t read_odt_delay;
uint8_t write_odt_delay;
+ uint8_t buffer_data_delay;
+ int16_t latency_difference;
uint16_t difference;
uint16_t current_total_delay_1[MAX_BYTE_LANES];
uint16_t current_total_delay_2[MAX_BYTE_LANES];
+ uint8_t ddr_voltage_index;
+ uint8_t max_dimms_installable;
/* FIXME
* This should be platform configurable
*/
uint8_t dimm_event_l_pin_support = 0;
+ /* Use index 1 when DIMMValidDCT[dct] is zero; otherwise query the DCT */
+ if (pDCTstat->DIMMValidDCT[dct] == 0)
+ ddr_voltage_index = 1;
+ else
+ ddr_voltage_index = dct_ddr_voltage_index(pDCTstat, dct);

+ max_dimms_installable = mctGet_NVbits(NV_MAX_DIMMS_PER_CH);
+
ecc_enabled = !!(pMCTstat->GStatus & 1 << GSB_ECCDIMMs);
if (ecc_enabled)
max_lane = 9;
@@ -2081,6 +2106,24 @@ static void fam15EnableTrainingMode(struct MCTStatStruc *pMCTstat,
else
write_odt_delay = 0;
+ dword = (Get_NB32_DCT(dev, dct, 0xa8) >> 24) & 0x3;
+ write_early = dword / 2;
+
+ latency_difference = Get_NB32_DCT(dev, dct, 0x200) & 0x1f;
+ dword = Get_NB32_DCT(dev, dct, 0x20c) & 0x1f;
+ latency_difference -= dword;
+
+ if (pDCTstat->Status & (1 << SB_LoadReduced)) {
+ /* LRDIMM */
+ buffer_data_delay = 0; /* placeholder: keeps the later min_value computation from reading an uninitialized variable */
+ /* TODO
+ * Implement LRDIMM support
+ * See Fam15h BKDG Rev. 3.14 section 2.10.5.5
+ */
+ } else {
+ buffer_data_delay = 0;
+ }
+
/* TODO:
* Adjust trdrdsddc if four-rank DIMMs are installed per
* section 2.10.5.5.1 of the Family 15h BKDG.
@@ -2116,7 +2159,7 @@ static void fam15EnableTrainingMode(struct MCTStatStruc *pMCTstat,
}
/* Convert the difference to MEMCLKs */
- cdd_trdrddd = (((cdd_trdrddd >> 5) & 0x1f) + 1) / 2;
+ cdd_trdrddd = (((cdd_trdrddd + (1 << 6) - 1) >> 6) & 0xf);
/* Calculate Trdrddd */
delay = (read_odt_delay + 3) * 2;
@@ -2162,7 +2205,7 @@ static void fam15EnableTrainingMode(struct MCTStatStruc *pMCTstat,
}
/* Convert the difference to MEMCLKs */
- cdd_twrwrdd = (((cdd_twrwrdd >> 5) & 0x1f) + 1) / 2;
+ cdd_twrwrdd = (((cdd_twrwrdd + (1 << 6) - 1) >> 6) & 0xf);
/* Calculate Twrwrdd */
delay = (write_odt_delay + 3) * 2;
@@ -2181,6 +2224,107 @@ static void fam15EnableTrainingMode(struct MCTStatStruc *pMCTstat,
dword &= ~(0x1 << 18); /* DisAutoRefresh = 0 */
Set_NB32_DCT(dev, dct, 0x8c, dword); /* DRAM Timing High */
+ /* Configure power saving options */
+ dword = Get_NB32_DCT(dev, dct, 0xa8); /* Dram Miscellaneous 2 */
+ dword |= (0x1 << 22); /* PrtlChPDEnhEn = 0x1 */
+ dword |= (0x1 << 21); /* AggrPDEn = 0x1 */
+ Set_NB32_DCT(dev, dct, 0xa8, dword); /* Dram Miscellaneous 2 */
+
+ /* Configure partial power down delay */
+ dword = Get_NB32(dev, 0x244); /* DRAM Controller Miscellaneous 3 */
+ dword &= ~0xf; /* PrtlChPDDynDly = 0x2 */
+ dword |= 0x2;
+ Set_NB32(dev, 0x244, dword); /* DRAM Controller Miscellaneous 3 */
+
+ /* Configure power save delays */
+ delay = 0xa;
+ delay2 = 0x3;
+
+ /* Family 15h BKDG Table 214 */
+ if ((pDCTstat->Status & (1 << SB_Registered))
+ || (pDCTstat->Status & (1 << SB_LoadReduced))) {
+ if (memclk_index <= 0x6) {
+ if (ddr_voltage_index < 0x4)
+ /* 1.5 or 1.35V */
+ delay2 = 0x3;
+ else
+ /* 1.25V */
+ delay2 = 0x4;
+ }
+ else if ((memclk_index == 0xa)
+ || (memclk_index == 0xe))
+ delay2 = 0x4;
+ else if (memclk_index == 0x12)
+ delay2 = 0x5;
+ else if (memclk_index == 0x16)
+ delay2 = 0x6;
+ } else {
+ if (memclk_index <= 0x6)
+ delay2 = 0x3;
+ else if ((memclk_index == 0xa)
+ || (memclk_index == 0xe))
+ delay2 = 0x4;
+ else if (memclk_index == 0x12)
+ delay2 = 0x5;
+ else if (memclk_index == 0x16)
+ delay2 = 0x6;
+ }
+
+ /* Family 15h BKDG Table 215 */
+ if (memclk_index <= 0x6)
+ delay = 0xa;
+ else if (memclk_index == 0xa)
+ delay = 0xd;
+ else if (memclk_index == 0xe)
+ delay = 0x10;
+ else if (memclk_index == 0x12)
+ delay = 0x14;
+ else if (memclk_index == 0x16)
+ delay = 0x17;
+
+ dword = Get_NB32_DCT(dev, dct, 0x248); /* Dram Power Management 0 */
+ dword &= ~(0x3f << 24); /* AggrPDDelay = 0x0 */
+ dword &= ~(0x3f << 16); /* PchgPDEnDelay = 0x1 */
+ dword |= (0x1 << 16);
+ dword &= ~(0x1f << 8); /* Txpdll = delay */
+ dword |= ((delay & 0x1f) << 8);
+ dword &= ~0xf; /* Txp = delay2 */
+ dword |= delay2 & 0xf;
+ Set_NB32_DCT(dev, dct, 0x248, dword); /* Dram Power Management 0 */
+
+ /* Family 15h BKDG Table 216 */
+ if (memclk_index <= 0x6) {
+ delay = 0x5;
+ delay2 = 0x3;
+ }
+ else if (memclk_index == 0xa) {
+ delay = 0x6;
+ delay2 = 0x3;
+ }
+ else if (memclk_index == 0xe) {
+ delay = 0x7;
+ delay2 = 0x4;
+ }
+ else if (memclk_index == 0x12) {
+ delay = 0x8;
+ delay2 = 0x4;
+ }
+ else if (memclk_index == 0x16) {
+ delay = 0xa;
+ delay2 = 0x5;
+ }
+
+ dword = Get_NB32_DCT(dev, dct, 0x24c); /* Dram Power Management 1 */
+ dword &= ~(0x3f << 24); /* Tcksrx = delay */
+ dword |= ((delay & 0x3f) << 24);
+ dword &= ~(0x3f << 16); /* Tcksre = delay */
+ dword |= ((delay & 0x3f) << 16);
+ dword &= ~(0x3f << 8); /* Tckesr = delay2 + 1 */
+ dword |= (((delay2 + 1) & 0x3f) << 8);
+ dword &= ~0xf; /* Tpd = delay2 */
+ dword |= delay2 & 0xf;
+ Set_NB32_DCT(dev, dct, 0x24c, dword); /* Dram Power Management 1 */
+
dword = Get_NB32_DCT(dev, dct, 0x94); /* DRAM Configuration High */
dword |= (0xf << 24); /* DcqBypassMax = 0xf */
dword |= (0x1 << 22); /* BankSwizzleMode = 1 */
@@ -2233,15 +2377,98 @@ static void fam15EnableTrainingMode(struct MCTStatStruc *pMCTstat,
}
}
- /* TODO
- * Calculate Twrrd per section 2.10.5.5.3 of the Family 15h BKDG
- */
- twrrd = 0xb;
+ /* Calculate the Critical Delay Difference for Twrrd */
+ cdd_twrrd = 0;
+ for (receiver = 0; receiver < 8; receiver += 2) {
+ dimm = (receiver >> 1);
- /* TODO
- * Calculate TrwtTO per section 2.10.5.5.4 of the Family 15h BKDG
- */
- trwtto = 0x16;
+ if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, dct, receiver))
+ continue;
+
+ read_dqs_write_timing_control_registers(current_total_delay_1, dev, dct, dimm, index_reg);
+ read_dqs_receiver_enable_control_registers(current_total_delay_2, dev, dct, dimm, index_reg);
+
+ for (lane = 0; lane < max_lane; lane++) {
+ if (current_total_delay_1[lane] > current_total_delay_2[lane])
+ difference = current_total_delay_1[lane] - current_total_delay_2[lane];
+ else
+ difference = current_total_delay_2[lane] - current_total_delay_1[lane];
+
+ if (difference > cdd_twrrd)
+ cdd_twrrd = difference;
+ }
+ }
+
+ /* Convert the difference to MEMCLKs */
+ cdd_twrrd = (((cdd_twrrd + (1 << 6) - 1) >> 6) & 0xf);
+
+ /* Fam15h BKDG section 2.10.5.5.3 */
+ if (pDCTstat->Status & (1 << SB_LoadReduced)) {
+ /* LRDIMM */
+
+ /* TODO
+ * Implement LRDIMM support
+ * See Fam15h BKDG Rev. 3.14 section 2.10.5.5
+ */
+ twrrd = 0xb;
+ } else {
+ max_cdd_we_delta = (((int16_t)cdd_twrrd + 1 - ((int16_t)write_early * 2)) + 1) / 2;
+ if (max_cdd_we_delta < 0)
+ max_cdd_we_delta = 0;
+ if (((uint16_t)max_cdd_we_delta) > write_odt_delay)
+ dword = max_cdd_we_delta;
+ else
+ dword = write_odt_delay;
+ dword += 3;
+ if (latency_difference < dword) {
+ dword -= latency_difference;
+ if (dword < 1)
+ twrrd = 1;
+ else
+ twrrd = dword;
+ } else {
+ twrrd = 1;
+ }
+ }
+
+ /* Calculate the Critical Delay Difference for TrwtTO */
+ cdd_trwtto = 0;
+ for (receiver = 0; receiver < 8; receiver += 2) {
+ dimm = (receiver >> 1);
+
+ if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, dct, receiver))
+ continue;
+
+ read_dqs_receiver_enable_control_registers(current_total_delay_1, dev, dct, dimm, index_reg);
+ read_dqs_write_timing_control_registers(current_total_delay_2, dev, dct, dimm, index_reg);
+
+ for (lane = 0; lane < max_lane; lane++) {
+ if (current_total_delay_1[lane] > current_total_delay_2[lane])
+ difference = current_total_delay_1[lane] - current_total_delay_2[lane];
+ else
+ difference = current_total_delay_2[lane] - current_total_delay_1[lane];
+
+ if (difference > cdd_trwtto)
+ cdd_trwtto = difference;
+ }
+ }
+
+ /* Convert the difference to MEMCLKs */
+ cdd_trwtto = (((cdd_trwtto + (1 << 6) - 1) >> 6) & 0xf);
+
+ /* Fam15h BKDG section 2.10.5.5.4 */
+ if (max_dimms_installable == 1)
+ min_value = 0;
+ else
+ min_value = read_odt_delay + buffer_data_delay;
+ cdd_trwtto_we_delta = (((int16_t)cdd_trwtto - 1 + ((int16_t)write_early * 2)) + 1) / 2;
+ cdd_trwtto_we_delta += latency_difference + 3;
+ if (cdd_trwtto_we_delta < 0)
+ cdd_trwtto_we_delta = 0;
+ if ((cdd_trwtto_we_delta) > min_value)
+ trwtto = cdd_trwtto_we_delta;
+ else
+ trwtto = min_value;
dword = Get_NB32_DCT(dev, dct, 0xa4); /* DRAM Controller Temperature Throttle */
dword &= ~(0x1 << 11); /* BwCapEn = 0 */
@@ -2252,6 +2479,7 @@ static void fam15EnableTrainingMode(struct MCTStatStruc *pMCTstat,
dword = Get_NB32_DCT(dev, dct, 0x110); /* DRAM Controller Select Low */
dword &= ~(0x1 << 2); /* DctSelIntLvEn = interleave_channels */
dword |= (interleave_channels & 0x1) << 2;
+ dword |= (0x3 << 6); /* DctSelIntLvAddr = 0x3 */
Set_NB32_DCT(dev, dct, 0x110, dword); /* DRAM Controller Select Low */
/* NOTE
@@ -2259,22 +2487,6 @@ static void fam15EnableTrainingMode(struct MCTStatStruc *pMCTstat,
* otherwise semi-random lockups will occur due to misconfigured scrubbing hardware!
*/
- /* FIXME
- * The BKDG-recommended settings cause memory corruption on the ASUS KGPE-D16.
- * Investigate and fix...
- */
-#if 0
- /* Fam15h BKDG section 2.10.5.5.1 */
- dword = Get_NB32_DCT(dev, dct, 0x218); /* DRAM Timing 5 */
- dword &= ~(0xf << 24); /* TrdrdSdSc = 0x1 */
- dword |= (0x1 << 24);
- dword &= ~(0xf << 16); /* TrdrdSdDc = trdrdsddc */
- dword |= ((trdrdsddc & 0xf) << 16);
- dword &= ~(0xf); /* TrdrdDd = trdrddd */
- dword |= (trdrddd & 0xf);
- Set_NB32_DCT(dev, dct, 0x218, dword); /* DRAM Timing 5 */
-#endif
-
/* Fam15h BKDG section 2.10.5.5.2 */
dword = Get_NB32_DCT(dev, dct, 0x214); /* DRAM Timing 4 */
dword &= ~(0xf << 16); /* TwrwrSdSc = 0x1 */
@@ -2287,8 +2499,14 @@ static void fam15EnableTrainingMode(struct MCTStatStruc *pMCTstat,
/* Fam15h BKDG section 2.10.5.5.3 */
dword = Get_NB32_DCT(dev, dct, 0x218); /* DRAM Timing 5 */
+ dword &= ~(0xf << 24); /* TrdrdSdSc = 0x1 */
+ dword |= (0x1 << 24);
+ dword &= ~(0xf << 16); /* TrdrdSdDc = trdrdsddc */
+ dword |= ((trdrdsddc & 0xf) << 16);
dword &= ~(0xf << 8); /* Twrrd = twrrd */
dword |= ((twrrd & 0xf) << 8);
+ dword &= ~(0xf); /* TrdrdDd = trdrddd */
+ dword |= (trdrddd & 0xf);
Set_NB32_DCT(dev, dct, 0x218, dword); /* DRAM Timing 5 */
/* Fam15h BKDG section 2.10.5.5.4 */
@@ -2299,12 +2517,6 @@ static void fam15EnableTrainingMode(struct MCTStatStruc *pMCTstat,
dword |= ((((dword >> 8) & 0x1f) + 1) << 16);
Set_NB32_DCT(dev, dct, 0x21c, dword); /* DRAM Timing 6 */
- /* Configure partial power down delay */
- dword = Get_NB32(dev, 0x244); /* DRAM Controller Miscellaneous 3 */
- dword &= ~0xf; /* PrtlChPDDynDly = 0x2 */
- dword |= 0x2;
- Set_NB32(dev, 0x244, dword); /* DRAM Controller Miscellaneous 3 */
-
/* Enable prefetchers */
dword = Get_NB32(dev, 0x11c); /* Memory Controller Configuration High */
dword &= ~(0x1 << 13); /* PrefIoDis = 0 */
@@ -2393,6 +2605,8 @@ static void DQSTiming_D(struct MCTStatStruc *pMCTstat,
mct_TrainDQSPos_D(pMCTstat, pDCTstatA);
+ TrainMaxRdLatency_En_D(pMCTstat, pDCTstatA);
+
if (is_fam15h())
exit_training_mode_fam15(pMCTstat, pDCTstatA);
else
@@ -2971,6 +3185,13 @@ static void ClearDCT_D(struct MCTStatStruc *pMCTstat,
}
while(reg < reg_end) {
+ if ((reg & 0xFF) == 0x84) {
+ if (is_fam15h()) {
+ val = Get_NB32_DCT(dev, dct, reg);
+ val &= ~(0x1 << 23); /* Clear PchgPDModeSel */
+ val &= ~0x3; /* Clear BurstCtrl */
+ }
+ }
if ((reg & 0xFF) == 0x90) {
if (pDCTstat->LogicalCPUID & AMD_DR_Dx) {
val = Get_NB32_DCT(dev, dct, reg); /* get DRAMConfigLow */
@@ -3089,14 +3310,30 @@ static void SPD2ndTiming(struct MCTStatStruc *pMCTstat,
/* Convert DRAM CycleTiming values and store into DCT structure */
byte = pDCTstat->DIMMAutoSpeed;
- if (byte == 7)
- tCK16x = 20;
- else if (byte == 6)
- tCK16x = 24;
- else if (byte == 5)
- tCK16x = 30;
- else
- tCK16x = 40;
+ if (is_fam15h()) {
+ if (byte == 0x16)
+ tCK16x = 17;
+ else if (byte == 0x12)
+ tCK16x = 20;
+ else if (byte == 0xe)
+ tCK16x = 24;
+ else if (byte == 0xa)
+ tCK16x = 30;
+ else if (byte == 0x6)
+ tCK16x = 40;
+ else
+ tCK16x = 48;
+ }
+ else {
+ if (byte == 7)
+ tCK16x = 20;
+ else if (byte == 6)
+ tCK16x = 24;
+ else if (byte == 5)
+ tCK16x = 30;
+ else
+ tCK16x = 40;
+ }
/* Notes:
1. All secondary time values given in SPDs are in binary with units of ns.
@@ -3129,7 +3366,7 @@ static void SPD2ndTiming(struct MCTStatStruc *pMCTstat,
val = Max_TrpT;
pDCTstat->Trp = val;
- /*Trrd*/
+ /* Trrd */
pDCTstat->DIMMTrrd = Trrd;
val = Trrd / tCK16x;
if (Trrd % tCK16x) { /* round up number of busclocks */
@@ -3247,21 +3484,31 @@ static void SPD2ndTiming(struct MCTStatStruc *pMCTstat,
dword = Get_NB32_DCT(dev, dct, 0x200); /* DRAM Timing 0 */
dword &= ~(0x3f1f1f1f);
- dword |= ((pDCTstat->Tras + 0xf) & 0x3f) << 24; /* Tras */
- dword |= ((pDCTstat->Trp + 0x5) & 0x1f) << 16; /* Trp */
- dword |= ((pDCTstat->Trcd + 0x5) & 0x1f) << 8; /* Trcd */
+ dword |= (pDCTstat->Tras & 0x3f) << 24; /* Tras */
+ val = pDCTstat->Trp;
+ val = mct_AdjustSPDTimings(pMCTstat, pDCTstat, val);
+ dword |= (val & 0x1f) << 16; /* Trp */
+ dword |= (pDCTstat->Trcd & 0x1f) << 8; /* Trcd */
dword |= (pDCTstat->CASL & 0x1f); /* Tcl */
Set_NB32_DCT(dev, dct, 0x200, dword); /* DRAM Timing 0 */
dword = Get_NB32_DCT(dev, dct, 0x204); /* DRAM Timing 1 */
dword &= ~(0x0f3f0f3f);
- dword |= ((pDCTstat->Trtp + 0x4) & 0xf) << 24; /* Trtp */
- if (pDCTstat->Tfaw != 0)
- dword |= ((((pDCTstat->Tfaw - 0x1) * 2) + 0x10) & 0x3f) << 16; /* FourActWindow */
- dword |= ((pDCTstat->Trrd + 0x4) & 0xf) << 8; /* Trrd */
- dword |= ((pDCTstat->Trc + 0xb) & 0x3f); /* Trc */
+ dword |= (pDCTstat->Trtp & 0xf) << 24; /* Trtp */
+ if (pDCTstat->Tfaw != 0) {
+ val = pDCTstat->Tfaw;
+ val = mct_AdjustSPDTimings(pMCTstat, pDCTstat, val);
+ if ((val > 0x5) && (val < 0x2b))
+ dword |= (val & 0x3f) << 16; /* FourActWindow */
+ }
+ dword |= (pDCTstat->Trrd & 0xf) << 8; /* Trrd */
+ dword |= (pDCTstat->Trc & 0x3f); /* Trc */
Set_NB32_DCT(dev, dct, 0x204, dword); /* DRAM Timing 1 */
+ /* Trfc0-Trfc3 */
+ for (i=0; i<4; i++)
+ if (pDCTstat->Trfc[i] == 0x0)
+ pDCTstat->Trfc[i] = 0x4;
dword = Get_NB32_DCT(dev, dct, 0x208); /* DRAM Timing 2 */
dword &= ~(0x07070707);
dword |= (pDCTstat->Trfc[3] & 0x7) << 24; /* Trfc3 */
@@ -3272,14 +3519,14 @@ static void SPD2ndTiming(struct MCTStatStruc *pMCTstat,
dword = Get_NB32_DCT(dev, dct, 0x20c); /* DRAM Timing 3 */
dword &= ~(0x00000f00);
- dword |= ((pDCTstat->Twtr + 0x4) & 0xf) << 8; /* Twtr */
+ dword |= (pDCTstat->Twtr & 0xf) << 8; /* Twtr */
dword &= ~(0x0000001f);
dword |= (Tcwl & 0x1f); /* Tcwl */
Set_NB32_DCT(dev, dct, 0x20c, dword); /* DRAM Timing 3 */
dword = Get_NB32_DCT(dev, dct, 0x22c); /* DRAM Timing 10 */
dword &= ~(0x0000001f);
- dword |= ((pDCTstat->Twr + 0x4) & 0x1f); /* Twr */
+ dword |= (pDCTstat->Twr & 0x1f); /* Twr */
Set_NB32_DCT(dev, dct, 0x22c, dword); /* DRAM Timing 10 */
if (pDCTstat->Speed > mhz_to_memclk_config(mctGet_NVbits(NV_MIN_MEMCLK))) {
@@ -3875,6 +4122,8 @@ static u8 AutoConfig_D(struct MCTStatStruc *pMCTstat,
}
}
+ DramConfigMisc2 = mct_SetDramConfigMisc2(pDCTstat, dct, DramConfigMisc2, DramControl);
+
printk(BIOS_DEBUG, "AutoConfig_D: DramControl: %08x\n", DramControl);
printk(BIOS_DEBUG, "AutoConfig_D: DramTimingLo: %08x\n", DramTimingLo);
printk(BIOS_DEBUG, "AutoConfig_D: DramConfigMisc: %08x\n", DramConfigMisc);
@@ -3886,7 +4135,6 @@ static u8 AutoConfig_D(struct MCTStatStruc *pMCTstat,
Set_NB32_DCT(dev, dct, 0x78, DramControl);
Set_NB32_DCT(dev, dct, 0x88, DramTimingLo);
Set_NB32_DCT(dev, dct, 0xa0, DramConfigMisc);
- DramConfigMisc2 = mct_SetDramConfigMisc2(pDCTstat, dct, DramConfigMisc2);
Set_NB32_DCT(dev, dct, 0xa8, DramConfigMisc2);
Set_NB32_DCT(dev, dct, 0x90, DramConfigLo);
ProgDramMRSReg_D(pMCTstat, pDCTstat, dct);
@@ -5257,6 +5505,16 @@ static void mct_PhyController_Config(struct MCTStatStruc *pMCTstat,
u32 dev = pDCTstat->dev_dct;
if (pDCTstat->LogicalCPUID & (AMD_DR_DAC2_OR_C3 | AMD_RB_C3 | AMD_FAM15_ALL)) {
+ if (is_fam15h()) {
+ /* Set F2x[1, 0]98_x0D0F0F13 DllDisEarlyU and DllDisEarlyL to save power */
+ for (index = 0; index < 0x9; index++) {
+ dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0013 | (index << 8));
+ dword |= (0x1 << 1); /* DllDisEarlyU = 1 */
+ dword |= 0x1; /* DllDisEarlyL = 1 */
+ Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0013 | (index << 8), dword);
+ }
+ }
+
if (pDCTstat->Dimmx4Present == 0) {
/* Set bit7 RxDqsUDllPowerDown to register F2x[1, 0]98_x0D0F0F13 for
* additional power saving when x4 DIMMs are not present.
@@ -5301,8 +5559,9 @@ static void mct_FinalMCT_D(struct MCTStatStruc *pMCTstat,
mct_ExtMCTConfig_Dx(pDCTstat);
} else {
/* Family 15h CPUs */
- val = 0x0ce00f00 | 0x1 << 29; /* FlushWrOnStpGnt */
- val |= 0x10 << 2; /* MctWrLimit = 16 */
+ val = 0x0ce00f00; /* FlushWrOnStpGnt = 0x0 */
+ val |= 0x10 << 2; /* MctWrLimit = 0x10 */
+ val |= 0x1; /* DctWrLimit = 0x1 */
Set_NB32(pDCTstat->dev_dct, 0x11c, val);
val = Get_NB32(pDCTstat->dev_dct, 0x1b0);
@@ -6543,8 +6802,8 @@ void ProgDramMRSReg_D(struct MCTStatStruc *pMCTstat,
dword = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x84);
if (is_fam15h()) {
- dword |= DramMRS;
dword &= ~0x00800003;
+ dword |= DramMRS;
} else {
dword &= ~0x00fc2f8f;
dword |= DramMRS;
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h
index eb4c74e309..b72b9da59a 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h
@@ -984,6 +984,7 @@ void UMAMemTyping_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat
uint64_t mctGetLogicalCPUID(u32 Node);
u8 ECCInit_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA);
void TrainReceiverEn_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA, u8 Pass);
+void TrainMaxRdLatency_En_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA);
void mct_TrainDQSPos_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA);
void mctSetEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA);
void TrainMaxReadLatency_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA);
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c
index 3615616cd5..06597e23bb 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c
@@ -20,6 +20,9 @@ static void write_dqs_receiver_enable_control_registers(uint16_t* current_total_
static void read_read_dqs_timing_control_registers(uint16_t* current_total_delay,
uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg);
+static void dqsTrainMaxRdLatency_SW_Fam15(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat);
+
static void CalcEccDQSPos_D(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstat, u16 like,
u8 scale, u8 ChipSel);
@@ -214,6 +217,27 @@ void TrainReceiverEn_D(struct MCTStatStruc *pMCTstat,
}
}
+void TrainMaxRdLatency_En_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstatA)
+{
+ uint8_t node;

+ /* Visit each node slot; slots with DCTSysLimit == 0 are skipped */
+ for (node = 0; node < MAX_NODES_SUPPORTED; node++) {
+ struct DCTStatStruc *pDCTstat = pDCTstatA + node;

+ if (!pDCTstat->DCTSysLimit)
+ continue;

+ /* FIXME
+ * Implement Family 10h MaxRdLatency training
+ */
+ if (!is_fam15h())
+ continue;
+ dqsTrainMaxRdLatency_SW_Fam15(pMCTstat, pDCTstat);
+ }
+}
+
static void SetEccDQSRdWrPos_D_Fam10(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstat, u8 ChipSel)
{
@@ -894,7 +918,7 @@ static void TrainDQSRdWrPos_D_Fam10(struct MCTStatStruc *pMCTstat,
* Algorithm detailed in the Fam15h BKDG Rev. 3.14 section 2.10.5.8.5
*/
static void Calc_SetMaxRdLatency_D_Fam15(struct MCTStatStruc *pMCTstat,
- struct DCTStatStruc *pDCTstat, uint8_t dct)
+ struct DCTStatStruc *pDCTstat, uint8_t dct, uint8_t calc_min)
{
uint8_t dimm;
uint8_t lane;
@@ -938,7 +962,8 @@ static void Calc_SetMaxRdLatency_D_Fam15(struct MCTStatStruc *pMCTstat,
p += (9 - dword);
/* 2.10.5.8.5 (4) */
- p += 5;
+ if (!calc_min)
+ p += 5;
/* 2.10.5.8.5 (5) */
dword = Get_NB32_DCT(dev, dct, 0xa8);
@@ -965,7 +990,8 @@ static void Calc_SetMaxRdLatency_D_Fam15(struct MCTStatStruc *pMCTstat,
p += (max_delay >> 5);
/* 2.10.5.8.5 (8) */
- p += 5;
+ if (!calc_min)
+ p += 5;
/* 2.10.5.8.5 (9) */
t += 800;
@@ -976,13 +1002,16 @@ static void Calc_SetMaxRdLatency_D_Fam15(struct MCTStatStruc *pMCTstat,
n = (((((uint64_t)p * 1000000000000ULL)/(((uint64_t)fam15h_freq_tab[mem_clk] * 1000000ULL) * 2)) + ((uint64_t)t)) * ((uint64_t)nb_clk * 1000)) / 1000000000ULL;
/* 2.10.5.8.5 (11) */
- n -= 1;
+ if (!calc_min)
+ n -= 1;
/* 2.10.5.8.5 (12) */
- dword = Get_NB32_DCT_NBPstate(dev, dct, nb_pstate, 0x210);
- dword &= ~(0x3ff << 22);
- dword |= (((n - 1) & 0x3ff) << 22);
- Set_NB32_DCT_NBPstate(dev, dct, nb_pstate, 0x210, dword);
+ if (!calc_min) {
+ dword = Get_NB32_DCT_NBPstate(dev, dct, nb_pstate, 0x210);
+ dword &= ~(0x3ff << 22);
+ dword |= (((n - 1) & 0x3ff) << 22);
+ Set_NB32_DCT_NBPstate(dev, dct, nb_pstate, 0x210, dword);
+ }
/* Save result for later use */
pDCTstat->CH_MaxRdLat[dct] = n - 1;
@@ -1103,6 +1132,9 @@ static void read_dram_dqs_training_pattern_fam15(struct MCTStatStruc *pMCTstat,
} else if (lane < 8) {
Set_NB32_DCT(dev, dct, 0x274, ~0x0);
Set_NB32_DCT(dev, dct, 0x278, ~(0xff << (lane * 8)));
+ } else if (lane == 0xff) {
+ Set_NB32_DCT(dev, dct, 0x274, ~0xffffffff);
+ Set_NB32_DCT(dev, dct, 0x278, ~0xffffffff);
} else {
Set_NB32_DCT(dev, dct, 0x274, ~0x0);
Set_NB32_DCT(dev, dct, 0x278, ~0x0);
@@ -1110,8 +1142,9 @@ static void read_dram_dqs_training_pattern_fam15(struct MCTStatStruc *pMCTstat,
dword = Get_NB32_DCT(dev, dct, 0x27c);
dword &= ~(0xff); /* EccMask = 0 */
- if ((lane != 8) || (pDCTstat->DimmECCPresent == 0))
- dword |= 0xff; /* EccMask = 0xff */
+ if (lane != 0xff)
+ if ((lane != 8) || (pDCTstat->DimmECCPresent == 0))
+ dword |= 0xff; /* EccMask = 0xff */
Set_NB32_DCT(dev, dct, 0x27c, dword);
dword = Get_NB32_DCT(dev, dct, 0x270);
@@ -1180,6 +1213,9 @@ static void write_dram_dqs_training_pattern_fam15(struct MCTStatStruc *pMCTstat,
} else if (lane < 8) {
Set_NB32_DCT(dev, dct, 0x274, ~0x0);
Set_NB32_DCT(dev, dct, 0x278, ~(0xff << (lane * 8)));
+ } else if (lane == 0xff) {
+ Set_NB32_DCT(dev, dct, 0x274, ~0xffffffff);
+ Set_NB32_DCT(dev, dct, 0x278, ~0xffffffff);
} else {
Set_NB32_DCT(dev, dct, 0x274, ~0x0);
Set_NB32_DCT(dev, dct, 0x278, ~0x0);
@@ -1187,8 +1223,9 @@ static void write_dram_dqs_training_pattern_fam15(struct MCTStatStruc *pMCTstat,
dword = Get_NB32_DCT(dev, dct, 0x27c);
dword &= ~(0xff); /* EccMask = 0 */
- if ((lane != 8) || (pDCTstat->DimmECCPresent == 0))
- dword |= 0xff; /* EccMask = 0xff */
+ if (lane != 0xff)
+ if ((lane != 8) || (pDCTstat->DimmECCPresent == 0))
+ dword |= 0xff; /* EccMask = 0xff */
Set_NB32_DCT(dev, dct, 0x27c, dword);
dword = Get_NB32_DCT(dev, dct, 0x270);
@@ -1274,7 +1311,7 @@ static uint8_t TrainDQSRdWrPos_D_Fam15(struct MCTStatStruc *pMCTstat,
uint32_t dev = pDCTstat->dev_dct;
/* Calculate and program MaxRdLatency */
- Calc_SetMaxRdLatency_D_Fam15(pMCTstat, pDCTstat, dct);
+ Calc_SetMaxRdLatency_D_Fam15(pMCTstat, pDCTstat, dct, 0);
Errors = 0;
dual_rank = 0;
@@ -1632,7 +1669,7 @@ static void TrainDQSReceiverEnCyc_D_Fam15(struct MCTStatStruc *pMCTstat,
write_dqs_receiver_enable_control_registers(current_phy_phase_delay, dev, dct, dimm, index_reg);
/* Calculate and program MaxRdLatency */
- Calc_SetMaxRdLatency_D_Fam15(pMCTstat, pDCTstat, dct);
+ Calc_SetMaxRdLatency_D_Fam15(pMCTstat, pDCTstat, dct, 0);
/* 2.10.5.8.3 (4 B) */
dqs_results_array[current_phy_phase_delay[lane]] = TrainDQSRdWrPos_D_Fam15(pMCTstat, pDCTstat, dct, Receiver, Receiver + 2, lane, lane + 1);
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctproc.c b/src/northbridge/amd/amdmct/mct_ddr3/mctproc.c
index 09221935a3..cf13b40b31 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mctproc.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mctproc.c
@@ -15,7 +15,8 @@
*/
/* mct_SetDramConfigMisc2_Cx & mct_SetDramConfigMisc2_Dx */
-u32 mct_SetDramConfigMisc2(struct DCTStatStruc *pDCTstat, u8 dct, u32 misc2)
+u32 mct_SetDramConfigMisc2(struct DCTStatStruc *pDCTstat,
+ uint8_t dct, uint32_t misc2, uint32_t DramControl)
{
u32 val;
@@ -24,17 +25,47 @@ u32 mct_SetDramConfigMisc2(struct DCTStatStruc *pDCTstat, u8 dct, u32 misc2)
if (pDCTstat->LogicalCPUID & AMD_FAM15_ALL) {
uint8_t cs_mux_45;
uint8_t cs_mux_67;
+ uint32_t f2x80;
- /* BKDG v3.14 Table 200 / Table 201 */
- if (MaxDimmsInstallable < 3) {
- cs_mux_45 = 1;
- cs_mux_67 = 1;
- } else {
+ misc2 &= ~(0x1 << 28); /* FastSelfRefEntryDis = 0x0 */
+ if (MaxDimmsInstallable == 3) {
+ /* FIXME 3 DIMMS per channel unimplemented */
cs_mux_45 = 0;
+ } else {
+ uint32_t f2x60 = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x60);
+ f2x80 = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x80);
+ if ((((f2x80 & 0xf) == 0x7) || ((f2x80 & 0xf) == 0x9))
+ && ((f2x60 & 0x3) == 0x3))
+ cs_mux_45 = 1;
+ else if ((((f2x80 & 0xa) == 0x7) || ((f2x80 & 0xb) == 0x9)) /* NOTE(review): (x & 0xa) == 0x7 can never be true because the mask clears bits 0 and 2; confirm the intended masks/values against the BKDG */
+ && ((f2x60 & 0x3) > 0x1))
+ cs_mux_45 = 1;
+ else
+ cs_mux_45 = 0;
+ }
+
+ if (MaxDimmsInstallable == 1) {
+ cs_mux_67 = 0;
+ } else if (MaxDimmsInstallable == 2) {
+ uint32_t f2x64 = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x64);
+ f2x80 = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x80);
+ if (((((f2x80 >> 4) & 0xf) == 0x7) || (((f2x80 >> 4) & 0xf) == 0x9))
+ && ((f2x64 & 0x3) == 0x3))
+ cs_mux_67 = 1;
+ else if (((((f2x80 >> 4) & 0xa) == 0x7) || (((f2x80 >> 4) & 0xb) == 0x9)) /* NOTE(review): (x & 0xa) == 0x7 can never be true because the mask clears bits 0 and 2; confirm the intended masks/values against the BKDG */
+ && ((f2x64 & 0x3) > 0x1))
+ cs_mux_67 = 1;
+ else
+ cs_mux_67 = 0;
+ } else {
+ /* FIXME 3 DIMMS per channel unimplemented */
cs_mux_67 = 0;
}
- misc2 |= (cs_mux_45 & 0x1) << 26;
- misc2 |= (cs_mux_67 & 0x1) << 27;
+
+ misc2 &= ~(0x1 << 27); /* CsMux67 = cs_mux_67 */
+ misc2 |= ((cs_mux_67 & 0x1) << 27);
+ misc2 &= ~(0x1 << 26); /* CsMux45 = cs_mux_45 */
+ misc2 |= ((cs_mux_45 & 0x1) << 26);
} else if (pDCTstat->LogicalCPUID & (AMD_DR_Dx | AMD_DR_Cx)) {
if (pDCTstat->Status & (1 << SB_Registered)) {
misc2 |= 1 << SubMemclkRegDly;
@@ -46,8 +77,8 @@ u32 mct_SetDramConfigMisc2(struct DCTStatStruc *pDCTstat, u8 dct, u32 misc2)
if (pDCTstat->LogicalCPUID & AMD_DR_Cx)
misc2 |= 1 << OdtSwizzle;
- val = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x78);
+ val = DramControl;
val &= 7;
val = ((~val) & 0xff) + 1;
val += 6;
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c
index 19b1b8f1e7..b36ecae32f 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c
@@ -541,9 +541,8 @@ static void dqsTrainRcvrEn_SW_Fam10(struct MCTStatStruc *pMCTstat,
u32 dev;
u32 index_reg;
u32 ch_start, ch_end, ch;
- u32 msr;
+ msr_t msr;
u32 cr4;
- u32 lo, hi;
uint32_t dword;
uint8_t dimm;
@@ -594,15 +593,14 @@ static void dqsTrainRcvrEn_SW_Fam10(struct MCTStatStruc *pMCTstat,
cr4 |= (1 << 9); /* OSFXSR enable SSE2 */
write_cr4(cr4);
- msr = HWCR;
- _RDMSR(msr, &lo, &hi);
+ msr = rdmsr(HWCR);
/* FIXME: Why use SSEDIS */
- if(lo & (1 << 17)) { /* save the old value */
+ if(msr.lo & (1 << 17)) { /* save the old value */
_Wrap32Dis = 1;
}
- lo |= (1 << 17); /* HWCR.wrap32dis */
- lo &= ~(1 << 15); /* SSEDIS */
- _WRMSR(msr, lo, hi); /* Setting wrap32dis allows 64-bit memory references in real mode */
+ msr.lo |= (1 << 17); /* HWCR.wrap32dis */
+ msr.lo &= ~(1 << 15); /* SSEDIS */
+ wrmsr(HWCR, msr); /* Setting wrap32dis allows 64-bit memory references in real mode */
_DisableDramECC = mct_DisableDimmEccEn_D(pMCTstat, pDCTstat);
@@ -935,10 +933,9 @@ static void dqsTrainRcvrEn_SW_Fam10(struct MCTStatStruc *pMCTstat,
}
if(!_Wrap32Dis) {
- msr = HWCR;
- _RDMSR(msr, &lo, &hi);
- lo &= ~(1<<17); /* restore HWCR.wrap32dis */
- _WRMSR(msr, lo, hi);
+ msr = rdmsr(HWCR);
+ msr.lo &= ~(1<<17); /* restore HWCR.wrap32dis */
+ wrmsr(HWCR, msr);
}
if(!_SSE2){
cr4 = read_cr4();
@@ -1420,7 +1417,7 @@ static void dqsTrainRcvrEn_SW_Fam15(struct MCTStatStruc *pMCTstat,
}
/* Calculate and program MaxRdLatency */
- Calc_SetMaxRdLatency_D_Fam15(pMCTstat, pDCTstat, Channel);
+ Calc_SetMaxRdLatency_D_Fam15(pMCTstat, pDCTstat, Channel, 0);
if(_DisableDramECC) {
mct_EnableDimmEccEn_D(pMCTstat, pDCTstat, _DisableDramECC);
@@ -1483,6 +1480,199 @@ static void dqsTrainRcvrEn_SW_Fam15(struct MCTStatStruc *pMCTstat,
printk(BIOS_DEBUG, "TrainRcvrEn: Done\n\n");
}
+/* Program a MaxRdLatency value into DCT register 0x210.
+ *
+ * For NB P-state 0 and NB P-state 1 the register is read, bits [31:22] are
+ * replaced with the low 10 bits of 'latency', and the result is written back.
+ * All other bits of the register are preserved.
+ *
+ * pMCTstat: not used by this function
+ * pDCTstat: supplies the DCT PCI device (dev_dct)
+ * dct:      DCT (channel) whose register is programmed
+ * latency:  value to program; only bits [9:0] are used
+ */
+static void write_max_read_latency_to_registers(struct MCTStatStruc *pMCTstat,
+				struct DCTStatStruc *pDCTstat, uint8_t dct, uint16_t latency)
+{
+	uint32_t dword;
+	uint8_t nb_pstate;
+
+	for (nb_pstate = 0; nb_pstate < 2; nb_pstate++) {
+		dword = Get_NB32_DCT_NBPstate(pDCTstat->dev_dct, dct, nb_pstate, 0x210);
+		/* Use unsigned operands: the field reaches bit 31, and shifting
+		 * a signed int into the sign bit is undefined behaviour.
+		 */
+		dword &= ~((uint32_t)0x3ff << 22);
+		dword |= ((uint32_t)(latency & 0x3ff) << 22);
+		Set_NB32_DCT_NBPstate(pDCTstat->dev_dct, dct, nb_pstate, 0x210, dword);
+	}
+}
+
+/* DQS MaxRdLatency Training (Family 15h)
+ * Algorithm detailed in:
+ * The Fam15h BKDG Rev. 3.14 section 2.10.5.8.5.1
+ * This algorithm runs at the highest supported MEMCLK.
+ *
+ * For every channel with valid DIMMs this routine:
+ *  - finds the DIMM whose receiver enable + read DQS delay total is largest,
+ *  - seeds CH_MaxRdLat via Calc_SetMaxRdLatency_D_Fam15(),
+ *  - writes the training pattern to that DIMM and then increments
+ *    CH_MaxRdLat until the pattern reads back without error bits set in
+ *    DCT register 0x268 (or the 0x3ff limit is reached),
+ *  - adds one plus a margin derived from the MEMCLK and NB clock and
+ *    programs the final value.
+ *
+ * CR4.OSFXSR and HWCR.wrap32dis are set for the duration of the routine and
+ * restored afterwards if they were clear on entry. DIMM ECC is handled via
+ * mct_DisableDimmEccEn_D() / mct_EnableDimmEccEn_D().
+ *
+ * A channel that reports a MEMCLK setting with no entry in the frequency
+ * table is skipped after the search step; its CH_MaxRdLat is left at the
+ * value the search produced and the final margin is not applied.
+ */
+static void dqsTrainMaxRdLatency_SW_Fam15(struct MCTStatStruc *pMCTstat,
+				struct DCTStatStruc *pDCTstat)
+{
+	u8 Channel;
+	u8 Addl_Index = 0;
+	u8 Receiver;
+	u8 _DisableDramECC = 0, _Wrap32Dis = 0, _SSE2 = 0;
+
+	u32 dev;
+	u32 index_reg;
+	msr_t msr;
+	u32 cr4;
+
+	uint32_t dword;
+	uint8_t dimm;
+	uint8_t lane;
+	uint8_t mem_clk;
+	uint32_t nb_clk;
+	uint8_t nb_pstate;
+	uint16_t current_total_delay[MAX_BYTE_LANES];
+	uint16_t current_rdqs_total_delay[MAX_BYTE_LANES];
+	uint8_t current_worst_case_total_delay_dimm;
+	uint16_t current_worst_case_total_delay_value;
+
+	/* MEMCLK frequency in MHz, indexed by the 5-bit field read from
+	 * DCT register 0x94; a zero entry marks an unsupported setting.
+	 */
+	static const uint16_t fam15h_freq_tab[] = {0, 0, 0, 0, 333, 0, 400, 0, 0, 0, 533, 0, 0, 0, 667, 0, 0, 0, 800, 0, 0, 0, 933};
+
+	print_debug_dqs("\nTrainMaxRdLatency: Node", pDCTstat->Node_ID, 0);
+
+	dev = pDCTstat->dev_dct;
+	index_reg = 0x98;
+
+	cr4 = read_cr4();
+	if (cr4 & (1 << 9)) {	/* save the old value */
+		_SSE2 = 1;
+	}
+	cr4 |= (1 << 9);	/* OSFXSR enable SSE2 */
+	write_cr4(cr4);
+
+	msr = rdmsr(HWCR);
+	/* FIXME: Why use SSEDIS */
+	if (msr.lo & (1 << 17)) {	/* save the old value */
+		_Wrap32Dis = 1;
+	}
+	msr.lo |= (1 << 17);	/* HWCR.wrap32dis */
+	msr.lo &= ~(1 << 15);	/* SSEDIS */
+	wrmsr(HWCR, msr);	/* Setting wrap32dis allows 64-bit memory references in real mode */
+
+	_DisableDramECC = mct_DisableDimmEccEn_D(pMCTstat, pDCTstat);
+
+	for (Channel = 0; Channel < 2; Channel++) {
+		print_debug_dqs("\tTrainMaxRdLatency51: Node ", pDCTstat->Node_ID, 1);
+		print_debug_dqs("\tTrainMaxRdLatency51: Channel ", Channel, 1);
+		pDCTstat->Channel = Channel;
+
+		if (pDCTstat->DIMMValidDCT[Channel] == 0)
+			continue;
+
+		/* Register access kept from the original sequence; the value
+		 * is read again before it is used in step 5 below.
+		 */
+		mem_clk = Get_NB32_DCT(dev, Channel, 0x94) & 0x1f;
+
+		Receiver = mct_InitReceiver_D(pDCTstat, Channel);
+
+		/* Find DIMM with worst case receiver enable delays */
+		current_worst_case_total_delay_dimm = 0;
+		current_worst_case_total_delay_value = 0;
+
+		/* There are four receiver pairs, loosely associated with chipselects.
+		 * This is essentially looping over each DIMM.
+		 */
+		for (; Receiver < 8; Receiver += 2) {
+			Addl_Index = (Receiver >> 1) * 3 + 0x10;
+			dimm = (Receiver >> 1);
+
+			print_debug_dqs("\t\tTrainMaxRdLatency52: index ", Addl_Index, 2);
+
+			if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver)) {
+				continue;
+			}
+
+			/* Retrieve the total delay values from pass 1 of DQS receiver enable training */
+			read_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg);
+			read_read_dqs_timing_control_registers(current_rdqs_total_delay, dev, Channel, dimm, index_reg);
+
+			for (lane = 0; lane < 8; lane++) {
+				current_total_delay[lane] += current_rdqs_total_delay[lane];
+				if (current_total_delay[lane] > current_worst_case_total_delay_value) {
+					current_worst_case_total_delay_dimm = dimm;
+					current_worst_case_total_delay_value = current_total_delay[lane];
+				}
+			}
+
+#if DQS_TRAIN_DEBUG > 0
+			for (lane = 0; lane < 8; lane++)
+				print_debug_dqs_pair("\t\tTrainMaxRdLatency56: Lane ", lane, " current_total_delay ", current_total_delay[lane], 2);
+#endif
+		}
+
+		/* 2.10.5.8.5.1.1 */
+		Calc_SetMaxRdLatency_D_Fam15(pMCTstat, pDCTstat, Channel, 1);
+
+		/* 2.10.5.8.5.1.[2,3]
+		 * Write the DRAM training pattern to the test address
+		 */
+		write_dram_dqs_training_pattern_fam15(pMCTstat, pDCTstat, Channel, current_worst_case_total_delay_dimm << 1, 0xff);
+
+		/* 2.10.5.8.5.1.4
+		 * Incrementally test each MaxRdLatency candidate
+		 */
+		for (; pDCTstat->CH_MaxRdLat[Channel] < 0x3ff; pDCTstat->CH_MaxRdLat[Channel]++) {
+			write_max_read_latency_to_registers(pMCTstat, pDCTstat, Channel, pDCTstat->CH_MaxRdLat[Channel]);
+			read_dram_dqs_training_pattern_fam15(pMCTstat, pDCTstat, Channel, current_worst_case_total_delay_dimm << 1, 0xff);
+			dword = Get_NB32_DCT(dev, Channel, 0x268) & 0x3ffff;
+			if (!dword)
+				break;
+			Set_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000050, 0x13131313);
+		}
+
+		/* 2.10.5.8.5.1.5 */
+		nb_pstate = 0;
+		mem_clk = Get_NB32_DCT(dev, Channel, 0x94) & 0x1f;
+		/* mem_clk can be 0..31 but the table is shorter, so bounds-check
+		 * before indexing. Skip this channel instead of returning so the
+		 * ECC / HWCR / CR4 state saved above is always restored.
+		 */
+		if ((mem_clk >= (sizeof(fam15h_freq_tab) / sizeof(fam15h_freq_tab[0])))
+			|| (fam15h_freq_tab[mem_clk] == 0)) {
+			continue;
+		}
+		dword = Get_NB32(pDCTstat->dev_nbctl, (0x160 + (nb_pstate * 4)));	/* Retrieve NbDid, NbFid */
+		nb_clk = (200 * (((dword >> 1) & 0x1f) + 0x4)) / (((dword >> 7) & 0x1)?2:1);
+
+		pDCTstat->CH_MaxRdLat[Channel]++;
+		pDCTstat->CH_MaxRdLat[Channel] += ((((uint64_t)15 * 100000000000ULL) / ((uint64_t)fam15h_freq_tab[mem_clk] * 1000000ULL))
+						 * ((uint64_t)nb_clk * 1000)) / 1000000000ULL;
+
+		write_max_read_latency_to_registers(pMCTstat, pDCTstat, Channel, pDCTstat->CH_MaxRdLat[Channel]);
+	}
+
+	if (_DisableDramECC) {
+		mct_EnableDimmEccEn_D(pMCTstat, pDCTstat, _DisableDramECC);
+	}
+
+	if (!_Wrap32Dis) {
+		msr = rdmsr(HWCR);
+		msr.lo &= ~(1<<17);	/* restore HWCR.wrap32dis */
+		wrmsr(HWCR, msr);
+	}
+	if (!_SSE2) {
+		cr4 = read_cr4();
+		cr4 &= ~(1<<9);	/* restore cr4.OSFXSR */
+		write_cr4(cr4);
+	}
+
+#if DQS_TRAIN_DEBUG > 0
+	{
+		u8 ChannelDTD;
+		printk(BIOS_DEBUG, "TrainMaxRdLatency: CH_MaxRdLat:\n");
+		for(ChannelDTD = 0; ChannelDTD<2; ChannelDTD++) {
+			printk(BIOS_DEBUG, "Channel:%x: %x\n",
+				ChannelDTD, pDCTstat->CH_MaxRdLat[ChannelDTD]);
+		}
+	}
+#endif
+
+	printk(BIOS_DEBUG, "TrainMaxRdLatency: Status %x\n", pDCTstat->Status);
+	printk(BIOS_DEBUG, "TrainMaxRdLatency: ErrStatus %x\n", pDCTstat->ErrStatus);
+	printk(BIOS_DEBUG, "TrainMaxRdLatency: ErrCode %x\n", pDCTstat->ErrCode);
+	printk(BIOS_DEBUG, "TrainMaxRdLatency: Done\n\n");
+}
+
u8 mct_InitReceiver_D(struct DCTStatStruc *pDCTstat, u8 dct)
{
if (pDCTstat->DIMMValidDCT[dct] == 0 ) {
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c b/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c
index 4bfcc401ff..b354d923f8 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c
@@ -168,6 +168,8 @@ static void EnterSelfRefresh(struct MCTStatStruc *pMCTstat,
static void ChangeMemClk(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstat)
{
+ printk(BIOS_DEBUG, "%s: Start\n", __func__);
+
uint8_t DCT0Present;
uint8_t DCT1Present;
uint32_t dword;
@@ -309,6 +311,8 @@ static void ChangeMemClk(struct MCTStatStruc *pMCTstat,
mct_Wait(15000); /* Wait for 750us */
}
}
+
+ printk(BIOS_DEBUG, "%s: Done\n", __func__);
}
/*