summaryrefslogtreecommitdiff
path: root/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c
diff options
context:
space:
mode:
authorTimothy Pearson <tpearson@raptorengineeringinc.com>2015-09-05 17:55:58 -0500
committerMartin Roth <martinroth@google.com>2015-10-26 23:52:54 +0100
commitb8a355dcdf319671b97f8688209ad5d471fc0905 (patch)
tree6cd55b06343af460642431bb8dd3d782d0ccc45e /src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c
parent7a5413a81c2fecc443999b006d641cd903327346 (diff)
downloadcoreboot-b8a355dcdf319671b97f8688209ad5d471fc0905.tar.xz
northbridge/amd/amdmct: Fix broken AMD K10 DDR3 memory initialization
The native AMD DDR3 memory initialization code was riddled with numerous errors and was missing critical configuration code segments; this made it so that DDR3 memory did not function on most AMD boards. This patch corrects enough of the DDR3 initialization such that UDIMMs can be used on most channels of G34 Opteron boards. Further work is needed to fix the broken RDIMM code and remaining UDIMM issues. Change-Id: Iab690db769e820600693ad1170085623b177b94e Signed-off-by: Timothy Pearson <tpearson@raptorengineeringinc.com> Reviewed-on: http://review.coreboot.org/11941 Tested-by: build bot (Jenkins) Reviewed-by: Aaron Durbin <adurbin@chromium.org> Tested-by: Raptor Engineering Automated Test Stand <noreply@raptorengineeringinc.com>
Diffstat (limited to 'src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c')
-rw-r--r--src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c806
1 files changed, 434 insertions, 372 deletions
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c
index 404727b493..cc2f43a952 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c
@@ -2,6 +2,7 @@
* This file is part of the coreboot project.
*
* Copyright (C) 2010 Advanced Micro Devices, Inc.
+ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -22,13 +23,6 @@ static void CalcEccDQSPos_D(struct MCTStatStruc *pMCTstat,
u8 scale, u8 ChipSel);
static void GetDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstat, u8 ChipSel);
-static u8 MiddleDQS_D(u8 min, u8 max);
-static void TrainReadDQS_D(struct MCTStatStruc *pMCTstat,
- struct DCTStatStruc *pDCTstat,
- u8 cs_start);
-static void TrainWriteDQS_D(struct MCTStatStruc *pMCTstat,
- struct DCTStatStruc *pDCTstat,
- u8 cs_start);
static void WriteDQSTestPattern_D(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstat,
u32 TestAddr_lo);
@@ -43,31 +37,19 @@ static void FlushDQSTestPattern_D(struct DCTStatStruc *pDCTstat,
u32 addr_lo);
static void SetTargetWTIO_D(u32 TestAddr);
static void ResetTargetWTIO_D(void);
-static void ReadDQSTestPattern_D(struct MCTStatStruc *pMCTstat,
- struct DCTStatStruc *pDCTstat,
- u32 TestAddr_lo);
-static void mctEngDQSwindow_Save_D(struct MCTStatStruc *pMCTstat,
- struct DCTStatStruc *pDCTstat, u8 ChipSel,
- u8 RnkDlyFilterMin, u8 RnkDlyFilterMax);
void ResetDCTWrPtr_D(u32 dev, u32 index_reg, u32 index);
u8 mct_DisableDimmEccEn_D(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstat);
static void mct_SetDQSDelayCSR_D(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstat,
u8 ChipSel);
-static void mct_SetDQSDelayAllCSR_D(struct MCTStatStruc *pMCTstat,
- struct DCTStatStruc *pDCTstat,
- u8 cs_start);
u32 mct_GetMCTSysAddr_D(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstat, u8 Channel,
u8 receiver, u8 *valid);
static void SetupDqsPattern_D(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstat,
u32 *buffer);
-
-static void StoreWrRdDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat,
- struct DCTStatStruc *pDCTstat, u8 ChipSel,
- u8 RnkDlyFilterMin, u8 RnkDlyFilterMax);
+static void proc_IOCLFLUSH_D(u32 addr_hi);
static void StoreDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 ChipSel);
@@ -286,20 +268,99 @@ static void CalcEccDQSPos_D(struct MCTStatStruc *pMCTstat,
pDCTstat->DQSDelay = (u8)DQSDelay;
}
+/* Program the per-lane write data timing delay fields for one DIMM.
+ *
+ * delay:     per-lane delay values; entries 0 through 8 are read and only
+ *            the low 7 bits of each entry are used
+ * dev:       passed unchanged to Get_NB32_index_wait / Set_NB32_index_wait
+ * dimm:      DIMM selector, OR'ed into the register index shifted left by 8
+ *            (callers in this file pass Receiver >> 1)
+ * index_reg: passed unchanged to Get_NB32_index_wait / Set_NB32_index_wait
+ *
+ * Three indexed registers (0x1, 0x2 and 0x3) are each read, modified and
+ * written back, so bits outside the delay fields keep their current values.
+ */
+static void write_dqs_write_data_timing_registers(uint16_t* delay, uint32_t dev, uint8_t dimm, uint32_t index_reg)
+{
+ uint32_t dword;
+
+ /* Lanes 0 - 3: index 0x1, one 7-bit field in each of the four bytes */
+ dword = Get_NB32_index_wait(dev, index_reg, 0x1 | (dimm << 8));
+ dword &= ~0x7f7f7f7f; /* clear all four 7-bit fields before inserting the new values */
+ dword |= (delay[3] & 0x7f) << 24;
+ dword |= (delay[2] & 0x7f) << 16;
+ dword |= (delay[1] & 0x7f) << 8;
+ dword |= delay[0] & 0x7f;
+ Set_NB32_index_wait(dev, index_reg, 0x1 | (dimm << 8), dword);
+
+ /* Lanes 4 - 7: index 0x2, same field layout as lanes 0 - 3 */
+ dword = Get_NB32_index_wait(dev, index_reg, 0x2 | (dimm << 8));
+ dword &= ~0x7f7f7f7f; /* clear all four 7-bit fields before inserting the new values */
+ dword |= (delay[7] & 0x7f) << 24;
+ dword |= (delay[6] & 0x7f) << 16;
+ dword |= (delay[5] & 0x7f) << 8;
+ dword |= delay[4] & 0x7f;
+ Set_NB32_index_wait(dev, index_reg, 0x2 | (dimm << 8), dword);
+
+ /* Lane 8 (ECC): index 0x3, only the lowest 7-bit field is touched */
+ dword = Get_NB32_index_wait(dev, index_reg, 0x3 | (dimm << 8));
+ dword &= ~0x0000007f; /* bits 7 and up are preserved */
+ dword |= delay[8] & 0x7f;
+ Set_NB32_index_wait(dev, index_reg, 0x3 | (dimm << 8), dword);
+}
+
+/* Program the per-lane read DQS timing delay fields for one DIMM.
+ *
+ * delay:     per-lane delay values; entries 0 through 8 are read and only
+ *            the low 6 bits of each entry are used
+ * dev:       passed unchanged to Get_NB32_index_wait / Set_NB32_index_wait
+ * dimm:      DIMM selector, OR'ed into the register index shifted left by 8
+ *            (callers in this file pass Receiver >> 1)
+ * index_reg: passed unchanged to Get_NB32_index_wait / Set_NB32_index_wait
+ *
+ * Three indexed registers (0x5, 0x6 and 0x7) are each read, modified and
+ * written back, so bits outside the delay fields keep their current values.
+ */
+static void write_dqs_read_data_timing_registers(uint16_t* delay, uint32_t dev, uint8_t dimm, uint32_t index_reg)
+{
+ uint32_t dword;
+
+ /* Lanes 0 - 3: index 0x5, one 6-bit field in each of the four bytes */
+ dword = Get_NB32_index_wait(dev, index_reg, 0x5 | (dimm << 8));
+ dword &= ~0x3f3f3f3f; /* clear all four 6-bit fields before inserting the new values */
+ dword |= (delay[3] & 0x3f) << 24;
+ dword |= (delay[2] & 0x3f) << 16;
+ dword |= (delay[1] & 0x3f) << 8;
+ dword |= delay[0] & 0x3f;
+ Set_NB32_index_wait(dev, index_reg, 0x5 | (dimm << 8), dword);
+
+ /* Lanes 4 - 7: index 0x6, same field layout as lanes 0 - 3 */
+ dword = Get_NB32_index_wait(dev, index_reg, 0x6 | (dimm << 8));
+ dword &= ~0x3f3f3f3f; /* clear all four 6-bit fields before inserting the new values */
+ dword |= (delay[7] & 0x3f) << 24;
+ dword |= (delay[6] & 0x3f) << 16;
+ dword |= (delay[5] & 0x3f) << 8;
+ dword |= delay[4] & 0x3f;
+ Set_NB32_index_wait(dev, index_reg, 0x6 | (dimm << 8), dword);
+
+ /* Lane 8 (ECC): index 0x7, only the lowest 6-bit field is touched */
+ dword = Get_NB32_index_wait(dev, index_reg, 0x7 | (dimm << 8));
+ dword &= ~0x0000003f; /* bits 6 and up are preserved */
+ dword |= delay[8] & 0x3f;
+ Set_NB32_index_wait(dev, index_reg, 0x7 | (dimm << 8), dword);
+}
+
+/* DQS Position Training
+ * Algorithm detailed in the Fam10h BKDG Rev. 3.62 section 2.8.9.9.3
+ */
static void TrainDQSRdWrPos_D(struct MCTStatStruc *pMCTstat,
- struct DCTStatStruc *pDCTstat,
- u8 cs_start)
+ struct DCTStatStruc *pDCTstat)
{
u32 Errors;
- u8 Channel, DQSWrDelay;
+ u8 Channel;
+ u8 Receiver;
u8 _DisableDramECC = 0;
- u32 PatternBuffer[292];
+ u32 PatternBuffer[304]; /* 288 + 16 */
u8 _Wrap32Dis = 0, _SSE2 = 0;
- u8 dqsWrDelay_end;
+ u32 dev;
u32 addr;
+ u8 valid;
u32 cr4;
u32 lo, hi;
+ u32 index_reg;
+ uint32_t TestAddr;
+
+ uint8_t dual_rank;
+ uint8_t iter;
+ uint8_t lane;
+ uint16_t bytelane_test_results;
+ uint16_t current_write_dqs_delay[MAX_BYTE_LANES];
+ uint16_t current_read_dqs_delay[MAX_BYTE_LANES];
+ uint16_t write_dqs_delay_stepping_done[MAX_BYTE_LANES];
+ uint8_t dqs_read_results_array[2][MAX_BYTE_LANES][64]; /* [rank][lane][step] */
+ uint8_t dqs_write_results_array[2][MAX_BYTE_LANES][128]; /* [rank][lane][step] */
+
+ uint8_t last_pos = 0;
+ uint8_t cur_count = 0;
+ uint8_t best_pos = 0;
+ uint8_t best_count = 0;
print_debug_dqs("\nTrainDQSRdWrPos: Node_ID ", pDCTstat->Node_ID, 0);
cr4 = read_cr4();
@@ -323,50 +384,363 @@ static void TrainDQSRdWrPos_D(struct MCTStatStruc *pMCTstat,
SetupDqsPattern_D(pMCTstat, pDCTstat, PatternBuffer);
/* mct_BeforeTrainDQSRdWrPos_D */
- dqsWrDelay_end = 0x20;
+
+ dev = pDCTstat->dev_dct;
+ pDCTstat->Direction = DQS_READDIR;
+
+ /* 2.8.9.9.3 (2)
+ * Loop over each channel, lane, and rank
+ */
+
+ /* NOTE
+ * The BKDG originally stated to iterate over lane, then rank, however this process is quite slow
+ * compared to an equivalent loop over rank, then lane as the latter allows multiple lanes to be
+ * tested simultaneously, thus improving performance by around 8x.
+ */
Errors = 0;
for (Channel = 0; Channel < 2; Channel++) {
- print_debug_dqs("\tTrainDQSRdWrPos: 1 Channel ",Channel, 1);
+ print_debug_dqs("\tTrainDQSRdWrPos: 1 Channel ", Channel, 1);
pDCTstat->Channel = Channel;
if (pDCTstat->DIMMValidDCT[Channel] == 0) /* mct_BeforeTrainDQSRdWrPos_D */
continue;
- pDCTstat->DqsRdWrPos_Saved = 0;
- for ( DQSWrDelay = 0; DQSWrDelay < dqsWrDelay_end; DQSWrDelay++) {
- pDCTstat->DQSDelay = DQSWrDelay;
- pDCTstat->Direction = DQS_WRITEDIR;
- mct_SetDQSDelayAllCSR_D(pMCTstat, pDCTstat, cs_start);
-
- print_debug_dqs("\t\tTrainDQSRdWrPos: 21 DQSWrDelay ", DQSWrDelay, 2);
- TrainReadDQS_D(pMCTstat, pDCTstat, cs_start);
- print_debug_dqs("\t\tTrainDQSRdWrPos: 21 DqsRdWrPos_Saved ", pDCTstat->DqsRdWrPos_Saved, 2);
- if (pDCTstat->DqsRdWrPos_Saved == 0xFF)
- break;
-
- print_debug_dqs("\t\tTrainDQSRdWrPos: 22 TrainErrors ",pDCTstat->TrainErrors, 2);
- if (pDCTstat->TrainErrors == 0) {
+
+ index_reg = 0x98 + 0x100 * Channel;
+
+ dual_rank = 0;
+ Receiver = mct_InitReceiver_D(pDCTstat, Channel);
+ /* There are four receiver pairs, loosely associated with chipselects.
+ * This is essentially looping over each rank of each DIMM.
+ */
+ for (; Receiver < 8; Receiver++) {
+ if ((Receiver & 0x1) == 0) {
+ /* Even rank of DIMM */
+ if(mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver+1))
+ dual_rank = 1;
+ else
+ dual_rank = 0;
+ }
+
+ if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver)) {
+ continue;
+ }
+
+ /* Select the base test address for the current rank */
+ TestAddr = mct_GetMCTSysAddr_D(pMCTstat, pDCTstat, Channel, Receiver, &valid);
+ if (!valid) { /* Address not supported on current CS */
+ continue;
+ }
+
+ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 14 TestAddr ", TestAddr, 4);
+ SetUpperFSbase(TestAddr); /* fs:eax=far ptr to target */
+
+ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 12 Receiver ", Receiver, 2);
+
+ /* 2.8.9.9.3 (DRAM Write Data Timing Loop)
+ * Iterate over all possible DQS delay values (0x0 - 0x7f)
+ */
+ uint8_t test_write_dqs_delay = 0;
+ uint8_t test_read_dqs_delay = 0;
+ uint8_t passing_dqs_delay_found[MAX_BYTE_LANES];
+
+ /* Initialize variables */
+ for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
+ current_write_dqs_delay[lane] = 0;
+ passing_dqs_delay_found[lane] = 0;
+ write_dqs_delay_stepping_done[lane] = 0;
+ }
+
+ for (test_write_dqs_delay = 0; test_write_dqs_delay < 128; test_write_dqs_delay++) {
+ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 16 test_write_dqs_delay ", test_write_dqs_delay, 6);
+
+ /* Break out of loop if passing window already found, */
+ if (write_dqs_delay_stepping_done[0] && write_dqs_delay_stepping_done[1]
+ && write_dqs_delay_stepping_done[2] && write_dqs_delay_stepping_done[3]
+ && write_dqs_delay_stepping_done[4] && write_dqs_delay_stepping_done[5]
+ && write_dqs_delay_stepping_done[6] && write_dqs_delay_stepping_done[7])
break;
+
+ /* Commit the current Write Data Timing settings to the hardware registers */
+ write_dqs_write_data_timing_registers(current_write_dqs_delay, dev, (Receiver >> 1), index_reg);
+
+ /* Write the DRAM training pattern to the base test address */
+ WriteDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8);
+
+ /* 2.8.9.9.3 (DRAM Read DQS Timing Control Loop)
+ * Iterate over all possible DQS delay values (0x0 - 0x3f)
+ */
+ for (test_read_dqs_delay = 0; test_read_dqs_delay < 64; test_read_dqs_delay++) {
+ print_debug_dqs("\t\t\t\t\tTrainDQSRdWrPos: 161 test_read_dqs_delay ", test_read_dqs_delay, 6);
+
+ /* Initialize Read DQS Timing Control settings for this iteration */
+ for (lane = 0; lane < MAX_BYTE_LANES; lane++)
+ if (!write_dqs_delay_stepping_done[lane])
+ current_read_dqs_delay[lane] = test_read_dqs_delay;
+
+ /* Commit the current Read DQS Timing Control settings to the hardware registers */
+ write_dqs_read_data_timing_registers(current_read_dqs_delay, dev, (Receiver >> 1), index_reg);
+
+ /* Initialize test result variable */
+ bytelane_test_results = 0xff;
+
+ /* Read the DRAM training pattern from the base test address three times
+ * NOTE
+ * While the BKDG states to read three times this is probably excessive!
+ * Decrease training time by only reading the test pattern once per iteration
+ */
+ for (iter = 0; iter < 1; iter++) {
+ /* Flush caches */
+ SetTargetWTIO_D(TestAddr);
+ FlushDQSTestPattern_D(pDCTstat, TestAddr << 8);
+ ResetTargetWTIO_D();
+
+ /* Read and compare pattern */
+ bytelane_test_results &= (CompareDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8) & 0xff); /* [Lane 7 :: Lane 0] 0=fail, 1=pass */
+
+ /* If all lanes have already failed testing bypass remaining re-read attempt(s) */
+ if (bytelane_test_results == 0x0)
+ break;
+ }
+
+ /* Store any lanes that passed testing for later use */
+ for (lane = 0; lane < 8; lane++)
+ if (!write_dqs_delay_stepping_done[lane])
+ dqs_read_results_array[Receiver & 0x1][lane][test_read_dqs_delay] = (!!(bytelane_test_results & (1 << lane)));
+
+ print_debug_dqs("\t\t\t\t\tTrainDQSRdWrPos: 162 bytelane_test_results ", bytelane_test_results, 6);
+ }
+
+ for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
+ if (write_dqs_delay_stepping_done[lane])
+ continue;
+
+ /* Determine location and length of longest consecutive string of passing values
+ * Output is stored in best_pos and best_count
+ */
+ last_pos = 0;
+ cur_count = 0;
+ best_pos = 0;
+ best_count = 0;
+ for (iter = 0; iter < 64; iter++) {
+ if ((dqs_read_results_array[Receiver & 0x1][lane][iter]) && (iter < 63)) {
+ /* Pass */
+ cur_count++;
+ } else {
+ /* Failure or end of loop */
+ if (cur_count > best_count) {
+ best_count = cur_count;
+ best_pos = last_pos;
+ }
+ cur_count = 0;
+ last_pos = iter;
+ }
+ }
+
+ if (best_count > 2) {
+ /* Exit the DRAM Write Data Timing Loop after programming the Read DQS Timing Control
+ * register with the center of the passing window
+ */
+ current_read_dqs_delay[lane] = (best_pos + (best_count / 2));
+ passing_dqs_delay_found[lane] = 1;
+
+ /* Commit the current Read DQS Timing Control settings to the hardware registers */
+ write_dqs_read_data_timing_registers(current_read_dqs_delay, dev, (Receiver >> 1), index_reg);
+
+ /* Exit the DRAM Write Data Timing Loop */
+ write_dqs_delay_stepping_done[lane] = 1;
+
+ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 142 largest passing region ", best_count, 4);
+ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 143 largest passing region start ", best_pos, 4);
+ }
+
+ /* Increment the DQS Write Delay value if needed for the next DRAM Write Data Timing Loop iteration */
+ if (!write_dqs_delay_stepping_done[lane])
+ current_write_dqs_delay[lane]++;
+ }
}
- Errors |= pDCTstat->TrainErrors;
- }
- pDCTstat->DqsRdWrPos_Saved = 0;
- if (DQSWrDelay < dqsWrDelay_end) {
- Errors = 0;
+ /* Flag failure(s) if present */
+ for (lane = 0; lane < 8; lane++) {
+ if (!passing_dqs_delay_found[lane]) {
+ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 121 Unable to find passing region for lane ", lane, 2);
+
+ /* Flag absence of passing window */
+ Errors |= 1 << SB_NODQSPOS;
+ }
+ }
+
+ /* Iterate over all possible Write Data Timing values (0x0 - 0x7f)
+ * Note that the Read DQS Timing Control was calibrated / centered in the prior nested loop
+ */
+ for (test_write_dqs_delay = 0; test_write_dqs_delay < 128; test_write_dqs_delay++) {
+ /* Initialize Write Data Timing settings for this iteration */
+ for (lane = 0; lane < MAX_BYTE_LANES; lane++)
+ current_write_dqs_delay[lane] = test_write_dqs_delay;
+
+ /* Commit the current Write Data Timing settings to the hardware registers */
+ write_dqs_write_data_timing_registers(current_write_dqs_delay, dev, (Receiver >> 1), index_reg);
+
+ /* Write the DRAM training pattern to the base test address */
+ WriteDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8);
+
+ /* Flush caches */
+ SetTargetWTIO_D(TestAddr);
+ FlushDQSTestPattern_D(pDCTstat, TestAddr << 8);
+ ResetTargetWTIO_D();
+
+ /* Read and compare pattern from the base test address */
+ bytelane_test_results = (CompareDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8) & 0xff); /* [Lane 7 :: Lane 0] 0=fail, 1=pass */
+
+ /* Store any lanes that passed testing for later use */
+ for (lane = 0; lane < 8; lane++)
+ dqs_write_results_array[Receiver & 0x1][lane][test_write_dqs_delay] = (!!(bytelane_test_results & (1 << lane)));
+ }
+
+ for (lane = 0; lane < 8; lane++) {
+ if ((!dual_rank) || (dual_rank && (Receiver & 0x1))) {
+
+#ifdef PRINT_PASS_FAIL_BITMAPS
+ for (iter = 0; iter < 64; iter++) {
+ if (dqs_read_results_array[0][lane][iter])
+ printk(BIOS_DEBUG, "+");
+ else
+ printk(BIOS_DEBUG, ".");
+ }
+ printk(BIOS_DEBUG, "\n");
+ for (iter = 0; iter < 64; iter++) {
+ if (dqs_read_results_array[1][lane][iter])
+ printk(BIOS_DEBUG, "+");
+ else
+ printk(BIOS_DEBUG, ".");
+ }
+ printk(BIOS_DEBUG, "\n\n");
+ for (iter = 0; iter < 128; iter++) {
+ if (dqs_write_results_array[0][lane][iter])
+ printk(BIOS_DEBUG, "+");
+ else
+ printk(BIOS_DEBUG, ".");
+ }
+ printk(BIOS_DEBUG, "\n");
+ for (iter = 0; iter < 128; iter++) {
+ if (dqs_write_results_array[1][lane][iter])
+ printk(BIOS_DEBUG, "+");
+ else
+ printk(BIOS_DEBUG, ".");
+ }
+ printk(BIOS_DEBUG, "\n\n");
+#endif
+
+ /* Base rank of single-rank DIMM, or odd rank of dual-rank DIMM */
+ if (dual_rank) {
+ /* Intersect the passing windows of both ranks */
+ for (iter = 0; iter < 64; iter++)
+ if (!dqs_read_results_array[1][lane][iter])
+ dqs_read_results_array[0][lane][iter] = 0;
+ for (iter = 0; iter < 128; iter++)
+ if (!dqs_write_results_array[1][lane][iter])
+ dqs_write_results_array[0][lane][iter] = 0;
+ }
+
+ /* Determine location and length of longest consecutive string of passing values for read DQS timing
+ * Output is stored in best_pos and best_count
+ */
+ last_pos = 0;
+ cur_count = 0;
+ best_pos = 0;
+ best_count = 0;
+ for (iter = 0; iter < 64; iter++) {
+ if ((dqs_read_results_array[0][lane][iter]) && (iter < 63)) {
+ /* Pass */
+ cur_count++;
+ } else {
+ /* Failure or end of loop */
+ if (cur_count > best_count) {
+ best_count = cur_count;
+ best_pos = last_pos;
+ }
+ cur_count = 0;
+ last_pos = iter;
+ }
+ }
+ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 144 largest read passing region ", best_count, 4);
+ if (best_count > 0) {
+ if (best_count < MIN_DQS_WNDW) {
+ /* Flag excessively small passing window */
+ Errors |= 1 << SB_SMALLDQS;
+ }
+
+ /* Find the center of the passing window */
+ current_read_dqs_delay[lane] = (best_pos + (best_count / 2));
+
+ /* Commit the current Read DQS Timing Control settings to the hardware registers */
+ write_dqs_read_data_timing_registers(current_read_dqs_delay, dev, (Receiver >> 1), index_reg);
+
+ /* Save the final Read DQS Timing Control settings for later use */
+ pDCTstat->CH_D_DIR_B_DQS[Channel][Receiver >> 1][DQS_READDIR][lane] = current_read_dqs_delay[lane];
+ } else {
+ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 122 Unable to find read passing region for lane ", lane, 2);
+
+ /* Flag absence of passing window */
+ Errors |= 1 << SB_NODQSPOS;
+ }
+
+ /* Determine location and length of longest consecutive string of passing values for write DQS timing
+ * Output is stored in best_pos and best_count
+ */
+ last_pos = 0;
+ cur_count = 0;
+ best_pos = 0;
+ best_count = 0;
+ for (iter = 0; iter < 128; iter++) {
+ if ((dqs_write_results_array[0][lane][iter]) && (iter < 127)) {
+ /* Pass */
+ cur_count++;
+ } else {
+ /* Failure or end of loop */
+ if (cur_count > best_count) {
+ best_count = cur_count;
+ best_pos = last_pos;
+ }
+ cur_count = 0;
+ last_pos = iter;
+ }
+ }
+ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 145 largest write passing region ", best_count, 4);
+ if (best_count > 0) {
+ if (best_count < MIN_DQS_WNDW) {
+ /* Flag excessively small passing window */
+ Errors |= 1 << SB_SMALLDQS;
+ }
+
+ /* Find the center of the passing window */
+ current_write_dqs_delay[lane] = (best_pos + (best_count / 2));
+
+ /* Commit the current Write Data Timing settings to the hardware registers */
+ write_dqs_write_data_timing_registers(current_write_dqs_delay, dev, (Receiver >> 1), index_reg);
+
+ /* Save the final Write Data Timing settings for later use */
+ pDCTstat->CH_D_DIR_B_DQS[Channel][Receiver >> 1][DQS_WRITEDIR][lane] = current_write_dqs_delay[lane];
+ } else {
+ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 123 Unable to find write passing region for lane ", lane, 2);
+
+ /* Flag absence of passing window */
+ Errors |= 1 << SB_NODQSPOS;
+ }
+ }
+ }
- print_debug_dqs("\tTrainDQSRdWrPos: 231 DQSWrDelay ", DQSWrDelay, 1);
- TrainWriteDQS_D(pMCTstat, pDCTstat, cs_start);
}
- print_debug_dqs("\tTrainDQSRdWrPos: 232 Errors ", Errors, 1);
- pDCTstat->ErrStatus |= Errors;
}
+ pDCTstat->TrainErrors |= Errors;
+ pDCTstat->ErrStatus |= Errors;
+
#if DQS_TRAIN_DEBUG > 0
{
u8 val;
u8 i;
- u8 Channel, Receiver, Dir;
+ u8 ChannelDTD, ReceiverDTD, Dir;
u8 *p;
for (Dir = 0; Dir < 2; Dir++) {
@@ -375,14 +749,14 @@ static void TrainDQSRdWrPos_D(struct MCTStatStruc *pMCTstat,
} else {
printk(BIOS_DEBUG, "TrainDQSRdWrPos: CH_D_DIR_B_DQS RD:\n");
}
- for (Channel = 0; Channel < 2; Channel++) {
- printk(BIOS_DEBUG, "Channel: %02x\n", Channel);
- for (Receiver = cs_start; Receiver < (cs_start + 2); Receiver += 2) {
- printk(BIOS_DEBUG, "\t\tReceiver: %02x: ", Receiver);
- p = pDCTstat->CH_D_DIR_B_DQS[Channel][Receiver >> 1][Dir];
+ for (ChannelDTD = 0; ChannelDTD < 2; ChannelDTD++) {
+ printk(BIOS_DEBUG, "Channel: %02x\n", ChannelDTD);
+ for (ReceiverDTD = 0; ReceiverDTD < MAX_CS_SUPPORTED; ReceiverDTD += 2) {
+ printk(BIOS_DEBUG, "\t\tReceiver: %02x:", ReceiverDTD);
+ p = pDCTstat->CH_D_DIR_B_DQS[ChannelDTD][ReceiverDTD >> 1][Dir];
for (i=0;i<8; i++) {
val = p[i];
- printk(BIOS_DEBUG, "%02x ", val);
+ printk(BIOS_DEBUG, " %02x", val);
}
printk(BIOS_DEBUG, "\n");
}
@@ -437,225 +811,6 @@ static void SetupDqsPattern_D(struct MCTStatStruc *pMCTstat,
pDCTstat->PtrPatternBufA = (u32)buf;
}
-static void TrainDQSPos_D(struct MCTStatStruc *pMCTstat,
- struct DCTStatStruc *pDCTstat,
- u8 cs_start)
-{
- u32 Errors;
- u8 ChipSel, DQSDelay;
- u8 RnkDlySeqPassMin=0, RnkDlySeqPassMax=0xFF, RnkDlyFilterMin=0, RnkDlyFilterMax=0xFF;
- u8 RnkDlySeqPassMinTot=0, RnkDlySeqPassMaxTot=0xFF, RnkDlyFilterMinTot=0, RnkDlyFilterMaxTot=0xFF;
- u8 LastTest ,LastTestTot;
- u32 TestAddr;
- u8 ByteLane;
- u8 MutualCSPassW[128];
- u8 BanksPresent;
- u8 dqsDelay_end;
- u8 tmp, valid, tmp1;
- u16 word;
-
- /* MutualCSPassW: each byte represents a bitmap of pass/fail per
- * ByteLane. The indext within MutualCSPassW is the delay value
- * given the results.
- */
- print_debug_dqs("\t\t\tTrainDQSPos begin ", 0, 3);
-
- Errors = 0;
- BanksPresent = 0;
-
- dqsDelay_end = 32;
- /* Bitmapped status per delay setting, 0xff=All positions
- * passing (1= PASS). Set the entire array.
- */
- for (DQSDelay=0; DQSDelay<128; DQSDelay++) {
- MutualCSPassW[DQSDelay] = 0xFF;
- }
-
- for (ChipSel = cs_start; ChipSel < (cs_start + 2); ChipSel++) { /* logical register chipselects 0..7 */
- print_debug_dqs("\t\t\t\tTrainDQSPos: 11 ChipSel ", ChipSel, 4);
-
- if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, pDCTstat->Channel, ChipSel)) {
- print_debug_dqs("\t\t\t\tmct_RcvrRankEnabled_D CS not enabled ", ChipSel, 4);
- continue;
- }
-
- BanksPresent = 1; /* flag for at least one bank is present */
- TestAddr = mct_GetMCTSysAddr_D(pMCTstat, pDCTstat, pDCTstat->Channel, ChipSel, &valid);
- if (!valid) {
- print_debug_dqs("\t\t\t\tAddress not supported on current CS ", TestAddr, 4);
- continue;
- }
-
- print_debug_dqs("\t\t\t\tTrainDQSPos: 12 TestAddr ", TestAddr, 4);
- SetUpperFSbase(TestAddr); /* fs:eax=far ptr to target */
-
- if (pDCTstat->Direction == DQS_READDIR) {
- print_debug_dqs("\t\t\t\tTrainDQSPos: 13 for read ", 0, 4);
- WriteDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8);
- }
-
- for (DQSDelay = 0; DQSDelay < dqsDelay_end; DQSDelay++) {
- print_debug_dqs("\t\t\t\t\tTrainDQSPos: 141 DQSDelay ", DQSDelay, 5);
-
- tmp = 0xFF;
- tmp1 = DQSDelay;
- if (pDCTstat->Direction == DQS_READDIR) {
- tmp &= MutualCSPassW[DQSDelay];
- tmp1 += dqsDelay_end;
- }
- tmp &= MutualCSPassW[tmp1];
-
- if (tmp == 0) {
- continue;/* skip current delay value if other chipselects have failed all 8 bytelanes */
- }
-
- pDCTstat->DQSDelay = DQSDelay;
- mct_SetDQSDelayAllCSR_D(pMCTstat, pDCTstat, cs_start);
- print_debug_dqs("\t\t\t\t\tTrainDQSPos: 142 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
-
- if (pDCTstat->Direction == DQS_WRITEDIR) {
- print_debug_dqs("\t\t\t\t\tTrainDQSPos: 143 for write", 0, 5);
- WriteDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8);
- }
-
- print_debug_dqs("\t\t\t\t\tTrainDQSPos: 144 Pattern ", pDCTstat->Pattern, 5);
- ReadDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8);
- /* print_debug_dqs("\t\t\t\t\tTrainDQSPos: 145 MutualCSPassW ", MutualCSPassW[DQSDelay], 5); */
- word = CompareDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8); /* 0=fail, 1=pass */
- print_debug_dqs("\t\t\t\t\tTrainDQSPos: 144 compare 1 ", word, 3);
-
- print_debug_dqs("\t\t\t\t\tTrainDQSPos: 144 DqsRdWrPos_Saved ", pDCTstat->DqsRdWrPos_Saved, 3);
- word &= ~(pDCTstat->DqsRdWrPos_Saved); /* mask out bytelanes that already passed */
- word &= ~(pDCTstat->DqsRdWrPos_Saved << 8);
- print_debug_dqs("\t\t\t\t\tTrainDQSPos: 144 compare 2 ", word, 3);
-
- tmp = DQSDelay;
- if (pDCTstat->Direction == DQS_READDIR) {
- MutualCSPassW[tmp] &= word >> 8;
- tmp += dqsDelay_end;
- }
- MutualCSPassW[tmp] &= word & 0xFF;
-
- print_debug_dqs("\t\t\t\t\tTrainDQSPos: 146 \tMutualCSPassW ", MutualCSPassW[DQSDelay], 5);
-
- SetTargetWTIO_D(TestAddr);
- FlushDQSTestPattern_D(pDCTstat, TestAddr << 8);
- ResetTargetWTIO_D();
- }
-
- }
-
- if (pDCTstat->Direction == DQS_READDIR) {
- dqsDelay_end <<= 1;
- }
-
- if (BanksPresent) {
- #if 0 /* show the bitmap */
- for (ByteLane = 0; ByteLane < 8; ByteLane++) { /* just print ByteLane 0 */
- for (DQSDelay = 0; DQSDelay < dqsDelay_end; DQSDelay++) {
- if (!(MutualCSPassW[DQSDelay] &(1 << ByteLane))) {
- printk(BIOS_DEBUG, ".");
- } else {
- printk(BIOS_DEBUG, "*");
- }
- }
- printk(BIOS_DEBUG, "\n");
- }
- #endif
- for (ByteLane = 0; ByteLane < 8; ByteLane++) {
- print_debug_dqs("\t\t\t\tTrainDQSPos: 31 ByteLane ",ByteLane, 4);
- if (!(pDCTstat->DqsRdWrPos_Saved &(1 << ByteLane))) {
- pDCTstat->ByteLane = ByteLane;
- LastTest = DQS_FAIL; /* Analyze the results */
- LastTestTot = DQS_FAIL;
- /* RnkDlySeqPassMin = 0; */
- /* RnkDlySeqPassMax = 0; */
- RnkDlyFilterMax = 0;
- RnkDlyFilterMin = 0;
- RnkDlyFilterMaxTot = 0;
- RnkDlyFilterMinTot = 0;
- for (DQSDelay = 0; DQSDelay < dqsDelay_end; DQSDelay++) {
- if (MutualCSPassW[DQSDelay] & (1 << ByteLane)) {
- print_debug_dqs("\t\t\t\t\tTrainDQSPos: 321 DQSDelay ", DQSDelay, 5);
- print_debug_dqs("\t\t\t\t\tTrainDQSPos: 322 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
- if (pDCTstat->Direction == DQS_READDIR)
- tmp = 0x20;
- else
- tmp = 0;
- if (DQSDelay >= tmp) {
- RnkDlySeqPassMax = DQSDelay;
- if (LastTest == DQS_FAIL) {
- RnkDlySeqPassMin = DQSDelay; /* start sequential run */
- }
- if ((RnkDlySeqPassMax - RnkDlySeqPassMin)>(RnkDlyFilterMax-RnkDlyFilterMin)){
- RnkDlyFilterMin = RnkDlySeqPassMin;
- RnkDlyFilterMax = RnkDlySeqPassMax;
- }
- LastTest = DQS_PASS;
- }
-
- if (pDCTstat->Direction == DQS_READDIR) {
- RnkDlySeqPassMaxTot = DQSDelay;
- if (LastTestTot == DQS_FAIL)
- RnkDlySeqPassMinTot = DQSDelay;
- if ((RnkDlySeqPassMaxTot - RnkDlySeqPassMinTot)>(RnkDlyFilterMaxTot-RnkDlyFilterMinTot)){
- RnkDlyFilterMinTot = RnkDlySeqPassMinTot;
- RnkDlyFilterMaxTot = RnkDlySeqPassMaxTot;
- }
- LastTestTot = DQS_PASS;
- }
- } else {
- LastTest = DQS_FAIL;
- LastTestTot = DQS_FAIL;
- }
- }
- print_debug_dqs("\t\t\t\tTrainDQSPos: 33 RnkDlySeqPassMax ", RnkDlySeqPassMax, 4);
- if (RnkDlySeqPassMax == 0) {
- Errors |= 1 << SB_NODQSPOS; /* no passing window */
- } else {
- print_debug_dqs_pair("\t\t\t\tTrainDQSPos: 34 RnkDlyFilter: ", RnkDlyFilterMin, " ", RnkDlyFilterMax, 4);
- if (((RnkDlyFilterMax - RnkDlyFilterMin) < MIN_DQS_WNDW)){
- Errors |= 1 << SB_SMALLDQS;
- } else {
- u8 middle_dqs;
- /* mctEngDQSwindow_Save_D Not required for arrays */
- if (pDCTstat->Direction == DQS_READDIR)
- middle_dqs = MiddleDQS_D(RnkDlyFilterMinTot, RnkDlyFilterMaxTot);
- else
- middle_dqs = MiddleDQS_D(RnkDlyFilterMin, RnkDlyFilterMax);
- pDCTstat->DQSDelay = middle_dqs;
- mct_SetDQSDelayCSR_D(pMCTstat, pDCTstat, cs_start); /* load the register with the value */
- if (pDCTstat->Direction == DQS_READDIR)
- StoreWrRdDQSDatStrucVal_D(pMCTstat, pDCTstat, cs_start, RnkDlyFilterMinTot, RnkDlyFilterMaxTot); /* store the value into the data structure */
- else
- StoreWrRdDQSDatStrucVal_D(pMCTstat, pDCTstat, cs_start, RnkDlyFilterMin, RnkDlyFilterMax); /* store the value into the data structure */
- print_debug_dqs("\t\t\t\tTrainDQSPos: 42 middle_dqs : ",middle_dqs, 4);
- pDCTstat->DqsRdWrPos_Saved |= 1 << ByteLane;
- }
- }
- }
- } /* if (pDCTstat->DqsRdWrPos_Saved &(1 << ByteLane)) */
- }
-/* skipLocMiddle: */
- pDCTstat->TrainErrors = Errors;
-
- print_debug_dqs("\t\t\tTrainDQSPos: Errors ", Errors, 3);
-}
-
-static void mctEngDQSwindow_Save_D(struct MCTStatStruc *pMCTstat,
- struct DCTStatStruc *pDCTstat, u8 ChipSel,
- u8 RnkDlyFilterMin, u8 RnkDlyFilterMax)
-{
- pDCTstat->CH_D_DIR_MaxMin_B_Dly[pDCTstat->Channel]
- [pDCTstat->Direction]
- [0]
- [pDCTstat->ByteLane] = RnkDlyFilterMin;
- pDCTstat->CH_D_DIR_MaxMin_B_Dly[pDCTstat->Channel]
- [pDCTstat->Direction]
- [1]
- [pDCTstat->ByteLane] = RnkDlyFilterMax;
-}
-
static void StoreDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstat, u8 ChipSel)
{
@@ -679,26 +834,6 @@ static void StoreDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat,
pDCTstat->DQSDelay;
}
-static void StoreWrRdDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat,
- struct DCTStatStruc *pDCTstat, u8 ChipSel,
- u8 RnkDlyFilterMin, u8 RnkDlyFilterMax)
-{
- u8 dn;
-
- if (pDCTstat->Direction == DQS_WRITEDIR) {
- dn = ChipSel >> 1;
- RnkDlyFilterMin += pDCTstat->CH_D_B_TxDqs[pDCTstat->Channel][dn][pDCTstat->ByteLane];
- RnkDlyFilterMax += pDCTstat->CH_D_B_TxDqs[pDCTstat->Channel][dn][pDCTstat->ByteLane];
- pDCTstat->DQSDelay += pDCTstat->CH_D_B_TxDqs[pDCTstat->Channel][dn][pDCTstat->ByteLane];
- } else {
- RnkDlyFilterMin <<= 1;
- RnkDlyFilterMax <<= 1;
- pDCTstat->DQSDelay <<= 1;
- }
- mctEngDQSwindow_Save_D(pMCTstat, pDCTstat, ChipSel, RnkDlyFilterMin, RnkDlyFilterMax);
- StoreDQSDatStrucVal_D(pMCTstat, pDCTstat, ChipSel);
-}
-
static void GetDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstat, u8 ChipSel)
{
@@ -720,33 +855,6 @@ static void GetDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat,
/* FindDQSDatDimmVal_D is not required since we use an array */
-static u8 MiddleDQS_D(u8 min, u8 max)
-{
- u8 size;
- size = max-min;
- if (size % 2)
- size++; /* round up if the size isn't even. */
- return ( min + (size >> 1));
-}
-
-static void TrainReadDQS_D(struct MCTStatStruc *pMCTstat,
- struct DCTStatStruc *pDCTstat,
- u8 cs_start)
-{
- print_debug_dqs("\t\tTrainReadPos ", 0, 2);
- pDCTstat->Direction = DQS_READDIR;
- TrainDQSPos_D(pMCTstat, pDCTstat, cs_start);
-}
-
-static void TrainWriteDQS_D(struct MCTStatStruc *pMCTstat,
- struct DCTStatStruc *pDCTstat,
- u8 cs_start)
-{
- pDCTstat->Direction = DQS_WRITEDIR;
- print_debug_dqs("\t\tTrainWritePos", 0, 2);
- TrainDQSPos_D(pMCTstat, pDCTstat, cs_start);
-}
-
static void proc_IOCLFLUSH_D(u32 addr_hi)
{
SetTargetWTIO_D(addr_hi);
@@ -963,30 +1071,6 @@ static void ResetTargetWTIO_D(void)
_WRMSR(0xc0010017, lo, hi); /* IORR0 Mask */
}
-static void ReadDQSTestPattern_D(struct MCTStatStruc *pMCTstat,
- struct DCTStatStruc *pDCTstat,
- u32 TestAddr_lo)
-{
- /* Read a pattern of 72 bit times (per DQ), to test dram functionality.
- * The pattern is a stress pattern which exercises both ISI and
- * crosstalk. The number of cache lines to fill is dependent on DCT
- * width mode and burstlength.
- * Mode BL Lines Pattern no.
- * ----+---+-------------------
- * 64 4 9 0
- * 64 8 9 0
- * 64M 4 9 0
- * 64M 8 9 0
- * 128 4 18 1
- * 128 8 N/A -
- */
- if (pDCTstat->Pattern == 0)
- ReadL9TestPattern(TestAddr_lo);
- else
- ReadL18TestPattern(TestAddr_lo);
- _MFENCE;
-}
-
u32 SetUpperFSbase(u32 addr_hi)
{
/* Set the upper 32-bits of the Base address, 4GB aligned) for the
@@ -1009,8 +1093,6 @@ void ResetDCTWrPtr_D(u32 dev, u32 index_reg, u32 index)
Set_NB32_index_wait(dev, index_reg, index, val);
}
-/* mctEngDQSwindow_Save_D not required with arrays */
-
void mct_TrainDQSPos_D(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstatA)
{
@@ -1021,8 +1103,8 @@ void mct_TrainDQSPos_D(struct MCTStatStruc *pMCTstat,
for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
pDCTstat = pDCTstatA + Node;
if (pDCTstat->DCTSysLimit) {
+ TrainDQSRdWrPos_D(pMCTstat, pDCTstat);
for (ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) {
- TrainDQSRdWrPos_D(pMCTstat, pDCTstat, ChipSel);
SetEccDQSRdWrPos_D(pMCTstat, pDCTstat, ChipSel);
}
}
@@ -1137,27 +1219,6 @@ static void mct_SetDQSDelayCSR_D(struct MCTStatStruc *pMCTstat,
}
}
-/*
- * mct_SetDQSDelayAllCSR_D:
- * Write the Delay value to all eight byte lanes.
- */
-static void mct_SetDQSDelayAllCSR_D(struct MCTStatStruc *pMCTstat,
- struct DCTStatStruc *pDCTstat,
- u8 cs_start)
-{
- u8 ByteLane;
- u8 ChipSel = cs_start;
-
- for (ChipSel = cs_start; ChipSel < (cs_start + 2); ChipSel++) {
- if ( mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, pDCTstat->Channel, ChipSel)) {
- for (ByteLane = 0; ByteLane < 8; ByteLane++) {
- pDCTstat->ByteLane = ByteLane;
- mct_SetDQSDelayCSR_D(pMCTstat, pDCTstat, ChipSel);
- }
- }
- }
-}
-
u8 mct_RcvrRankEnabled_D(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstat,
u8 Channel, u8 ChipSel)
@@ -1196,7 +1257,7 @@ u32 mct_GetMCTSysAddr_D(struct MCTStatStruc *pMCTstat,
reg = 0x40 + (receiver << 2) + reg_off;
val = Get_NB32(dev, reg);
- val &= ~0x0F;
+ val &= ~0xe007c01f;
/* unganged mode DCT0+DCT1, sys addr of DCT1=node
* base+DctSelBaseAddr+local ca base*/
@@ -1277,6 +1338,7 @@ exitGetAddrWNoError:
print_debug_dqs("mct_GetMCTSysAddr_D: base_addr ", val, 2);
print_debug_dqs("mct_GetMCTSysAddr_D: valid ", *valid, 2);
print_debug_dqs("mct_GetMCTSysAddr_D: status ", pDCTstat->Status, 2);
+ print_debug_dqs("mct_GetMCTSysAddr_D: SysBase ", pDCTstat->DCTSysBase, 2);
print_debug_dqs("mct_GetMCTSysAddr_D: HoleBase ", pDCTstat->DCTHoleBase, 2);
print_debug_dqs("mct_GetMCTSysAddr_D: Cachetop ", pMCTstat->Sub4GCacheTop, 2);