diff options
author | Zheng Bao <zheng.bao@amd.com> | 2010-04-23 17:32:48 +0000 |
---|---|---|
committer | Stefan Reinauer <stepan@openbios.org> | 2010-04-23 17:32:48 +0000 |
commit | eb75f652d392d2f4f257194e112f3f0db7479145 (patch) | |
tree | aa972907734abcba4ca52f2a3a71f8d81d4bdce0 /src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c | |
parent | fe6c2cda6e6977894d9b668af9509b983c850f68 (diff) | |
download | coreboot-eb75f652d392d2f4f257194e112f3f0db7479145.tar.xz |
DDR3 support for AMD Fam10.
Signed-off-by: Zheng Bao <zheng.bao@amd.com>
Acked-by: Stefan Reinauer <stepan@coresystems.de>
git-svn-id: svn://svn.coreboot.org/coreboot/trunk@5481 2b7e53f0-3cfb-0310-b3e9-8179ed1497e1
Diffstat (limited to 'src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c')
-rw-r--r-- | src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c | 1056 |
1 files changed, 1056 insertions, 0 deletions
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c new file mode 100644 index 0000000000..86d07423f3 --- /dev/null +++ b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c @@ -0,0 +1,1056 @@ +/* + * This file is part of the coreboot project. + * + * Copyright (C) 2010 Advanced Micro Devices, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/****************************************************************************** + Description: Receiver En and DQS Timing Training feature for DDR 3 MCT +******************************************************************************/ + +static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 Pass); +static u8 mct_SavePassRcvEnDly_D(struct DCTStatStruc *pDCTstat, + u8 rcvrEnDly, u8 Channel, + u8 receiver, u8 Pass); +static u8 mct_CompareTestPatternQW0_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, + u32 addr, u8 channel, + u8 pattern, u8 Pass); +static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat); +static void InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 Channel); +static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 Channel); +static void mct_SetFinalRcvrEnDly_D(struct DCTStatStruc *pDCTstat, + u8 RcvrEnDly, u8 where, + u8 Channel, u8 Receiver, + u32 dev, u32 index_reg, + u8 Addl_Index, u8 Pass); +static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u8 DQSRcvEnDly); +static void fenceDynTraining_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct); +static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat); + +/* Warning: These must be located so they do not cross a logical 16-bit + segment boundary! */ +const static u32 TestPattern0_D[] = { + 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, + 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, + 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, + 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, +}; +const static u32 TestPattern1_D[] = { + 0x55555555, 0x55555555, 0x55555555, 0x55555555, + 0x55555555, 0x55555555, 0x55555555, 0x55555555, + 0x55555555, 0x55555555, 0x55555555, 0x55555555, + 0x55555555, 0x55555555, 0x55555555, 0x55555555, +}; +const static u32 TestPattern2_D[] = { + 0x12345678, 0x87654321, 0x23456789, 0x98765432, + 0x59385824, 0x30496724, 0x24490795, 0x99938733, + 0x40385642, 0x38465245, 0x29432163, 0x05067894, + 0x12349045, 0x98723467, 0x12387634, 0x34587623, +}; + +static void SetupRcvrPattern(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u32 *buffer, u8 pass) +{ + /* + * 1. Copy the alpha and Beta patterns from ROM to Cache, + * aligning on 16 byte boundary + * 2. Set the ptr to DCTStatstruc.PtrPatternBufA for Alpha + * 3. Set the ptr to DCTStatstruc.PtrPatternBufB for Beta + */ + u32 *buf_a; + u32 *buf_b; + u32 *p_A; + u32 *p_B; + u8 i; + + buf_a = (u32 *)(((u32)buffer + 0x10) & (0xfffffff0)); + buf_b = buf_a + 32; /* ?? */ + p_A = (u32 *)SetupDqsPattern_1PassB(pass); + p_B = (u32 *)SetupDqsPattern_1PassA(pass); + + for(i=0;i<16;i++) { + buf_a[i] = p_A[i]; + buf_b[i] = p_B[i]; + } + + pDCTstat->PtrPatternBufA = (u32)buf_a; + pDCTstat->PtrPatternBufB = (u32)buf_b; +} + +void mct_TrainRcvrEn_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 Pass) +{ + if(mct_checkNumberOfDqsRcvEn_1Pass(Pass)) + dqsTrainRcvrEn_SW(pMCTstat, pDCTstat, Pass); +} + +static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 Pass) +{ + u8 Channel, RcvrEnDly, RcvrEnDlyRmin; + u8 Test0, Test1, CurrTest, CurrTestSide0, CurrTestSide1; + u8 CTLRMaxDelay, _2Ranks, PatternA, PatternB; + u8 Addl_Index = 0; + u8 Receiver; + u8 _DisableDramECC = 0, _Wrap32Dis = 0, _SSE2 = 0; + u8 RcvrEnDlyLimit, Final_Value, MaxDelay_CH[2]; + u32 TestAddr0, TestAddr1, TestAddr0B, TestAddr1B; + u32 PatternBuffer[64+4]; /* FIXME: need increase 8? */ + u32 Errors; + + u32 val; + u32 reg; + u32 dev; + u32 index_reg; + u32 ch_start, ch_end, ch; + u32 msr; + u32 cr4; + u32 lo, hi; + + u8 valid; + u32 tmp; + u8 LastTest; + + print_debug_dqs("\nTrainRcvEn: Node", pDCTstat->Node_ID, 0); + print_debug_dqs("TrainRcvEn: Pass", Pass, 0); + + dev = pDCTstat->dev_dct; + ch_start = 0; + if(!pDCTstat->GangedMode) { + ch_end = 2; + } else { + ch_end = 1; + } + + for (ch = ch_start; ch < ch_end; ch++) { + reg = 0x78 + (0x100 * ch); + val = Get_NB32(dev, reg); + val &= ~(0x3ff << 22); + val |= (0x0c8 << 22); /* Max Rd Lat */ + Set_NB32(dev, reg, val); + } + + Final_Value = 1; + if (Pass == FirstPass) { + mct_InitDQSPos4RcvrEn_D(pMCTstat, pDCTstat); + } else { + pDCTstat->DimmTrainFail = 0; + pDCTstat->CSTrainFail = ~pDCTstat->CSPresent; + } + + cr4 = read_cr4(); + if(cr4 & ( 1 << 9)) { /* save the old value */ + _SSE2 = 1; + } + cr4 |= (1 << 9); /* OSFXSR enable SSE2 */ + write_cr4(cr4); + + msr = HWCR; + _RDMSR(msr, &lo, &hi); + /* FIXME: Why use SSEDIS */ + if(lo & (1 << 17)) { /* save the old value */ + _Wrap32Dis = 1; + } + lo |= (1 << 17); /* HWCR.wrap32dis */ + lo &= ~(1 << 15); /* SSEDIS */ + _WRMSR(msr, lo, hi); /* Setting wrap32dis allows 64-bit memory references in real mode */ + + _DisableDramECC = mct_DisableDimmEccEn_D(pMCTstat, pDCTstat); + + SetupRcvrPattern(pMCTstat, pDCTstat, PatternBuffer, Pass); + + Errors = 0; + dev = pDCTstat->dev_dct; + CTLRMaxDelay = 0; + + for (Channel = 0; Channel < 2; Channel++) { + print_debug_dqs("\tTrainRcvEn51: Node ", pDCTstat->Node_ID, 1); + print_debug_dqs("\tTrainRcvEn51: Channel ", Channel, 1); + pDCTstat->Channel = Channel; + + MaxDelay_CH[Channel] = 0; + index_reg = 0x98 + 0x100 * Channel; + + Receiver = mct_InitReceiver_D(pDCTstat, Channel); + /* There are four receiver pairs, loosely associated with chipselects. */ + for (; Receiver < 8; Receiver += 2) { + Addl_Index = (Receiver >> 1) * 3 + 0x10; + LastTest = DQS_FAIL; + + /* mct_ModifyIndex_D */ + RcvrEnDlyRmin = RcvrEnDlyLimit = 0xff; + + print_debug_dqs("\t\tTrainRcvEnd52: index ", Addl_Index, 2); + + if(!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver)) { + continue; + } + + TestAddr0 = mct_GetRcvrSysAddr_D(pMCTstat, pDCTstat, Channel, Receiver, &valid); + if(!valid) { /* Address not supported on current CS */ + continue; + } + + TestAddr0B = TestAddr0 + (BigPagex8_RJ8 << 3); + + if(mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver+1)) { + TestAddr1 = mct_GetRcvrSysAddr_D(pMCTstat, pDCTstat, Channel, Receiver+1, &valid); + if(!valid) { /* Address not supported on current CS */ + continue; + } + TestAddr1B = TestAddr1 + (BigPagex8_RJ8 << 3); + _2Ranks = 1; + } else { + _2Ranks = TestAddr1 = TestAddr1B = 0; + } + + print_debug_dqs("\t\tTrainRcvEn53: TestAddr0 ", TestAddr0, 2); + print_debug_dqs("\t\tTrainRcvEn53: TestAddr0B ", TestAddr0B, 2); + print_debug_dqs("\t\tTrainRcvEn53: TestAddr1 ", TestAddr1, 2); + print_debug_dqs("\t\tTrainRcvEn53: TestAddr1B ", TestAddr1B, 2); + + /* + * Get starting RcvrEnDly value + */ + RcvrEnDly = mct_Get_Start_RcvrEnDly_1Pass(Pass); + + /* mct_GetInitFlag_D*/ + if (Pass == FirstPass) { + pDCTstat->DqsRcvEn_Pass = 0; + } else { + pDCTstat->DqsRcvEn_Pass=0xFF; + } + pDCTstat->DqsRcvEn_Saved = 0; + + + while(RcvrEnDly < RcvrEnDlyLimit) { /* sweep Delay value here */ + print_debug_dqs("\t\t\tTrainRcvEn541: RcvrEnDly ", RcvrEnDly, 3); + + /* callback not required + if(mct_AdjustDelay_D(pDCTstat, RcvrEnDly)) + goto skipDly; + */ + + /* Odd steps get another pattern such that even + and odd steps alternate. The pointers to the + patterns will be swaped at the end of the loop + so that they correspond. */ + if(RcvrEnDly & 1) { + PatternA = 1; + PatternB = 0; + } else { + /* Even step */ + PatternA = 0; + PatternB = 1; + } + + mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0, PatternA); /* rank 0 of DIMM, testpattern 0 */ + mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0B, PatternB); /* rank 0 of DIMM, testpattern 1 */ + if(_2Ranks) { + mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1, PatternA); /*rank 1 of DIMM, testpattern 0 */ + mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1B, PatternB); /*rank 1 of DIMM, testpattern 1 */ + } + + mct_SetRcvrEnDly_D(pDCTstat, RcvrEnDly, 0, Channel, Receiver, dev, index_reg, Addl_Index, Pass); + + CurrTest = DQS_FAIL; + CurrTestSide0 = DQS_FAIL; + CurrTestSide1 = DQS_FAIL; + + mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0); /*cache fills */ + Test0 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr0, Channel, PatternA, Pass);/* ROM vs cache compare */ + proc_IOCLFLUSH_D(TestAddr0); + ResetDCTWrPtr_D(dev, index_reg, Addl_Index); + + print_debug_dqs("\t\t\tTrainRcvEn542: Test0 result ", Test0, 3); + + /* != 0x00 mean pass */ + + if(Test0 == DQS_PASS) { + mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0B); /*cache fills */ + /* ROM vs cache compare */ + Test1 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr0B, Channel, PatternB, Pass); + proc_IOCLFLUSH_D(TestAddr0B); + ResetDCTWrPtr_D(dev, index_reg, Addl_Index); + + print_debug_dqs("\t\t\tTrainRcvEn543: Test1 result ", Test1, 3); + + if(Test1 == DQS_PASS) { + CurrTestSide0 = DQS_PASS; + } + } + if(_2Ranks) { + mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1); /*cache fills */ + /* ROM vs cache compare */ + Test0 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr1, Channel, PatternA, Pass); + proc_IOCLFLUSH_D(TestAddr1); + ResetDCTWrPtr_D(dev, index_reg, Addl_Index); + + print_debug_dqs("\t\t\tTrainRcvEn544: Test0 result ", Test0, 3); + + if(Test0 == DQS_PASS) { + mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1B); /*cache fills */ + /* ROM vs cache compare */ + Test1 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr1B, Channel, PatternB, Pass); + proc_IOCLFLUSH_D(TestAddr1B); + ResetDCTWrPtr_D(dev, index_reg, Addl_Index); + + print_debug_dqs("\t\t\tTrainRcvEn545: Test1 result ", Test1, 3); + if(Test1 == DQS_PASS) { + CurrTestSide1 = DQS_PASS; + } + } + } + + if(_2Ranks) { + if ((CurrTestSide0 == DQS_PASS) && (CurrTestSide1 == DQS_PASS)) { + CurrTest = DQS_PASS; + } + } else if (CurrTestSide0 == DQS_PASS) { + CurrTest = DQS_PASS; + } + + /* record first pass DqsRcvEn to stack */ + valid = mct_SavePassRcvEnDly_D(pDCTstat, RcvrEnDly, Channel, Receiver, Pass); + + /* Break(1:RevF,2:DR) or not(0) FIXME: This comment deosn't make sense */ + if(valid == 2 || (LastTest == DQS_FAIL && valid == 1)) { + RcvrEnDlyRmin = RcvrEnDly; + break; + } + + LastTest = CurrTest; + + /* swap the rank 0 pointers */ + tmp = TestAddr0; + TestAddr0 = TestAddr0B; + TestAddr0B = tmp; + + /* swap the rank 1 pointers */ + tmp = TestAddr1; + TestAddr1 = TestAddr1B; + TestAddr1B = tmp; + + print_debug_dqs("\t\t\tTrainRcvEn56: RcvrEnDly ", RcvrEnDly, 3); + + RcvrEnDly++; + + } /* while RcvrEnDly */ + + print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDly ", RcvrEnDly, 2); + print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDlyRmin ", RcvrEnDlyRmin, 3); + print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDlyLimit ", RcvrEnDlyLimit, 3); + if(RcvrEnDlyRmin == RcvrEnDlyLimit) { + /* no passing window */ + pDCTstat->ErrStatus |= 1 << SB_NORCVREN; + Errors |= 1 << SB_NORCVREN; + pDCTstat->ErrCode = SC_FatalErr; + } + + if(RcvrEnDly > (RcvrEnDlyLimit - 1)) { + /* passing window too narrow, too far delayed*/ + pDCTstat->ErrStatus |= 1 << SB_SmallRCVR; + Errors |= 1 << SB_SmallRCVR; + pDCTstat->ErrCode = SC_FatalErr; + RcvrEnDly = RcvrEnDlyLimit - 1; + pDCTstat->CSTrainFail |= 1 << Receiver; + pDCTstat->DimmTrainFail |= 1 << (Receiver + Channel); + } + + /* CHB_D0_B0_RCVRDLY set in mct_Average_RcvrEnDly_Pass */ + mct_Average_RcvrEnDly_Pass(pDCTstat, RcvrEnDly, RcvrEnDlyLimit, Channel, Receiver, Pass); + + mct_SetFinalRcvrEnDly_D(pDCTstat, RcvrEnDly, Final_Value, Channel, Receiver, dev, index_reg, Addl_Index, Pass); + + if(pDCTstat->ErrStatus & (1 << SB_SmallRCVR)) { + Errors |= 1 << SB_SmallRCVR; + } + + RcvrEnDly += Pass1MemClkDly; + if(RcvrEnDly > CTLRMaxDelay) { + CTLRMaxDelay = RcvrEnDly; + } + + } /* while Receiver */ + MaxDelay_CH[Channel] = CTLRMaxDelay; + } /* for Channel */ + + CTLRMaxDelay = MaxDelay_CH[0]; + if (MaxDelay_CH[1] > CTLRMaxDelay) + CTLRMaxDelay = MaxDelay_CH[1]; + + for (Channel = 0; Channel < 2; Channel++) { + mct_SetMaxLatency_D(pDCTstat, Channel, CTLRMaxDelay); /* program Ch A/B MaxAsyncLat to correspond with max delay */ + } + + ResetDCTWrPtr_D(dev, index_reg, Addl_Index); + + if(_DisableDramECC) { + mct_EnableDimmEccEn_D(pMCTstat, pDCTstat, _DisableDramECC); + } + + if (Pass == FirstPass) { + /*Disable DQSRcvrEn training mode */ + mct_DisableDQSRcvEn_D(pDCTstat); + } + + if(!_Wrap32Dis) { + msr = HWCR; + _RDMSR(msr, &lo, &hi); + lo &= ~(1<<17); /* restore HWCR.wrap32dis */ + _WRMSR(msr, lo, hi); + } + if(!_SSE2){ + cr4 = read_cr4(); + cr4 &= ~(1<<9); /* restore cr4.OSFXSR */ + write_cr4(cr4); + } + +#if DQS_TRAIN_DEBUG > 0 + { + u8 Channel; + printk(BIOS_DEBUG, "TrainRcvrEn: CH_MaxRdLat:\n"); + for(Channel = 0; Channel<2; Channel++) { + printk(BIOS_DEBUG, "Channel:%x: %x\n", + Channel, pDCTstat->CH_MaxRdLat[Channel]); + } + } +#endif + +#if DQS_TRAIN_DEBUG > 0 + { + u8 val; + u8 Channel, Receiver; + u8 i; + u8 *p; + + printk(BIOS_DEBUG, "TrainRcvrEn: CH_D_B_RCVRDLY:\n"); + for(Channel = 0; Channel < 2; Channel++) { + printk(BIOS_DEBUG, "Channel:%x\n"); + for(Receiver = 0; Receiver<8; Receiver+=2) { + printk(BIOS_DEBUG, "\t\tReceiver:%x:"); + p = pDCTstat->CH_D_B_RCVRDLY[Channel][Receiver>>1]; + for (i=0;i<8; i++) { + val = p[i]; + printk(BIOS_DEBUG, "%x ", val); + } + printk(BIOS_DEBUG, "\n"); + } + } + } +#endif + + printk(BIOS_DEBUG, "TrainRcvrEn: Status %x\n", pDCTstat->Status); + printk(BIOS_DEBUG, "TrainRcvrEn: ErrStatus %x\n", pDCTstat->ErrStatus); + printk(BIOS_DEBUG, "TrainRcvrEn: ErrCode %x\n", pDCTstat->ErrCode); + printk(BIOS_DEBUG, "TrainRcvrEn: Done\n\n"); +} + +u8 mct_InitReceiver_D(struct DCTStatStruc *pDCTstat, u8 dct) +{ + if (pDCTstat->DIMMValidDCT[dct] == 0 ) { + return 8; + } else { + return 0; + } +} + +static void mct_SetFinalRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly, u8 where, u8 Channel, u8 Receiver, u32 dev, u32 index_reg, u8 Addl_Index, u8 Pass/*, u8 *p*/) +{ + /* + * Program final DqsRcvEnDly to additional index for DQS receiver + * enabled delay + */ + mct_SetRcvrEnDly_D(pDCTstat, RcvrEnDly, where, Channel, Receiver, dev, index_reg, Addl_Index, Pass); +} + +static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat) +{ + u8 ch_end, ch; + u32 reg; + u32 dev; + u32 val; + + dev = pDCTstat->dev_dct; + if (pDCTstat->GangedMode) { + ch_end = 1; + } else { + ch_end = 2; + } + + for (ch=0; ch<ch_end; ch++) { + reg = 0x78 + 0x100 * ch; + val = Get_NB32(dev, reg); + val &= ~(1 << DqsRcvEnTrain); + Set_NB32(dev, reg, val); + } +} + +/* mct_ModifyIndex_D + * Function only used once so it was inlined. + */ + +/* mct_GetInitFlag_D + * Function only used once so it was inlined. + */ + +void mct_SetRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly, + u8 FinalValue, u8 Channel, u8 Receiver, u32 dev, + u32 index_reg, u8 Addl_Index, u8 Pass) +{ + u32 index; + u8 i; + u8 *p; + u32 val; + + if(RcvrEnDly == 0xFE) { + /*set the boudary flag */ + pDCTstat->Status |= 1 << SB_DQSRcvLimit; + } + + /* DimmOffset not needed for CH_D_B_RCVRDLY array */ + for(i=0; i < 8; i++) { + if(FinalValue) { + /*calculate dimm offset */ + p = pDCTstat->CH_D_B_RCVRDLY[Channel][Receiver >> 1]; + RcvrEnDly = p[i]; + } + + /* if flag=0, set DqsRcvEn value to reg. */ + /* get the register index from table */ + index = Table_DQSRcvEn_Offset[i >> 1]; + index += Addl_Index; /* DIMMx DqsRcvEn byte0 */ + val = Get_NB32_index_wait(dev, index_reg, index); + if(i & 1) { + /* odd byte lane */ + val &= ~(0xFF << 16); + val |= (RcvrEnDly << 16); + } else { + /* even byte lane */ + val &= ~0xFF; + val |= RcvrEnDly; + } + Set_NB32_index_wait(dev, index_reg, index, val); + } + +} + +static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u8 DQSRcvEnDly) +{ + u32 dev; + u32 reg; + u16 SubTotal; + u32 index_reg; + u32 reg_off; + u32 val; + u32 valx; + + if(pDCTstat->GangedMode) + Channel = 0; + + dev = pDCTstat->dev_dct; + reg_off = 0x100 * Channel; + index_reg = 0x98 + reg_off; + + /* Multiply the CAS Latency by two to get a number of 1/2 MEMCLKs units.*/ + val = Get_NB32(dev, 0x88 + reg_off); + SubTotal = ((val & 0x0f) + 4) << 1; /* SubTotal is 1/2 Memclk unit */ + + /* If registered DIMMs are being used then + * add 1 MEMCLK to the sub-total. + */ + val = Get_NB32(dev, 0x90 + reg_off); + if(!(val & (1 << UnBuffDimm))) + SubTotal += 2; + + /* If the address prelaunch is setup for 1/2 MEMCLKs then + * add 1, else add 2 to the sub-total. + * if (AddrCmdSetup || CsOdtSetup || CkeSetup) then K := K + 2; + */ + val = Get_NB32_index_wait(dev, index_reg, 0x04); + if(!(val & 0x00202020)) + SubTotal += 1; + else + SubTotal += 2; + + /* If the F2x[1, 0]78[RdPtrInit] field is 4, 5, 6 or 7 MEMCLKs, + * then add 4, 3, 2, or 1 MEMCLKs, respectively to the sub-total. */ + val = Get_NB32(dev, 0x78 + reg_off); + SubTotal += 8 - (val & 0x0f); + + /* Convert bits 7-5 (also referred to as the course delay) of + * the current (or worst case) DQS receiver enable delay to + * 1/2 MEMCLKs units, rounding up, and add this to the sub-total. + */ + SubTotal += DQSRcvEnDly >> 5; /*BOZO-no rounding up */ + + /* Add 5.5 to the sub-total. 5.5 represents part of the + * processor specific constant delay value in the DRAM + * clock domain. + */ + SubTotal <<= 1; /*scale 1/2 MemClk to 1/4 MemClk */ + SubTotal += 11; /*add 5.5 1/2MemClk */ + + /* Convert the sub-total (in 1/2 MEMCLKs) to northbridge + * clocks (NCLKs) as follows (assuming DDR400 and assuming + * that no P-state or link speed changes have occurred). + */ + + /* New formula: + * SubTotal *= 3*(Fn2xD4[NBFid]+4)/(3+Fn2x94[MemClkFreq])/2 */ + val = Get_NB32(dev, 0x94 + reg_off); + + /* SubTotal div 4 to scale 1/4 MemClk back to MemClk */ + val &= 7; + if (val >= 3) { + val <<= 1; + } else + val += 3; + valx = val << 2; + + val = Get_NB32(pDCTstat->dev_nbmisc, 0xD4); + SubTotal *= ((val & 0x1f) + 4 ) * 3; + + SubTotal /= valx; + if (SubTotal % valx) { /* round up */ + SubTotal++; + } + + /* Add 5 NCLKs to the sub-total. 5 represents part of the + * processor specific constant value in the northbridge + * clock domain. + */ + SubTotal += 5; + + pDCTstat->CH_MaxRdLat[Channel] = SubTotal; + if(pDCTstat->GangedMode) { + pDCTstat->CH_MaxRdLat[1] = SubTotal; + } + + /* Program the F2x[1, 0]78[MaxRdLatency] register with + * the total delay value (in NCLKs). + */ + reg = 0x78 + reg_off; + val = Get_NB32(dev, reg); + val &= ~(0x3ff << 22); + val |= (SubTotal & 0x3ff) << 22; + + /* program MaxRdLatency to correspond with current delay */ + Set_NB32(dev, reg, val); +} + +static u8 mct_SavePassRcvEnDly_D(struct DCTStatStruc *pDCTstat, + u8 rcvrEnDly, u8 Channel, + u8 receiver, u8 Pass) +{ + u8 i; + u8 mask_Saved, mask_Pass; + u8 *p; + + /* calculate dimm offset + * not needed for CH_D_B_RCVRDLY array + */ + + /* cmp if there has new DqsRcvEnDly to be recorded */ + mask_Pass = pDCTstat->DqsRcvEn_Pass; + + if(Pass == SecondPass) { + mask_Pass = ~mask_Pass; + } + + mask_Saved = pDCTstat->DqsRcvEn_Saved; + if(mask_Pass != mask_Saved) { + + /* find desired stack offset according to channel/dimm/byte */ + if(Pass == SecondPass) { + /* FIXME: SecondPass is never used for Barcelona p = pDCTstat->CH_D_B_RCVRDLY_1[Channel][receiver>>1]; */ + p = 0; /* Keep the compiler happy. */ + } else { + mask_Saved &= mask_Pass; + p = pDCTstat->CH_D_B_RCVRDLY[Channel][receiver>>1]; + } + for(i=0; i < 8; i++) { + /* cmp per byte lane */ + if(mask_Pass & (1 << i)) { + if(!(mask_Saved & (1 << i))) { + /* save RcvEnDly to stack, according to + the related Dimm/byte lane */ + p[i] = (u8)rcvrEnDly; + mask_Saved |= 1 << i; + } + } + } + pDCTstat->DqsRcvEn_Saved = mask_Saved; + } + return mct_SaveRcvEnDly_D_1Pass(pDCTstat, Pass); +} + +static u8 mct_CompareTestPatternQW0_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, + u32 addr, u8 channel, + u8 pattern, u8 Pass) +{ + /* Compare only the first beat of data. Since target addrs are cache + * line aligned, the Channel parameter is used to determine which + * cache QW to compare. + */ + + u8 *test_buf; + u8 i; + u8 result; + u8 value; + + if(Pass == FirstPass) { + if(pattern==1) { + test_buf = (u8 *)TestPattern1_D; + } else { + test_buf = (u8 *)TestPattern0_D; + } + } else { /* Second Pass */ + test_buf = (u8 *)TestPattern2_D; + } + + SetUpperFSbase(addr); + addr <<= 8; + + if((pDCTstat->Status & (1<<SB_128bitmode)) && channel ) { + addr += 8; /* second channel */ + test_buf += 8; + } + + print_debug_dqs_pair("\t\t\t\t\t\t test_buf = ", (u32)test_buf, " | addr_lo = ", addr, 4); + for (i=0; i<8; i++, addr ++) { + value = read32_fs(addr); + print_debug_dqs_pair("\t\t\t\t\t\t\t\t ", test_buf[i], " | ", value, 4); + + if (value == test_buf[i]) { + pDCTstat->DqsRcvEn_Pass |= (1<<i); + } else { + pDCTstat->DqsRcvEn_Pass &= ~(1<<i); + } + } + + result = DQS_FAIL; + + if (Pass == FirstPass) { + /* if first pass, at least one byte lane pass + * ,then DQS_PASS=1 and will set to related reg. + */ + if(pDCTstat->DqsRcvEn_Pass != 0) { + result = DQS_PASS; + } else { + result = DQS_FAIL; + } + + } else { + /* if second pass, at least one byte lane fail + * ,then DQS_FAIL=1 and will set to related reg. + */ + if(pDCTstat->DqsRcvEn_Pass != 0xFF) { + result = DQS_FAIL; + } else { + result = DQS_PASS; + } + } + + /* if second pass, we can't find the fail until FFh, + * then let it fail to save the final delay + */ + if((Pass == SecondPass) && (pDCTstat->Status & (1 << SB_DQSRcvLimit))) { + result = DQS_FAIL; + pDCTstat->DqsRcvEn_Pass = 0; + } + + /* second pass needs to be inverted + * FIXME? this could be inverted in the above code to start with... + */ + if(Pass == SecondPass) { + if (result == DQS_PASS) { + result = DQS_FAIL; + } else if (result == DQS_FAIL) { /* FIXME: doesn't need to be else if */ + result = DQS_PASS; + } + } + + + return result; +} + +static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat) +{ + /* Initialize the DQS Positions in preparation for + * Reciever Enable Training. + * Write Position is 1/2 Memclock Delay + * Read Position is 1/2 Memclock Delay + */ + u8 i; + for(i=0;i<2; i++){ + InitDQSPos4RcvrEn_D(pMCTstat, pDCTstat, i); + } +} + +static void InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 Channel) +{ + /* Initialize the DQS Positions in preparation for + * Reciever Enable Training. + * Write Position is no Delay + * Read Position is 1/2 Memclock Delay + */ + + u8 i, j; + u32 dword; + u8 dn = 4; /* TODO: Rev C could be 4 */ + u32 dev = pDCTstat->dev_dct; + u32 index_reg = 0x98 + 0x100 * Channel; + + /* FIXME: add Cx support */ + dword = 0x00000000; + for(i=1; i<=3; i++) { + for(j=0; j<dn; j++) + /* DIMM0 Write Data Timing Low */ + /* DIMM0 Write ECC Timing */ + Set_NB32_index_wait(dev, index_reg, i + 0x100 * j, dword); + } + + /* errata #180 */ + dword = 0x2f2f2f2f; + for(i=5; i<=6; i++) { + for(j=0; j<dn; j++) + /* DIMM0 Read DQS Timing Control Low */ + Set_NB32_index_wait(dev, index_reg, i + 0x100 * j, dword); + } + + dword = 0x0000002f; + for(j=0; j<dn; j++) + /* DIMM0 Read DQS ECC Timing Control */ + Set_NB32_index_wait(dev, index_reg, 7 + 0x100 * j, dword); +} + +void SetEccDQSRcvrEn_D(struct DCTStatStruc *pDCTstat, u8 Channel) +{ + u32 dev; + u32 index_reg; + u32 index; + u8 ChipSel; + u8 *p; + u32 val; + + dev = pDCTstat->dev_dct; + index_reg = 0x98 + Channel * 0x100; + index = 0x12; + p = pDCTstat->CH_D_BC_RCVRDLY[Channel]; + print_debug_dqs("\t\tSetEccDQSRcvrPos: Channel ", Channel, 2); + for(ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) { + val = p[ChipSel>>1]; + Set_NB32_index_wait(dev, index_reg, index, val); + print_debug_dqs_pair("\t\tSetEccDQSRcvrPos: ChipSel ", + ChipSel, " rcvr_delay ", val, 2); + index += 3; + } +} + +static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 Channel) +{ + u8 ChipSel; + u16 EccDQSLike; + u8 EccDQSScale; + u32 val, val0, val1; + + EccDQSLike = pDCTstat->CH_EccDQSLike[Channel]; + EccDQSScale = pDCTstat->CH_EccDQSScale[Channel]; + + for (ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) { + if(mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, ChipSel)) { + u8 *p; + p = pDCTstat->CH_D_B_RCVRDLY[Channel][ChipSel>>1]; + + /* DQS Delay Value of Data Bytelane + * most like ECC byte lane */ + val0 = p[EccDQSLike & 0x07]; + /* DQS Delay Value of Data Bytelane + * 2nd most like ECC byte lane */ + val1 = p[(EccDQSLike>>8) & 0x07]; + + if (!(pDCTstat->Status & (1 << SB_Registered))) { + if(val0 > val1) { + val = val0 - val1; + } else { + val = val1 - val0; + } + + val *= ~EccDQSScale; + val >>= 8; /* /256 */ + + if(val0 > val1) { + val -= val1; + } else { + val += val0; + } + } else { + val = val1 - val0; + val += val1; + } + + pDCTstat->CH_D_BC_RCVRDLY[Channel][ChipSel>>1] = val; + } + } + SetEccDQSRcvrEn_D(pDCTstat, Channel); +} + +void mctSetEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstatA) +{ + u8 Node; + u8 i; + + for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) { + struct DCTStatStruc *pDCTstat; + pDCTstat = pDCTstatA + Node; + if (!pDCTstat->NodePresent) + break; + if (pDCTstat->DCTSysLimit) { + for(i=0; i<2; i++) + CalcEccDQSRcvrEn_D(pMCTstat, pDCTstat, i); + } + } +} + +void phyAssistedMemFnceTraining(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstatA) +{ + u8 Node = 0; + struct DCTStatStruc *pDCTstat; + + /* FIXME: skip for Ax */ + while (Node < MAX_NODES_SUPPORTED) { + pDCTstat = pDCTstatA + Node; + + if(pDCTstat->DCTSysLimit) { + fenceDynTraining_D(pMCTstat, pDCTstat, 0); + fenceDynTraining_D(pMCTstat, pDCTstat, 1); + } + Node++; + } +} + +static void fenceDynTraining_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct) +{ + u16 avRecValue; + u32 val; + u32 dev; + u32 index_reg = 0x98 + 0x100 * dct; + u32 index; + + /* BIOS first programs a seed value to the phase recovery engine + * (recommended 19) registers. + * Dram Phase Recovery Control Register (F2x[1,0]9C_x[51:50] and + * F2x[1,0]9C_x52.) . + */ + dev = pDCTstat->dev_dct; + for (index = 0x50; index <= 0x52; index ++) { + val = Get_NB32_index_wait(dev, index_reg, index) & ~0xFF; + val |= (FenceTrnFinDlySeed & 0x1F); + if (index != 0x52) { + val &= ~(0xFF << 8); + val |= (val & 0xFF) << 8; + val &= 0xFFFF; + val |= val << 16; + } + Set_NB32_index_wait(dev, index_reg, index, val); + } + + /* Set F2x[1,0]9C_x08[PhyFenceTrEn]=1. */ + val = Get_NB32_index_wait(dev, index_reg, 0x08); + val |= 1 << PhyFenceTrEn; + Set_NB32_index_wait(dev, index_reg, 0x08, val); + + /* Wait 200 MEMCLKs. */ + mct_Wait(50000); /* wait 200us */ + + /* Clear F2x[1,0]9C_x08[PhyFenceTrEn]=0. */ + val = Get_NB32_index_wait(dev, index_reg, 0x08); + val &= ~(1 << PhyFenceTrEn); + Set_NB32_index_wait(dev, index_reg, 0x08, val); + + /* BIOS reads the phase recovery engine registers + * F2x[1,0]9C_x[51:50] and F2x[1,0]9C_x52. */ + avRecValue = 0; + for (index = 0x50; index <= 0x52; index ++) { + val = Get_NB32_index_wait(dev, index_reg, index); + avRecValue += val & 0x7F; + if (index != 0x52) { + avRecValue += (val >> 8) & 0x7F; + avRecValue += (val >> 16) & 0x7F; + avRecValue += (val >> 24) & 0x7F; + } + } + + val = avRecValue / 9; + if (avRecValue % 9) + val++; + avRecValue = val; + + /* Write the (averaged value -8) to F2x[1,0]9C_x0C[PhyFence]. */ + /* inlined mct_AdjustFenceValue() */ + /* The RBC0 is not supported. */ + /* if (pDCTstat->LogicalCPUID & AMD_RB_C0) + avRecValue -= 3; + else + */ + if (pDCTstat->LogicalCPUID & AMD_DR_Cx) + avRecValue -= 8; + else if (pDCTstat->LogicalCPUID & AMD_DR_Bx) + avRecValue -= 8; + + val = Get_NB32_index_wait(dev, index_reg, 0x0C); + val &= ~(0x1F << 16); + val |= (avRecValue & 0x1F) << 16; + Set_NB32_index_wait(dev, index_reg, 0x0C, val); + + /* Rewrite F2x[1,0]9C_x04-DRAM Address/Command Timing Control Register + * delays (both channels). */ + val = Get_NB32_index_wait(dev, index_reg, 0x04); + Set_NB32_index_wait(dev, index_reg, 0x04, val); +} + +void mct_Wait(u32 cycles) +{ + u32 saved; + u32 hi, lo, msr; + + /* Wait # of 50ns cycles + This seems like a hack to me... */ + + cycles <<= 3; /* x8 (number of 1.25ns ticks) */ + + msr = 0x10; /* TSC */ + _RDMSR(msr, &lo, &hi); + saved = lo; + do { + _RDMSR(msr, &lo, &hi); + } while (lo - saved < cycles ); +} |