diff options
Diffstat (limited to 'src/northbridge')
-rw-r--r-- | src/northbridge/amd/amdk8/Config.lb | 8 | ||||
-rw-r--r-- | src/northbridge/amd/amdk8/amdk8.h | 11 | ||||
-rw-r--r-- | src/northbridge/amd/amdk8/amdk8_acpi.c | 156 | ||||
-rw-r--r-- | src/northbridge/amd/amdk8/amdk8_f.h | 561 | ||||
-rw-r--r-- | src/northbridge/amd/amdk8/amdk8_f_pci.c | 57 | ||||
-rw-r--r-- | src/northbridge/amd/amdk8/coherent_ht.c | 4 | ||||
-rw-r--r-- | src/northbridge/amd/amdk8/coherent_ht_car.c | 17 | ||||
-rw-r--r-- | src/northbridge/amd/amdk8/get_sblk_pci1234.c | 48 | ||||
-rw-r--r-- | src/northbridge/amd/amdk8/incoherent_ht.c | 298 | ||||
-rw-r--r-- | src/northbridge/amd/amdk8/misc_control.c | 2 | ||||
-rw-r--r-- | src/northbridge/amd/amdk8/northbridge.c | 76 | ||||
-rw-r--r-- | src/northbridge/amd/amdk8/raminit.c | 26 | ||||
-rw-r--r-- | src/northbridge/amd/amdk8/raminit.h | 3 | ||||
-rw-r--r-- | src/northbridge/amd/amdk8/raminit_f.c | 3065 | ||||
-rw-r--r-- | src/northbridge/amd/amdk8/raminit_f_dqs.c | 2036 | ||||
-rw-r--r-- | src/northbridge/amd/amdk8/setup_resource_map.c | 16 | ||||
-rw-r--r-- | src/northbridge/amd/amdk8/spd_ddr2.h | 66 | ||||
-rw-r--r-- | src/northbridge/amd/amdk8/ssdt.dsl | 78 |
18 files changed, 6298 insertions, 230 deletions
diff --git a/src/northbridge/amd/amdk8/Config.lb b/src/northbridge/amd/amdk8/Config.lb index 23245a66b9..bc6054f75d 100644 --- a/src/northbridge/amd/amdk8/Config.lb +++ b/src/northbridge/amd/amdk8/Config.lb @@ -19,6 +19,14 @@ end if HAVE_ACPI_TABLES object amdk8_acpi.o + makerule ssdt.c + depends "$(TOP)/src/northbridge/amd/amdk8/ssdt.dsl" + action "/usr/sbin/iasl -tc $(TOP)/src/northbridge/amd/amdk8/ssdt.dsl" + action "perl -pi -e 's/AmlCode/AmlCode_ssdt/g' ssdt.hex" + action "mv ssdt.hex ssdt.c" + end + object ./ssdt.o end + object get_sblk_pci1234.o diff --git a/src/northbridge/amd/amdk8/amdk8.h b/src/northbridge/amd/amdk8/amdk8.h index 7e064af974..56b92a2349 100644 --- a/src/northbridge/amd/amdk8/amdk8.h +++ b/src/northbridge/amd/amdk8/amdk8.h @@ -2,6 +2,10 @@ #define AMDK8_H +#if K8_REV_F_SUPPORT == 1 + #include "amdk8_f.h" + +#else /* Definitions of various K8 registers */ /* Function 0 */ #define HT_TRANSACTION_CONTROL 0x68 @@ -55,6 +59,7 @@ #define DRAM_CSBASE 0x40 #define DRAM_CSMASK 0x60 #define DRAM_BANK_ADDR_MAP 0x80 + #define DRAM_TIMING_LOW 0x88 #define DTL_TCL_SHIFT 0 #define DTL_TCL_MASK 0x7 @@ -96,6 +101,7 @@ #define DTL_TWR_BASE 2 #define DTL_TWR_MIN 2 #define DTL_TWR_MAX 3 + #define DRAM_TIMING_HIGH 0x8c #define DTH_TWTR_SHIFT 0 #define DTH_TWTR_MASK 0x1 @@ -122,6 +128,7 @@ #define DTH_TWCL_BASE 1 #define DTH_TWCL_MIN 1 #define DTH_TWCL_MAX 2 + #define DRAM_CONFIG_LOW 0x90 #define DCL_DLL_Disable (1<<0) #define DCL_D_DRV (1<<1) @@ -140,7 +147,7 @@ #define DCL_DisInRcvrs (1<<24) #define DCL_BypMax_SHIFT 25 #define DCL_En2T (1<<28) -#define DCL_UpperCSMap (1<<29) + #define DRAM_CONFIG_HIGH 0x94 #define DCH_ASYNC_LAT_SHIFT 0 #define DCH_ASYNC_LAT_MASK 0xf @@ -232,3 +239,5 @@ #define ConnectionPending (1 << 4) #endif + +#endif /* AMDK8_H */ diff --git a/src/northbridge/amd/amdk8/amdk8_acpi.c b/src/northbridge/amd/amdk8/amdk8_acpi.c index 71bcdbbd1a..35945b341e 100644 --- a/src/northbridge/amd/amdk8/amdk8_acpi.c +++ 
b/src/northbridge/amd/amdk8/amdk8_acpi.c @@ -43,6 +43,7 @@ acknowledgement of AMD's proprietary rights in them. #include <device/pci.h> #include <cpu/x86/msr.h> #include <cpu/amd/mtrr.h> +#include <cpu/amd/amdk8_sysconf.h> //it seems these function can be moved arch/i386/boot/acpi.c @@ -112,6 +113,8 @@ unsigned long acpi_create_srat_lapics(unsigned long current) return current; } + + static unsigned long resk(uint64_t value) { unsigned long resultk; @@ -153,7 +156,6 @@ void set_srat_mem(void *gp, struct device *dev, struct resource *res) state->current += acpi_create_srat_mem((acpi_srat_mem_t *)state->current, (res->index & 0xf), basek, sizek, 1); // need to figure out NV } - unsigned long acpi_fill_srat(unsigned long current) { struct acpi_srat_mem_state srat_mem_state; @@ -175,5 +177,157 @@ unsigned long acpi_fill_srat(unsigned long current) #endif return current; } + + +unsigned long acpi_fill_slit(unsigned long current) +{ + /* need to find out the node num at first */ + /* fill the first 8 byte with that num */ + /* fill the next num*num byte with distance, local is 10, 1 hop mean 20, and 2 hop with 30.... 
*/ + + /* because We has assume that we know the topology of the HT connection, So we can have set if we know the node_num */ + static uint8_t hops_8[] = { 0, 1, 1, 2, 2, 3, 3, 4, + 1, 0, 2, 1, 3, 2, 4, 3, + 1, 2, 0, 1, 1, 2, 2, 3, + 2, 1, 1, 0, 2, 1, 3, 2, + 2, 3, 1, 2, 0, 1, 1, 2, + 3, 2, 2, 1, 1, 0, 2, 1, + 3, 4, 2, 3, 1, 2, 0, 1, + 4, 4, 3, 2, 2, 1, 1, 0 }; + +// uint8_t outer_node[8]; + + uint8_t *p = (uint8_t *)current; + int nodes = sysconf.nodes; + int i,j; + memset(p, 0, 8+nodes*nodes); +// memset((uint8_t *)outer_node, 0, 8); + *p = (uint8_t) nodes; + p += 8; + +#if 0 + for(i=0;i<sysconf.hc_possible_num;i++) { + if((sysconf.pci1234[i]&1) !=1 ) continue; + outer_node[(sysconf.pci1234[i] >> 4) & 0xf] = 1; // mark the outer node + } +#endif + + for(i=0;i<nodes;i++) { + for(j=0;j<nodes; j++) { + if(i==j) { p[i*nodes+j] = 10; } + else { +#if 0 + int k; + uint8_t latency_factor = 0; + int k_start, k_end; + if(i<j) { + k_start = i; + k_end = j; + } else { + k_start = j; + k_end = i; + } + for(k=k_start;k<=k_end; k++) { + if(outer_node[k]) { + latency_factor = 1; + break; + } + } + p[i*nodes+j] = hops_8[i*nodes+j] * 2 + latency_factor + 10; +#else + p[i*nodes+j] = hops_8[i*nodes+j] * 2 + 10; +#endif + + + } + } + } + + current += 8+nodes*nodes; + + return current; +} + + //end + +// moved from mb acpi_tables.c +static void int_to_stream(uint32_t val, uint8_t *dest) +{ + int i; + for(i=0;i<4;i++) { + *(dest+i) = (val >> (8*i)) & 0xff; + } +} + + +// used by acpi_tables.h + +void update_ssdt(void *ssdt) +{ + uint8_t *BUSN; + uint8_t *MMIO; + uint8_t *PCIO; + uint8_t *SBLK; + uint8_t *TOM1; + uint8_t *SBDN; + uint8_t *HCLK; + uint8_t *HCDN; + uint8_t *CBST; + + int i; + device_t dev; + uint32_t dword; + msr_t msr; + + BUSN = ssdt+0x3a; //+5 will be next BUSN + MMIO = ssdt+0x57; //+5 will be next MMIO + PCIO = ssdt+0xaf; //+5 will be next PCIO + SBLK = ssdt+0xdc; // one byte + TOM1 = ssdt+0xe3; // + SBDN = ssdt+0xed;// + HCLK = ssdt+0xfa; //+5 will be next HCLK + 
HCDN = ssdt+0x12a; //+5 will be next HCDN + CBST = ssdt+0x157; // + + dev = dev_find_slot(0, PCI_DEVFN(0x18, 1)); + for(i=0;i<4;i++) { + dword = pci_read_config32(dev, 0xe0+i*4); + int_to_stream(dword, BUSN+i*5); + } + for(i=0;i<0x10;i++) { + dword = pci_read_config32(dev, 0x80+i*4); + int_to_stream(dword, MMIO+i*5); + } + for(i=0;i<0x08;i++) { + dword = pci_read_config32(dev, 0xc0+i*4); + int_to_stream(dword, PCIO+i*5); + } + + *SBLK = (uint8_t)(sysconf.sblk); + + msr = rdmsr(TOP_MEM); + int_to_stream(msr.lo, TOM1); + + for(i=0;i<sysconf.hc_possible_num;i++) { + int_to_stream(sysconf.pci1234[i], HCLK + i*5); + int_to_stream(sysconf.hcdn[i], HCDN + i*5); + } + for(i=sysconf.hc_possible_num; i<HC_POSSIBLE_NUM; i++) { // in case we set array size to other than 8 + int_to_stream(0x00000000, HCLK + i*5); + int_to_stream(0x20202020, HCDN + i*5); + } + + int_to_stream(sysconf.sbdn, SBDN); + + if((sysconf.pci1234[0] >> 12) & 0xff) { //sb chain on other than bus 0 + *CBST = (uint8_t) (0x0f); + } + else { + *CBST = (uint8_t) (0x00); + } + +} + +//end diff --git a/src/northbridge/amd/amdk8/amdk8_f.h b/src/northbridge/amd/amdk8/amdk8_f.h new file mode 100644 index 0000000000..7901d08525 --- /dev/null +++ b/src/northbridge/amd/amdk8/amdk8_f.h @@ -0,0 +1,561 @@ +#ifndef AMDK8_F_H + +#define AMDK8_F_H +/* Definitions of various K8 registers */ +/* Function 0 */ +#define HT_TRANSACTION_CONTROL 0x68 +#define HTTC_DIS_RD_B_P (1 << 0) +#define HTTC_DIS_RD_DW_P (1 << 1) +#define HTTC_DIS_WR_B_P (1 << 2) +#define HTTC_DIS_WR_DW_P (1 << 3) +#define HTTC_DIS_MTS (1 << 4) +#define HTTC_CPU1_EN (1 << 5) +#define HTTC_CPU_REQ_PASS_PW (1 << 6) +#define HTTC_CPU_RD_RSP_PASS_PW (1 << 7) +#define HTTC_DIS_P_MEM_C (1 << 8) +#define HTTC_DIS_RMT_MEM_C (1 << 9) +#define HTTC_DIS_FILL_P (1 << 10) +#define HTTC_RSP_PASS_PW (1 << 11) +#define HTTC_CHG_ISOC_TO_ORD (1 << 12) +#define HTTC_BUF_REL_PRI_SHIFT 13 +#define HTTC_BUF_REL_PRI_MASK 3 +#define HTTC_BUF_REL_PRI_64 0 +#define HTTC_BUF_REL_PRI_16 
1 +#define HTTC_BUF_REL_PRI_8 2 +#define HTTC_BUF_REL_PRI_2 3 +#define HTTC_LIMIT_CLDT_CFG (1 << 15) +#define HTTC_LINT_EN (1 << 16) +#define HTTC_APIC_EXT_BRD_CST (1 << 17) +#define HTTC_APIC_EXT_ID (1 << 18) +#define HTTC_APIC_EXT_SPUR (1 << 19) +#define HTTC_SEQ_ID_SRC_NODE_EN (1 << 20) +#define HTTC_DS_NP_REQ_LIMIT_SHIFT 21 +#define HTTC_DS_NP_REQ_LIMIT_MASK 3 +#define HTTC_DS_NP_REQ_LIMIT_NONE 0 +#define HTTC_DS_NP_REQ_LIMIT_1 1 +#define HTTC_DS_NP_REQ_LIMIT_4 2 +#define HTTC_DS_NP_REQ_LIMIT_8 3 +#define HTTC_MED_PRI_BYP_CNT_SHIFT 24 +#define HTTC_MED_PRI_BYP_CNT_MASK 3 +#define HTTC_HI_PRI_BYP_CNT_SHIFT 26 +#define HTTC_HI_PRI_BYP_CNT_MASK 3 + + +/* Function 1 */ +#define PCI_IO_BASE0 0xc0 +#define PCI_IO_BASE1 0xc8 +#define PCI_IO_BASE2 0xd0 +#define PCI_IO_BASE3 0xd8 +#define PCI_IO_BASE_VGA_EN (1 << 4) +#define PCI_IO_BASE_NO_ISA (1 << 5) + + +/* Function 2 */ +#define DRAM_CSBASE 0x40 +#define DRAM_CSMASK 0x60 +#define DRAM_BANK_ADDR_MAP 0x80 + +#define DRAM_CTRL 0x78 +#define DC_RdPtrInit_SHIFT 0 +#define DC_RdPrtInit_MASK 0xf +#define DC_RdPadRcvFifoDly_SHIFT 4 +#define DC_RdPadRcvFifoDly_MASK 7 +#define DC_RdPadRcvFiloDly_1_5_CLK 2 +#define DC_RdPadRcvFiloDly_2_CLK 3 +#define DC_RdPadRcvFiloDly_2_5_CLK 4 +#define DC_RdPadRcvFiloDly_3_CLK 5 +#define DC_RdPadRcvFiloDly_3_5_CLK 6 +#define DC_AltVidC3MemClkTriEn (1<<16) +#define DC_DllTempAdjTime_SHIFT 17 +#define DC_DllTempAdjTime_MASK 1 +#define DC_DllTempAdjTime_5_MS 0 +#define DC_DllTempAdjTime_1_MS 1 +#define DC_DqsRcvEnTrain (1<<18) + +#define DRAM_INIT 0x7c +#define DI_MrsAddress_SHIFT 0 +#define DI_MrsAddress_MASK 0xffff +#define DI_MrsBank_SHIFT 16 +#define DI_MrsBank_MASK 7 +#define DI_SendRchgAll (1<<24) +#define DI_SendAutoRefresh (1<<25) +#define DI_SendMrsCmd (1<<26) +#define DI_DeassertMemRstX (1<<27) +#define DI_AssertCke (1<<28) +#define DI_EnDramInit (1<<31) + +#define DRAM_TIMING_LOW 0x88 +#define DTL_TCL_SHIFT 0 +#define DTL_TCL_MASK 7 +#define DTL_TCL_BASE 1 +#define DTL_TCL_MIN 3 
+#define DTL_TCL_MAX 6 +#define DTL_TRCD_SHIFT 4 +#define DTL_TRCD_MASK 3 +#define DTL_TRCD_BASE 3 +#define DTL_TRCD_MIN 3 +#define DTL_TRCD_MAX 6 +#define DTL_TRP_SHIFT 8 +#define DTL_TRP_MASK 3 +#define DTL_TRP_BASE 3 +#define DTL_TRP_MIN 3 +#define DTL_TRP_MAX 6 +#define DTL_TRTP_SHIFT 11 +#define DTL_TRTP_MASK 1 +#define DTL_TRTP_BASE 2 +#define DTL_TRTP_MIN 2 /* 4 for 64 bytes*/ +#define DTL_TRTP_MAX 3 /* 5 for 64 bytes */ +#define DTL_TRAS_SHIFT 12 +#define DTL_TRAS_MASK 0xf +#define DTL_TRAS_BASE 3 +#define DTL_TRAS_MIN 5 +#define DTL_TRAS_MAX 18 +#define DTL_TRC_SHIFT 16 +#define DTL_TRC_MASK 0xf +#define DTL_TRC_BASE 11 +#define DTL_TRC_MIN 11 +#define DTL_TRC_MAX 26 +#define DTL_TWR_SHIFT 20 +#define DTL_TWR_MASK 3 +#define DTL_TWR_BASE 3 +#define DTL_TWR_MIN 3 +#define DTL_TWR_MAX 6 +#define DTL_TRRD_SHIFT 22 +#define DTL_TRRD_MASK 3 +#define DTL_TRRD_BASE 2 +#define DTL_TRRD_MIN 2 +#define DTL_TRRD_MAX 5 +#define DTL_MemClkDis_SHIFT 24 /* Channel A */ +#define DTL_MemClkDis3 (1 << 26) +#define DTL_MemClkDis2 (1 << 27) +#define DTL_MemClkDis1 (1 << 28) +#define DTL_MemClkDis0 (1 << 29) +#define DTL_MemClkDis1_AM2 (0x51 << 24) +#define DTL_MemClkDis0_AM2 (0xa2 << 24) +#define DTL_MemClkDis0_S1g1 (0xa2 << 24) + +/* DTL_MemClkDis for m2 and s1g1 is different */ + +#define DRAM_TIMING_HIGH 0x8c +#define DTH_TRWTTO_SHIFT 4 +#define DTH_TRWTTO_MASK 7 +#define DTH_TRWTTO_BASE 2 +#define DTH_TRWTTO_MIN 2 +#define DTH_TRWTTO_MAX 9 +#define DTH_TWTR_SHIFT 8 +#define DTH_TWTR_MASK 3 +#define DTH_TWTR_BASE 0 +#define DTH_TWTR_MIN 1 +#define DTH_TWTR_MAX 3 +#define DTH_TWRRD_SHIFT 10 +#define DTH_TWRRD_MASK 3 +#define DTH_TWRRD_BASE 0 +#define DTH_TWRRD_MIN 0 +#define DTH_TWRRD_MAX 3 +#define DTH_TWRWR_SHIFT 12 +#define DTH_TWRWR_MASK 3 +#define DTH_TWRWR_BASE 1 +#define DTH_TWRWR_MIN 1 +#define DTH_TWRWR_MAX 3 +#define DTH_TRDRD_SHIFT 14 +#define DTH_TRDRD_MASK 3 +#define DTH_TRDRD_BASE 2 +#define DTH_TRDRD_MIN 2 +#define DTH_TRDRD_MAX 5 +#define DTH_TREF_SHIFT 16 
+#define DTH_TREF_MASK 3 +#define DTH_TREF_7_8_US 2 +#define DTH_TREF_3_9_US 3 +#define DTH_TRFC0_SHIFT 20 /* for Logical DIMM0 */ +#define DTH_TRFC_MASK 7 +#define DTH_TRFC_75_256M 0 +#define DTH_TRFC_105_512M 1 +#define DTH_TRFC_127_5_1G 2 +#define DTH_TRFC_195_2G 3 +#define DTH_TRFC_327_5_4G 4 +#define DTH_TRFC1_SHIFT 23 /*for Logical DIMM1 */ +#define DTH_TRFC2_SHIFT 26 /*for Logical DIMM2 */ +#define DTH_TRFC3_SHIFT 29 /*for Logical DIMM3 */ + +#define DRAM_CONFIG_LOW 0x90 +#define DCL_InitDram (1<<0) +#define DCL_ExitSelfRef (1<<1) +#define DCL_DramTerm_SHIFT 4 +#define DCL_DramTerm_MASK 3 +#define DCL_DramTerm_No 0 +#define DCL_DramTerm_75_OH 1 +#define DCL_DramTerm_150_OH 2 +#define DCL_DramTerm_50_OH 3 +#define DCL_DrvWeak (1<<7) +#define DCL_ParEn (1<<8) +#define DCL_SelfRefRateEn (1<<9) +#define DCL_BurstLength32 (1<<10) +#define DCL_Width128 (1<<11) +#define DCL_X4Dimm_SHIFT 12 +#define DCL_X4Dimm_MASK 0xf +#define DCL_UnBuffDimm (1<<16) +#define DCL_DimmEccEn (1<<19) + +#define DRAM_CONFIG_HIGH 0x94 +#define DCH_MemClkFreq_SHIFT 0 +#define DCH_MemClkFreq_MASK 7 +#define DCH_MemClkFreq_200MHz 0 +#define DCH_MemClkFreq_266MHz 1 +#define DCH_MemClkFreq_333MHz 2 +#define DCH_MemClkFreq_400MHz 3 +#define DCH_MemClkFreqVal (1<<3) +#define DCH_MaxAsyncLat_SHIFT 4 +#define DCH_MaxAsyncLat_MASK 0xf +#define DCH_MaxAsyncLat_BASE 0 +#define DCH_MaxAsyncLat_MIN 0 +#define DCH_MaxAsyncLat_MAX 15 +#define DCH_RDqsEn (1<<12) +#define DCH_DisDramInterface (1<<14) +#define DCH_PowerDownEn (1<<15) +#define DCH_PowerDownMode_SHIFT 16 +#define DCH_PowerDownMode_MASK 1 +#define DCH_PowerDownMode_Channel_CKE 0 +#define DCH_PowerDownMode_ChipSelect_CKE 1 +#define DCH_FourRankSODimm (1<<17) +#define DCH_FourRankRDimm (1<<18) +#define DCH_SlowAccessMode (1<<19) +#define DCH_BankSwizzleMode (1<<22) +#define DCH_DcqBypassMax_SHIFT 24 +#define DCH_DcqBypassMax_MASK 0xf +#define DCH_DcqBypassMax_BASE 0 +#define DCH_DcqBypassMax_MIN 0 +#define DCH_DcqBypassMax_MAX 15 +#define 
DCH_FourActWindow_SHIFT 28 +#define DCH_FourActWindow_MASK 0xf +#define DCH_FourActWindow_BASE 7 +#define DCH_FourActWindow_MIN 8 +#define DCH_FourActWindow_MAX 20 + + +// for 0x98 index and 0x9c data +#define DRAM_CTRL_ADDI_DATA_OFFSET 0x98 +#define DCAO_DctOffset_SHIFT 0 +#define DCAO_DctOffset_MASK 0x3fffffff +#define DCAO_DctAccessWrite (1<<30) +#define DCAO_DctAccessDone (1<<31) + +#define DRAM_CTRL_ADDI_DATA_PORT 0x9c + +#define DRAM_OUTPUT_DRV_COMP_CTRL 0x00 +#define DODCC_CkeDrvStren_SHIFT 0 +#define DODCC_CkeDrvStren_MASK 3 +#define DODCC_CkeDrvStren_1_0X 0 +#define DODCC_CkeDrvStren_1_25X 1 +#define DODCC_CkeDrvStren_1_5X 2 +#define DODCC_CkeDrvStren_2_0X 3 +#define DODCC_CsOdtDrvStren_SHIFT 4 +#define DODCC_CsOdtDrvStren_MASK 3 +#define DODCC_CsOdtDrvStren_1_0X 0 +#define DODCC_CsOdtDrvStren_1_25X 1 +#define DODCC_CsOdtDrvStren_1_5X 2 +#define DODCC_CsOdtDrvStren_2_0X 3 +#define DODCC_AddrCmdDrvStren_SHIFT 8 +#define DODCC_AddrCmdDrvStren_MASK 3 +#define DODCC_AddrCmdDrvStren_1_0X 0 +#define DODCC_AddrCmdDrvStren_1_25X 1 +#define DODCC_AddrCmdDrvStren_1_5X 2 +#define DODCC_AddrCmdDrvStren_2_0X 3 +#define DODCC_ClkDrvStren_SHIFT 12 +#define DODCC_ClkDrvStren_MASK 3 +#define DODCC_ClkDrvStren_0_75X 0 +#define DODCC_ClkDrvStren_1_0X 1 +#define DODCC_ClkDrvStren_1_25X 2 +#define DODCC_ClkDrvStren_1_5X 3 +#define DODCC_DataDrvStren_SHIFT 16 +#define DODCC_DataDrvStren_MASK 3 +#define DODCC_DataDrvStren_0_75X 0 +#define DODCC_DataDrvStren_1_0X 1 +#define DODCC_DataDrvStren_1_25X 2 +#define DODCC_DataDrvStren_1_5X 3 +#define DODCC_DqsDrvStren_SHIFT 20 +#define DODCC_DqsDrvStren_MASK 3 +#define DODCC_DqsDrvStren_0_75X 0 +#define DODCC_DqsDrvStren_1_0X 1 +#define DODCC_DqsDrvStren_1_25X 2 +#define DODCC_DqsDrvStren_1_5X 3 +#define DODCC_ProcOdt_SHIFT 28 +#define DODCC_ProcOdt_MASK 3 +#define DODCC_ProcOdt_300_OHMS 0 +#define DODCC_ProcOdt_150_OHMS 1 +#define DODCC_ProcOdt_75_OHMS 2 + +#define DRAM_WRITE_DATA_TIMING_CTRL_LOW 0x01 +#define 
DWDTCL_WrDatTimeByte0_SHIFT 0 +#define DWDTC_WrDatTimeByte_MASK 0x3f +#define DWDTC_WrDatTimeByte_BASE 0 +#define DWDTC_WrDatTimeByte_MIN 0 +#define DWDTC_WrDatTimeByte_MAX 47 +#define DWDTCL_WrDatTimeByte1_SHIFT 8 +#define DWDTCL_WrDatTimeByte2_SHIFT 16 +#define DWDTCL_WrDatTimeByte3_SHIFT 24 + +#define DRAM_WRITE_DATA_TIMING_CTRL_HIGH 0x02 +#define DWDTCH_WrDatTimeByte4_SHIFT 0 +#define DWDTCH_WrDatTimeByte5_SHIFT 8 +#define DWDTCH_WrDatTimeByte6_SHIFT 16 +#define DWDTCH_WrDatTimeByte7_SHIFT 24 + +#define DRAM_WRITE_DATA_ECC_TIMING_CTRL 0x03 +#define DWDETC_WrChkTime_SHIFT 0 +#define DWDETC_WrChkTime_MASK 0x3f +#define DWDETC_WrChkTime_BASE 0 +#define DWDETC_WrChkTime_MIN 0 +#define DWDETC_WrChkTime_MAX 47 + +#define DRAM_ADDR_TIMING_CTRL 0x04 +#define DATC_CkeFineDelay_SHIFT 0 +#define DATC_CkeFineDelay_MASK 0x1f +#define DATC_CkeFineDelay_BASE 0 +#define DATC_CkeFineDelay_MIN 0 +#define DATC_CkeFineDelay_MAX 31 +#define DATC_CkeSetup (1<<5) +#define DATC_CsOdtFineDelay_SHIFT 8 +#define DATC_CsOdtFineDelay_MASK 0x1f +#define DATC_CsOdtFineDelay_BASE 0 +#define DATC_CsOdtFineDelay_MIN 0 +#define DATC_CsOdtFineDelay_MAX 31 +#define DATC_CsOdtSetup (1<<13) +#define DATC_AddrCmdFineDelay_SHIFT 16 +#define DATC_AddrCmdFineDelay_MASK 0x1f +#define DATC_AddrCmdFineDelay_BASE 0 +#define DATC_AddrCmdFineDelay_MIN 0 +#define DATC_AddrCmdFineDelay_MAX 31 +#define DATC_AddrCmdSetup (1<<21) + +#define DRAM_READ_DQS_TIMING_CTRL_LOW 0x05 +#define DRDTCL_RdDqsTimeByte0_SHIFT 0 +#define DRDTC_RdDqsTimeByte_MASK 0x3f +#define DRDTC_RdDqsTimeByte_BASE 0 +#define DRDTC_RdDqsTimeByte_MIN 0 +#define DRDTC_RdDqsTimeByte_MAX 47 +#define DRDTCL_RdDqsTimeByte1_SHIFT 8 +#define DRDTCL_RdDqsTimeByte2_SHIFT 16 +#define DRDTCL_RdDqsTimeByte3_SHIFT 24 + +#define DRAM_READ_DQS_TIMING_CTRL_HIGH 0x06 +#define DRDTCH_RdDqsTimeByte4_SHIFT 0 +#define DRDTCH_RdDqsTimeByte5_SHIFT 8 +#define DRDTCH_RdDqsTimeByte6_SHIFT 16 +#define DRDTCH_RdDqsTimeByte7_SHIFT 24 + +#define DRAM_READ_DQS_ECC_TIMING_CTRL 
0x07 +#define DRDETC_RdDqsTimeCheck_SHIFT 0 +#define DRDETC_RdDqsTimeCheck_MASK 0x3f +#define DRDETC_RdDqsTimeCheck_BASE 0 +#define DRDETC_RdDqsTimeCheck_MIN 0 +#define DRDETC_RdDqsTimeCheck_MAX 47 + +#define DRAM_DQS_RECV_ENABLE_TIME0 0x10 +#define DDRET_DqsRcvEnDelay_SHIFT 0 +#define DDRET_DqsRcvEnDelay_MASK 0xff +#define DDRET_DqsRcvEnDelay_BASE 0 +#define DDRET_DqsRcvEnDelay_MIN 0 +#define DDRET_DqsRcvEnDelay_MAX 0xae /* unit is 50ps */ + +#define DRAM_DQS_RECV_ENABLE_TIME1 0x13 +#define DRAM_DQS_RECV_ENABLE_TIME2 0x16 +#define DRAM_DQS_RECV_ENABLE_TIME3 0x19 + +/* there are index 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x30, 0x33, 0x36, 0x39 +that are corresponding to 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x10, 0x13, 0x16, 0x19 +*/ +#define DRAM_CTRL_MISC 0xa0 +#define DCM_MemClrStatus (1<<0) +#define DCM_DisableJitter (1<<1) +#define DCM_RdWrQByp_SHIFT 2 +#define DCM_RdWrQByp_MASK 3 +#define DCM_RdWrQByp_2 0 +#define DCM_RdWrQByp_4 1 +#define DCM_RdWrQByp_8 2 +#define DCM_RdWrQByp_16 3 +#define DCM_Mode64BitMux (1<<4) +#define DCM_DCC_EN (1<<5) +#define DCM_ILD_lmt_SHIFT 6 +#define DCM_ILD_lmt_MASK 7 +#define DCM_ILD_lmt_0 0 +#define DCM_ILD_lmt_4 1 +#define DCM_ILD_lmt_8 2 +#define DCM_ILD_lmt_16 3 +#define DCM_ILD_lmt_32 4 +#define DCM_ILD_lmt_64 5 +#define DCM_ILD_lmt_128 6 +#define DCM_ILD_lmt_256 7 +#define DCM_DramEnabled (1<<9) +#define DCM_MemClkDis_SHIFT 24 /* Channel B */ +#define DCM_MemClkDis3 (1 << 26) +#define DCM_MemClkDis2 (1 << 27) +#define DCM_MemClkDis1 (1 << 28) +#define DCM_MemClkDis0 (1 << 29) + + +/* Function 3 */ +#define MCA_NB_CONFIG 0x44 +#define MNC_ECC_EN (1 << 22) +#define MNC_CHIPKILL_EN (1 << 23) + +#define SCRUB_CONTROL 0x58 +#define SCRUB_NONE 0 +#define SCRUB_40ns 1 +#define SCRUB_80ns 2 +#define SCRUB_160ns 3 +#define SCRUB_320ns 4 +#define SCRUB_640ns 5 +#define SCRUB_1_28us 6 +#define SCRUB_2_56us 7 +#define SCRUB_5_12us 8 +#define SCRUB_10_2us 9 +#define SCRUB_20_5us 10 +#define SCRUB_41_0us 11 +#define 
SCRUB_81_9us 12 +#define SCRUB_163_8us 13 +#define SCRUB_327_7us 14 +#define SCRUB_655_4us 15 +#define SCRUB_1_31ms 16 +#define SCRUB_2_62ms 17 +#define SCRUB_5_24ms 18 +#define SCRUB_10_49ms 19 +#define SCRUB_20_97ms 20 +#define SCRUB_42ms 21 +#define SCRUB_84ms 22 +#define SC_DRAM_SCRUB_RATE_SHFIT 0 +#define SC_DRAM_SCRUB_RATE_MASK 0x1f +#define SC_L2_SCRUB_RATE_SHIFT 8 +#define SC_L2_SCRUB_RATE_MASK 0x1f +#define SC_L1D_SCRUB_RATE_SHIFT 16 +#define SC_L1D_SCRUB_RATE_MASK 0x1f + +#define SCRUB_ADDR_LOW 0x5C + +#define SCRUB_ADDR_HIGH 0x60 + +#define NORTHBRIDGE_CAP 0xE8 +#define NBCAP_128Bit (1 << 0) +#define NBCAP_MP (1 << 1) +#define NBCAP_BIG_MP (1 << 2) +#define NBCAP_ECC (1 << 3) +#define NBCAP_CHIPKILL_ECC (1 << 4) +#define NBCAP_MEMCLK_SHIFT 5 +#define NBCAP_MEMCLK_MASK 3 +#define NBCAP_MEMCLK_200MHZ 3 +#define NBCAP_MEMCLK_266MHZ 2 +#define NBCAP_MEMCLK_333MHZ 1 +#define NBCAP_MEMCLK_NOLIMIT 0 +#define NBCAP_MEMCTRL (1 << 8) +#define NBCAP_HtcCap (1<<10) +#define NBCAP_CmpCap_SHIFT 12 +#define NBCAP_CmpCap_MASK 3 + + +#define LinkConnected (1 << 0) +#define InitComplete (1 << 1) +#define NonCoherent (1 << 2) +#define ConnectionPending (1 << 4) + + +#include "raminit.h" +//struct definitions + +struct dimm_size { + uint8_t per_rank; // it is rows + col + bank_lines + data lines */ + uint8_t rows; + uint8_t col; + uint8_t bank; //1, 2, 3 mean 2, 4, 8 + uint8_t rank; +} __attribute__((packed)); + +struct mem_info { // pernode + uint32_t dimm_mask; + struct dimm_size sz[DIMM_SOCKETS]; + uint32_t x4_mask; + uint32_t x16_mask; + uint32_t single_rank_mask; + uint32_t page_1k_mask; +// uint32_t ecc_mask; +// uint32_t registered_mask; + uint8_t is_opteron; + uint8_t is_registered; + uint8_t is_ecc; + uint8_t is_Width128; + uint8_t memclk_set; // we need to use this to retrieve the mem param + uint8_t rsv[3]; +} __attribute__((packed)); + +struct link_pair_st { + device_t udev; + uint32_t upos; + uint32_t uoffs; + device_t dev; + uint32_t pos; + uint32_t offs; + +} 
__attribute__((packed)); + +struct sys_info { + uint8_t ctrl_present[NODE_NUMS]; + struct mem_info meminfo[NODE_NUMS]; + struct mem_controller ctrl[NODE_NUMS]; + uint8_t mem_trained[NODE_NUMS]; + uint32_t tom_k; + uint32_t tom2_k; + + uint32_t mem_base[NODE_NUMS]; + uint32_t cs_base[NODE_NUMS*8]; //8 cs_idx + uint32_t hole_reg[NODE_NUMS]; // can we spare it to one, and put ctrl idx in it + + uint8_t dqs_delay_a[NODE_NUMS*2*2*9]; //8 node channel 2, direction 2 , bytelane *9 + uint8_t dqs_rcvr_dly_a[NODE_NUMS*2*8]; //8 node, channel 2, receiver 8 + uint32_t nodes; + struct link_pair_st link_pair[16];// enough? only in_conherent + uint32_t link_pair_num; + uint32_t ht_c_num; + uint32_t sbdn; + uint32_t sblk; + uint32_t sbbusn; +} __attribute__((packed)); + +#if MEM_TRAIN_SEQ == 1 + +static void wait_all_core0_mem_trained(struct sys_info *sysinfo) +{ + int i; + uint32_t mask = 0; + + if(sysinfo->nodes == 1) return; // in case only one cpu installed + + for(i=1; i<sysinfo->nodes; i++) { + if (!sysinfo->ctrl_present[ i ]) + continue; + + /* Skip everything if I don't have any memory on this controller */ + if(sysinfo->meminfo[i].dimm_mask==0x00) continue; + + mask |= (1<<i); + + } + + i = 1; + while(1) { + if(mask & (1<<i)) { + if((sysinfo->mem_trained[i])) { + mask &= ~(1<<i); + } + } + + if(!mask) break; + + /* cpu_relax */ + __asm__ __volatile__("rep;nop": : :"memory"); + + i++; + i%=sysinfo->nodes; + } + +} +#endif + +#endif /* AMDK8_F_H */ diff --git a/src/northbridge/amd/amdk8/amdk8_f_pci.c b/src/northbridge/amd/amdk8/amdk8_f_pci.c new file mode 100644 index 0000000000..579d9fec70 --- /dev/null +++ b/src/northbridge/amd/amdk8/amdk8_f_pci.c @@ -0,0 +1,57 @@ +#ifndef AMDK8_F_PCI_C + +#define AMDK8_F_PCI_C +/* bit [10,8] are dev func, bit[1,0] are dev index */ +static uint32_t pci_read_config32_index(device_t dev, uint32_t index_reg, uint32_t index) +{ + uint32_t dword; + + pci_write_config32(dev, index_reg, index); + + dword = pci_read_config32(dev, index_reg+0x4); 
+ + return dword; +} + +static void pci_write_config32_index(device_t dev, uint32_t index_reg, uint32_t index, uint32_t data) +{ + + pci_write_config32(dev, index_reg, index); + + pci_write_config32(dev, index_reg + 0x4, data); + +} + +static uint32_t pci_read_config32_index_wait(device_t dev, uint32_t index_reg, uint32_t index) +{ + + uint32_t dword; + + index &= ~(1<<30); + pci_write_config32(dev, index_reg, index); + + do { + dword = pci_read_config32(dev, index_reg); + } while (!(dword & (1<<31))); + + dword = pci_read_config32(dev, index_reg+0x4); + + return dword; +} + +static void pci_write_config32_index_wait(device_t dev, uint32_t index_reg, uint32_t index, uint32_t data) +{ + + uint32_t dword; + + pci_write_config32(dev, index_reg + 0x4, data); + + index |= (1<<30); + pci_write_config32(dev, index_reg, index); + do { + dword = pci_read_config32(dev, index_reg); + } while (!(dword & (1<<31))); + +} + +#endif diff --git a/src/northbridge/amd/amdk8/coherent_ht.c b/src/northbridge/amd/amdk8/coherent_ht.c index d29831b74b..943d586bc3 100644 --- a/src/northbridge/amd/amdk8/coherent_ht.c +++ b/src/northbridge/amd/amdk8/coherent_ht.c @@ -1642,6 +1642,10 @@ static unsigned count_cpus(unsigned nodes) #endif } +static inline unsigned get_nodes(void) +{ + return ((pci_read_config32(PCI_DEV(0, 0x18, 0), 0x60)>>4) & 7) + 1; +} static void coherent_ht_finalize(unsigned nodes) { diff --git a/src/northbridge/amd/amdk8/coherent_ht_car.c b/src/northbridge/amd/amdk8/coherent_ht_car.c index 5b9a87ac63..c0c4b338b9 100644 --- a/src/northbridge/amd/amdk8/coherent_ht_car.c +++ b/src/northbridge/amd/amdk8/coherent_ht_car.c @@ -285,12 +285,14 @@ static uint16_t read_freq_cap(device_t dev, uint8_t pos) freq_cap = pci_read_config16(dev, pos); freq_cap &= ~(1 << HT_FREQ_VENDOR); /* Ignore Vendor HT frequencies */ +#if K8_REV_F_SUPPORT == 0 #if K8_HT_FREQ_1G_SUPPORT == 1 if (!is_cpu_pre_e0()) { return freq_cap; } #endif +#endif id = pci_read_config32(dev, 0); @@ -1590,7 +1592,9 @@ 
static unsigned verify_dualcore(unsigned nodes) static void coherent_ht_finalize(unsigned nodes) { unsigned node; +#if K8_REV_F_SUPPORT == 0 int rev_a0; +#endif #if CONFIG_LOGICAL_CPUS==1 unsigned total_cpus; @@ -1609,7 +1613,11 @@ static void coherent_ht_finalize(unsigned nodes) */ print_spew("coherent_ht_finalize\r\n"); + +#if K8_REV_F_SUPPORT == 0 rev_a0 = is_cpu_rev_a0(); +#endif + for (node = 0; node < nodes; node++) { device_t dev; uint32_t val; @@ -1638,11 +1646,13 @@ static void coherent_ht_finalize(unsigned nodes) (3 << HTTC_HI_PRI_BYP_CNT_SHIFT); pci_write_config32(dev, HT_TRANSACTION_CONTROL, val); +#if K8_REV_F_SUPPORT == 0 if (rev_a0) { pci_write_config32(dev, 0x94, 0); pci_write_config32(dev, 0xb4, 0); pci_write_config32(dev, 0xd4, 0); } +#endif } print_spew("done\r\n"); @@ -1656,6 +1666,7 @@ static int apply_cpu_errata_fixes(unsigned nodes) device_t dev; uint32_t cmd; dev = NODE_MC(node); +#if K8_REV_F_SUPPORT == 0 if (is_cpu_pre_c0()) { /* Errata 66 @@ -1697,6 +1708,7 @@ static int apply_cpu_errata_fixes(unsigned nodes) needs_reset = 1; /* Needed? */ } } +#endif } return needs_reset; } @@ -1734,6 +1746,11 @@ static int optimize_link_read_pointers(unsigned nodes) return needs_reset; } +static inline unsigned get_nodes(void) +{ + return ((pci_read_config32(PCI_DEV(0, 0x18, 0), 0x60)>>4) & 7) + 1; +} + static int optimize_link_coherent_ht(void) { int needs_reset = 0; diff --git a/src/northbridge/amd/amdk8/get_sblk_pci1234.c b/src/northbridge/amd/amdk8/get_sblk_pci1234.c index eff6ca1c62..61e89028e1 100644 --- a/src/northbridge/amd/amdk8/get_sblk_pci1234.c +++ b/src/northbridge/amd/amdk8/get_sblk_pci1234.c @@ -39,6 +39,8 @@ acknowledgement of AMD's proprietary rights in them. 
#include <string.h> #include <stdint.h> +#include <cpu/amd/amdk8_sysconf.h> + #if 0 unsigned node_link_to_bus(unsigned node, unsigned link) @@ -77,13 +79,6 @@ unsigned node_link_to_bus(unsigned node, unsigned link) #endif -extern unsigned pci1234[]; -extern unsigned hcdn[]; -extern unsigned hc_possible_num; -extern unsigned sblk; - -unsigned hcdn_reg[4]; // defined in northbridge.c - /* why we need pci1234 array final result for pci1234 will be pci1234[0] will record sblink and bus range @@ -152,6 +147,13 @@ unsigned hcdn_reg[4]; // defined in northbridge.c So Max HC_POSSIBLE_NUM is 8 + 1n: 3 + 2n: 2x2 - 1 + 4n: 1x4 - 2 + 6n: 2 + 8n: 2 + Total: 12 + just put all the possible ht node/link to the list tp pci1234[] in get_bus_conf.c on MB dir Also don't forget to increase the ACPI_SSDTX_NUM etc if you have too much SSDT @@ -169,11 +171,11 @@ void get_sblk_pci1234(void) /* read PCI_DEV(0,0x18,0) 0x64 bit [8:9] to find out SbLink m */ dev = dev_find_slot(0, PCI_DEVFN(0x18,0)); dword = pci_read_config32(dev, 0x64); - sblk = (dword>>8) & 0x3; + sysconf.sblk = (dword>>8) & 0x3; dword &=0x0300; dword |= 1; - pci1234[0] = dword; + sysconf.pci1234[0] = dword; /*about hardcode numbering for HT_IO support set the node_id and link_id that could have ht chain in the one array, @@ -189,35 +191,35 @@ void get_sblk_pci1234(void) dwordx = pci_read_config32(dev, 0xe0+j*4); dwordx &=0xffff0ff1; //keep bus num, node_id, link_num, enable bits if((dwordx & 0xff1) == dword) { //SBLINK - pci1234[0] = dwordx; - hcdn[0] = hcdn_reg[j]; + sysconf.pci1234[0] = dwordx; + sysconf.hcdn[0] = sysconf.hcdn_reg[j]; continue; } if((dwordx & 1) == 1) { // We need to find out the number of HC // for exact match - for(i=1;i<hc_possible_num;i++) { - if((dwordx & 0xff0) == (pci1234[i] & 0xff0)) { - pci1234[i] = dwordx; - hcdn[i] = hcdn_reg[j]; + for(i=1;i<sysconf.hc_possible_num;i++) { + if((dwordx & 0xff0) == (sysconf.pci1234[i] & 0xff0)) { + sysconf.pci1234[i] = dwordx; + sysconf.hcdn[i] = 
sysconf.hcdn_reg[j]; break; } } // for 0xff0 match or same node - for(i=1;i<hc_possible_num;i++) { - if((dwordx & 0xff0) == (dwordx & pci1234[i] & 0xff0)) { - pci1234[i] = dwordx; - hcdn[i] = hcdn_reg[j]; + for(i=1;i<sysconf.hc_possible_num;i++) { + if((dwordx & 0xff0) == (dwordx & sysconf.pci1234[i] & 0xff0)) { + sysconf.pci1234[i] = dwordx; + sysconf.hcdn[i] = sysconf.hcdn_reg[j]; break; } } } } - for(i=1;i<hc_possible_num;i++) { - if((pci1234[i] & 1) != 1) { - pci1234[i] = 0; - hcdn[i] = 0x20202020; + for(i=1;i<sysconf.hc_possible_num;i++) { + if((sysconf.pci1234[i] & 1) != 1) { + sysconf.pci1234[i] = 0; + sysconf.hcdn[i] = 0x20202020; } } diff --git a/src/northbridge/amd/amdk8/incoherent_ht.c b/src/northbridge/amd/amdk8/incoherent_ht.c index b07c62a5dc..012caf1501 100644 --- a/src/northbridge/amd/amdk8/incoherent_ht.c +++ b/src/northbridge/amd/amdk8/incoherent_ht.c @@ -1,11 +1,8 @@ /* - * incoherent hypertransport enumeration - * originally written by Eric Biederman - * - * 2004.12 yhlu add multi ht chain dynamically support - * 2005.11 yhlu add let real sb to use small unitid - * 2006.03 stepan cleanups - */ + This should be done by Eric + 2004.12 yhlu add multi ht chain dynamically support + 2005.11 yhlu add let real sb to use small unitid +*/ #include <device/pci_def.h> #include <device/pci_ids.h> #include <device/hypertransport_def.h> @@ -22,10 +19,7 @@ #define K8_ALLOCATE_IO_RANGE 0 #endif -/* Do we need to allocate MMIO? Currently we direct the last 64M - * to the southbridge link only. We have to remain access to the - * 4G-4M range for the southbridge (Flash ROM) - */ +// Do we need allocate MMIO? 
Current We direct last 64M to sblink only, We can not lose access to last 4M range to ROM #ifndef K8_ALLOCATE_MMIO_RANGE #define K8_ALLOCATE_MMIO_RANGE 0 #endif @@ -55,9 +49,7 @@ static uint8_t ht_lookup_capability(device_t dev, uint16_t val) if (pos > PCI_CAP_LIST_NEXT) { pos = pci_read_config8(dev, pos); } - - /* loop through the linked list */ - while(pos != 0) { + while(pos != 0) { /* loop through the linked list */ uint8_t cap; cap = pci_read_config8(dev, pos + PCI_CAP_LIST_ID); if (cap == PCI_CAP_ID_HT) { @@ -89,7 +81,7 @@ static void ht_collapse_previous_enumeration(uint8_t bus, unsigned offset_unitid device_t dev; uint32_t id; - // actually, only for one HT device HT chain, and unitid is 0 + //actually, only for one HT device HT chain, and unitid is 0 #if HT_CHAIN_UNITID_BASE == 0 if(offset_unitid) { return; @@ -158,9 +150,11 @@ static uint16_t ht_read_freq_cap(device_t dev, uint8_t pos) /* AMD K8 Unsupported 1Ghz? */ if (id == (PCI_VENDOR_ID_AMD | (0x1100 << 16))) { #if K8_HT_FREQ_1G_SUPPORT == 1 + #if K8_REV_F_SUPPORT == 0 if (is_cpu_pre_e0()) { // only E0 later support 1GHz freq_cap &= ~(1 << HT_FREQ_1000Mhz); } + #endif #else freq_cap &= ~(1 << HT_FREQ_1000Mhz); #endif @@ -268,33 +262,22 @@ static int ht_optimize_link( return needs_reset; } - #if (USE_DCACHE_RAM == 1) && (K8_SCAN_PCI_BUS == 1) #if RAMINIT_SYSINFO == 1 -static void ht_setup_chainx(device_t udev, uint8_t upos, uint8_t bus, - unsigned offset_unitid, struct sys_info *sysinfo); - -static int scan_pci_bus(unsigned bus, struct sys_info *sysinfo) +static void ht_setup_chainx(device_t udev, uint8_t upos, uint8_t bus, unsigned offset_unitid, struct sys_info *sysinfo); +static int scan_pci_bus( unsigned bus , struct sys_info *sysinfo) #else -static int ht_setup_chainx(device_t udev, uint8_t upos, uint8_t bus, - unsigned offset_unitid); - -static int scan_pci_bus(unsigned bus) +static int ht_setup_chainx(device_t udev, uint8_t upos, uint8_t bus, unsigned offset_unitid); +static int scan_pci_bus( 
unsigned bus) #endif { - /* Here we already can access PCI_DEV(bus, 0, 0) to - * PCI_DEV(bus, 0x1f, 0x7). - * - * So scan these devices to find out whether there are more bridges. - * - * - If we find a pci bridge, set the bus number in the bridge, and - * continue with the next device. - * - * - For hypertransport bridges, set the bus number in the bridge and - * call ht_setup_chainx(), and scan_pci_bus() - * - */ + /* + here we already can access PCI_DEV(bus, 0, 0) to PCI_DEV(bus, 0x1f, 0x7) + So We can scan these devices to find out if they are bridge + If it is pci bridge, We need to set busn in bridge, and go on + For ht bridge, We need to set the busn in bridge and ht_setup_chainx, and the scan_pci_bus + */ unsigned int devfn; unsigned new_bus; unsigned max_bus; @@ -356,18 +339,12 @@ static int scan_pci_bus(unsigned bus) ((unsigned int) max_bus << 16)); pci_write_config32(dev, PCI_PRIMARY_BUS, buses); - /* Here we need to figure out if dev is a ht - * bridge. If it is, we need to call - * ht_setup_chainx() first - * - * Not verified --- yhlu - */ - - uint8_t upos; // is this valid C? - - // one func one ht sub - upos = ht_lookup_host_capability(dev); - + /* here we need to figure out if dev is a ht bridge + if it is ht bridge, we need to call ht_setup_chainx at first + Not verified --- yhlu + */ + uint8_t upos; + upos = ht_lookup_host_capability(dev); // one func one ht sub if (upos) { // sub ht chain uint8_t busn; busn = (new_bus & 0xff); @@ -390,7 +367,7 @@ static int scan_pci_bus(unsigned bus) buses = (buses & 0xff00ffff) | ((unsigned int) (new_bus & 0xff) << 16); - pci_write_config32(dev, PCI_PRIMARY_BUS, buses); + pci_write_config32(dev, PCI_PRIMARY_BUS, buses); pci_write_config16(dev, PCI_COMMAND, cr); @@ -405,7 +382,6 @@ static int scan_pci_bus(unsigned bus) * time probing another function. * Skip to next device. 
*/ - if ( ((devfn & 0x07) == 0x00) && ((hdr_type & 0x80) != 0x80)) { devfn += 0x07; @@ -422,9 +398,7 @@ static void ht_setup_chainx(device_t udev, uint8_t upos, uint8_t bus, unsigned o static int ht_setup_chainx(device_t udev, uint8_t upos, uint8_t bus, unsigned offset_unitid) #endif { - // execute this function even with HT_CHAIN_UNITID_BASE == 0, - // because of the end_of_chain check, and we need it to - // optimize the links + //even HT_CHAIN_UNITID_BASE == 0, we still can go through this function, because of end_of_chain check, also We need it to optimize link uint8_t next_unitid, last_unitid; unsigned uoffs; @@ -434,8 +408,7 @@ static int ht_setup_chainx(device_t udev, uint8_t upos, uint8_t bus, unsigned of #endif #if HT_CHAIN_END_UNITID_BASE < HT_CHAIN_UNITID_BASE - // record the device id of last ht device, so we can set the - // unit id to HT_CHAIN_END_UNITID_BASE + //let't record the device of last ht device, So we can set the Unitid to HT_CHAIN_END_UNITID_BASE unsigned real_last_unitid; uint8_t real_last_pos; int ht_dev_num = 0; @@ -519,17 +492,15 @@ static int ht_setup_chainx(device_t udev, uint8_t upos, uint8_t bus, unsigned of next_unitid += count; - /* Find which side of the ht link we are on, by reading - * which direction our last write to PCI_CAP_FLAGS came - * from. + /* Find which side of the ht link we are on, + * by reading which direction our last write to PCI_CAP_FLAGS + * came from. */ flags = pci_read_config16(dev, pos + PCI_CAP_FLAGS); offs = ((flags>>10) & 1) ? 
PCI_HT_SLAVE1_OFFS : PCI_HT_SLAVE0_OFFS; #if RAMINIT_SYSINFO == 1 - /* store the link pair here and we will setup the - * Hypertransport link later, after we get final FID/VID - */ + /* store the link pair here and we will Setup the Hypertransport link later, after we get final FID/VID */ { struct link_pair_st *link_pair = &sysinfo->link_pair[sysinfo->link_pair_num]; link_pair->udev = udev; @@ -544,7 +515,7 @@ static int ht_setup_chainx(device_t udev, uint8_t upos, uint8_t bus, unsigned of reset_needed |= ht_optimize_link(udev, upos, uoffs, dev, pos, offs); #endif - /* Remember the location of the last device */ + /* Remeber the location of the last device */ udev = dev; upos = pos; uoffs = ( offs != PCI_HT_SLAVE0_OFFS ) ? PCI_HT_SLAVE0_OFFS : PCI_HT_SLAVE1_OFFS; @@ -586,8 +557,6 @@ end_of_chain: ; } - - #if RAMINIT_SYSINFO == 1 static void ht_setup_chain(device_t udev, unsigned upos, struct sys_info *sysinfo) #else @@ -618,10 +587,7 @@ static int ht_setup_chain(device_t udev, unsigned upos) return ht_setup_chainx(udev, upos, 0, offset_unitid); #endif } - - -static int optimize_link_read_pointer(uint8_t node, uint8_t linkn, - uint8_t linkt, uint8_t val) +static int optimize_link_read_pointer(uint8_t node, uint8_t linkn, uint8_t linkt, uint8_t val) { uint32_t dword, dword_old; uint8_t link_type; @@ -630,17 +596,16 @@ static int optimize_link_read_pointer(uint8_t node, uint8_t linkn, dword = pci_read_config32(PCI_DEV(0,0x18+node,0), 0x98 + (linkn * 0x20)); link_type = dword & 0xff; - dword_old = dword = pci_read_config32(PCI_DEV(0,0x18+node,3), 0xdc); - /* coherent link only linkt = 3, non coherent = 7*/ - if ( (link_type & 7) == linkt ) { + if ( (link_type & 7) == linkt ) { /* Coherent Link only linkt = 3, ncoherent = 7*/ + dword_old = dword = pci_read_config32(PCI_DEV(0,0x18+node,3), 0xdc); dword &= ~( 0xff<<(linkn *8) ); dword |= val << (linkn *8); - } - if (dword != dword_old) { - pci_write_config32(PCI_DEV(0,0x18+node,3), 0xdc, dword); - return 1; + if (dword != 
dword_old) { + pci_write_config32(PCI_DEV(0,0x18+node,3), 0xdc, dword); + return 1; + } } return 0; @@ -658,14 +623,22 @@ static int optimize_link_read_pointers_chain(uint8_t ht_c_num) uint8_t nodeid, linkn; uint8_t busn; uint8_t val; + unsigned devn = 1; + + #if HT_CHAIN_UNITID_BASE != 1 + #if SB_HT_CHAIN_UNITID_OFFSET_ONLY == 1 + if(i==0) // to check if it is sb ht chain + #endif + devn = HT_CHAIN_UNITID_BASE; + #endif reg = pci_read_config32(PCI_DEV(0,0x18,1), 0xe0 + i * 4); nodeid = ((reg & 0xf0)>>4); // nodeid linkn = ((reg & 0xf00)>>8); // link n busn = (reg & 0xff0000)>>16; //busn - - reg = pci_read_config32( PCI_DEV(busn, 1, 0), PCI_VENDOR_ID); + + reg = pci_read_config32( PCI_DEV(busn, devn, 0), PCI_VENDOR_ID); // ? the chain dev maybe offseted if ( (reg & 0xffff) == PCI_VENDOR_ID_AMD) { val = 0x25; } else if ( (reg & 0xffff) == PCI_VENDOR_ID_NVIDIA ) { @@ -681,18 +654,79 @@ static int optimize_link_read_pointers_chain(uint8_t ht_c_num) return reset_needed; } +static int set_ht_link_buffer_count(uint8_t node, uint8_t linkn, uint8_t linkt, unsigned val) +{ + uint32_t dword; + uint8_t link_type; + unsigned regpos; + device_t dev; + + /* This works on an Athlon64 because unimplemented links return 0 */ + regpos = 0x98 + (linkn * 0x20); + dev = PCI_DEV(0,0x18+node,0); + dword = pci_read_config32(dev, regpos); + link_type = dword & 0xff; + + if ( (link_type & 0x7) == linkt ) { /* Coherent Link only linkt = 3, ncoherent = 7*/ + regpos = 0x90 + (linkn * 0x20); + dword = pci_read_config32(dev, regpos ); + + if (dword != val) { + pci_write_config32(dev, regpos, val); + return 1; + } + } + + return 0; +} +static int set_ht_link_buffer_counts_chain(uint8_t ht_c_num, unsigned vendorid, unsigned val) +{ + int reset_needed; + uint8_t i; + + reset_needed = 0; + + for (i = 0; i < ht_c_num; i++) { + uint32_t reg; + uint8_t nodeid, linkn; + uint8_t busn; + unsigned devn = 1; + + #if HT_CHAIN_UNITID_BASE != 1 + #if SB_HT_CHAIN_UNITID_OFFSET_ONLY == 1 + if(i==0) // to check 
if it is sb ht chain + #endif + devn = HT_CHAIN_UNITID_BASE; + #endif + + reg = pci_read_config32(PCI_DEV(0,0x18,1), 0xe0 + i * 4); + if((reg & 3) != 3) continue; // not enabled + + nodeid = ((reg & 0xf0)>>4); // nodeid + linkn = ((reg & 0xf00)>>8); // link n + busn = (reg & 0xff0000)>>16; //busn + + reg = pci_read_config32( PCI_DEV(busn, devn, 0), PCI_VENDOR_ID); //1? + if ( (reg & 0xffff) == vendorid ) { + reset_needed |= set_ht_link_buffer_count(nodeid, linkn, 0x07,val); + } + } + + return reset_needed; +} + + #if RAMINIT_SYSINFO == 1 static void ht_setup_chains(uint8_t ht_c_num, struct sys_info *sysinfo) #else static int ht_setup_chains(uint8_t ht_c_num) #endif { - /* Assumption: The HT chain that is bus 0 has the HT I/O Hub on it. - * On most boards this just happens. If a cpu has multiple - * non coherent links the appropriate bus registers for the + /* Assumption the HT chain that is bus 0 has the HT I/O Hub on it. + * On most boards this just happens. If a cpu has multiple + * non Coherent links the appropriate bus registers for the * links needs to be programed to point at bus 0. 
*/ - uint8_t upos; device_t udev; uint8_t i; @@ -717,14 +751,9 @@ static int ht_setup_chains(uint8_t ht_c_num) reg = pci_read_config32(PCI_DEV(0,0x18,1), 0xe0 + i * 4); - // We need to setup 0x94, 0xb4, and 0xd4 according to reg - - // nodeid; it will decide 0x18 or 0x19 - devpos = ((reg & 0xf0)>>4)+0x18; - - // link n; it will decide 0x94 or 0xb4, 0x0xd4; - regpos = ((reg & 0xf00)>>8) * 0x20 + 0x94; - + //We need setup 0x94, 0xb4, and 0xd4 according to the reg + devpos = ((reg & 0xf0)>>4)+0x18; // nodeid; it will decide 0x18 or 0x19 + regpos = ((reg & 0xf00)>>8) * 0x20 + 0x94; // link n; it will decide 0x94 or 0xb4, 0x0xd4; busn = (reg & 0xff0000)>>16; dword = pci_read_config32( PCI_DEV(0, devpos, 0), regpos) ; @@ -753,15 +782,12 @@ static int ht_setup_chains(uint8_t ht_c_num) #endif #if (USE_DCACHE_RAM == 1) && (K8_SCAN_PCI_BUS == 1) - /* You can not use use this in romcc, because recursive - * function calls in romcc will kill you - */ + /* You can use use this in romcc, because there is function call in romcc, recursive will kill you */ bus = busn; // we need 32 bit #if RAMINIT_SYSINFO == 1 scan_pci_bus(bus, sysinfo); #else - // take out reset_needed that is stored in upword - reset_needed |= (scan_pci_bus(bus)>>16); + reset_needed |= (scan_pci_bus(bus)>>16); // take out reset_needed that stored in upword #endif #endif } @@ -774,11 +800,9 @@ static int ht_setup_chains(uint8_t ht_c_num) } -static inline unsigned get_nodes(void) -{ - return ((pci_read_config32(PCI_DEV(0, 0x18, 0), 0x60)>>4) & 7) + 1; -} - +#if defined (__GNUC__) +static inline unsigned get_nodes(void); +#endif #if RAMINIT_SYSINFO == 1 static void ht_setup_chains_x(struct sys_info *sysinfo) @@ -803,15 +827,14 @@ static int ht_setup_chains_x(void) /* update PCI_DEV(0, 0x18, 1) 0xe0 to 0x05000m03, and next_busn=0x3f+1 */ print_linkn_in("SBLink=", ((reg>>8) & 3) ); #if RAMINIT_SYSINFO == 1 - sysinfo->sblnk = (reg>>8) & 3; + sysinfo->sblk = (reg>>8) & 3; sysinfo->sbbusn = 0; sysinfo->nodes = nodes; 
#endif tempreg = 3 | ( 0<<4) | (((reg>>8) & 3)<<8) | (0<<16)| (0x3f<<24); pci_write_config32(PCI_DEV(0, 0x18, 1), 0xe0, tempreg); - /* 0 will be used ht chain with SB we need to keep SB in bus 0 in auto stage */ - next_busn=0x3f+1; + next_busn=0x3f+1; /* 0 will be used ht chain with SB we need to keep SB in bus0 in auto stage*/ #if K8_ALLOCATE_IO_RANGE == 1 /* io range allocation */ @@ -825,9 +848,12 @@ static int ht_setup_chains_x(void) /* clean others */ for(ht_c_num=1;ht_c_num<4; ht_c_num++) { pci_write_config32(PCI_DEV(0, 0x18, 1), 0xe0 + ht_c_num * 4, 0); + +#if K8_ALLOCATE_IO_RANGE == 1 /* io range allocation */ pci_write_config32(PCI_DEV(0, 0x18, 1), 0xc4 + ht_c_num * 8, 0); pci_write_config32(PCI_DEV(0, 0x18, 1), 0xc0 + ht_c_num * 8, 0); +#endif } for(nodeid=0; nodeid<nodes; nodeid++) { @@ -836,67 +862,38 @@ static int ht_setup_chains_x(void) dev = PCI_DEV(0, 0x18+nodeid,0); for(linkn = 0; linkn<3; linkn++) { unsigned regpos; - regpos = 0x98 + 0x20 * linkn; reg = pci_read_config32(dev, regpos); - - /* skip if link is non conherent or not connected*/ - if ((reg & 0x17) != 7) continue; - - print_linkn_in("NC node|link=", - ((nodeid & 0xf)<<4)|(linkn & 0xf)); - + if ((reg & 0x17) != 7) continue; /* it is not non conherent or not connected*/ + print_linkn_in("NC node|link=", ((nodeid & 0xf)<<4)|(linkn & 0xf)); tempreg = 3 | (nodeid <<4) | (linkn<<8); - - /* compare (temp & 0xffff) with - * (PCI(0, 0x18, 1) 0xe0 to 0xec & 0xfffff) - */ + /*compare (temp & 0xffff), with (PCI(0, 0x18, 1) 0xe0 to 0xec & 0xfffff) */ for(ht_c_num=0;ht_c_num<4; ht_c_num++) { - reg = pci_read_config32( PCI_DEV(0, 0x18, 1), - 0xe0 + ht_c_num * 4); - - if ( ((reg & 0xffff) == (tempreg & 0xffff)) - || ((reg & 0xffff) == 0x0000) ) { - /*we got it*/ + reg = pci_read_config32(PCI_DEV(0, 0x18, 1), 0xe0 + ht_c_num * 4); + if(((reg & 0xffff) == (tempreg & 0xffff)) || ((reg & 0xffff) == 0x0000)) { /*we got it*/ break; } } - - /* used up the maximum allowed 4 non conherent links */ - if(ht_c_num 
== 4) break; - - /* update to 0xe0...*/ - if((reg & 0xf) == 3) continue; /* SbLink so don't touch it */ - + if(ht_c_num == 4) break; /*used up only 4 non conherent allowed*/ + /*update to 0xe0...*/ + if((reg & 0xf) == 3) continue; /*SbLink so don't touch it */ print_linkn_in("\tbusn=", next_busn); - tempreg |= (next_busn<<16)|((next_busn+0x3f)<<24); - pci_write_config32(PCI_DEV(0, 0x18, 1), - 0xe0 + ht_c_num * 4, tempreg); - + pci_write_config32(PCI_DEV(0, 0x18, 1), 0xe0 + ht_c_num * 4, tempreg); next_busn+=0x3f+1; #if K8_ALLOCATE_IO_RANGE == 1 /* io range allocation */ - - // limit - tempreg = nodeid | (linkn<<4) | ((next_io_base+0x3)<<12); - pci_write_config32( PCI_DEV(0, 0x18, 1), - 0xC4 + ht_c_num * 8, tempreg); - - // base :ISA and VGA ? - tempreg = 3 /*| ( 3<<4)*/ | (next_io_base<<12); - pci_write_config32(PCI_DEV(0, 0x18, 1), - 0xC0 + ht_c_num * 8, tempreg); - + tempreg = nodeid | (linkn<<4) | ((next_io_base+0x3)<<12); //limit + pci_write_config32(PCI_DEV(0, 0x18, 1), 0xC4 + ht_c_num * 8, tempreg); + tempreg = 3 /*| ( 3<<4)*/ | (next_io_base<<12); //base :ISA and VGA ? 
+ pci_write_config32(PCI_DEV(0, 0x18, 1), 0xC0 + ht_c_num * 8, tempreg); next_io_base += 0x3+0x1; #endif + } } - - /* update 0xe0, 0xe4, 0xe8, 0xec from PCI_DEV(0, 0x18,1) - * to PCI_DEV(0, 0x19,1) to PCI_DEV(0, 0x1f,1); - */ + /*update 0xe0, 0xe4, 0xe8, 0xec from PCI_DEV(0, 0x18,1) to PCI_DEV(0, 0x19,1) to PCI_DEV(0, 0x1f,1);*/ for(nodeid = 1; nodeid<nodes; nodeid++) { int i; @@ -938,6 +935,7 @@ static int ht_setup_chains_x(void) #if RAMINIT_SYSINFO == 1 sysinfo->ht_c_num = i; ht_setup_chains(i, sysinfo); + sysinfo->sbdn = get_sbdn(sysinfo->sbbusn); #else return ht_setup_chains(i); #endif @@ -947,7 +945,7 @@ static int ht_setup_chains_x(void) #if RAMINIT_SYSINFO == 1 static int optimize_link_incoherent_ht(struct sys_info *sysinfo) { - // We need to use the recorded link pair info to optimize the link + // We need to use recorded link pair info to optimize the link int i; int reset_needed = 0; @@ -955,12 +953,10 @@ static int optimize_link_incoherent_ht(struct sys_info *sysinfo) for(i=0; i< link_pair_num; i++) { struct link_pair_st *link_pair= &sysinfo->link_pair[i]; - reset_needed |= ht_optimize_link(link_pair->udev, - link_pair->upos, link_pair->uoffs, - link_pair->dev, link_pair->pos, link_pair->offs); + reset_needed |= ht_optimize_link(link_pair->udev, link_pair->upos, link_pair->uoffs, link_pair->dev, link_pair->pos, link_pair->offs); } - reset_needed |= optimize_link_read_pointers(sysinfo->ht_c_num); + reset_needed |= optimize_link_read_pointers_chain(sysinfo->ht_c_num); return reset_needed; diff --git a/src/northbridge/amd/amdk8/misc_control.c b/src/northbridge/amd/amdk8/misc_control.c index c7176c3b0d..b7072b2df3 100644 --- a/src/northbridge/amd/amdk8/misc_control.c +++ b/src/northbridge/amd/amdk8/misc_control.c @@ -121,6 +121,7 @@ static void misc_control_init(struct device *dev) cmd = pci_read_config32(dev, 0x44); cmd |= (1<<6) | (1<<25); pci_write_config32(dev, 0x44, cmd ); +#if K8_REV_F_SUPPORT == 0 if (is_cpu_pre_c0()) { /* Errata 58 @@ -177,6 +178,7 
@@ static void misc_control_init(struct device *dev) needs_reset = 1; /* Needed? */ } } +#endif /* Optimize the Link read pointers */ f0_dev = dev_find_slot(0, dev->path.u.pci.devfn - 3); if (f0_dev) { diff --git a/src/northbridge/amd/amdk8/northbridge.c b/src/northbridge/amd/amdk8/northbridge.c index c36990dea7..a91c54a723 100644 --- a/src/northbridge/amd/amdk8/northbridge.c +++ b/src/northbridge/amd/amdk8/northbridge.c @@ -30,10 +30,14 @@ #include "amdk8.h" -#if K8_HW_MEM_HOLE_SIZEK != 0 +#if HW_MEM_HOLE_SIZEK != 0 #include <cpu/amd/model_fxx_rev.h> #endif +#include <cpu/amd/amdk8_sysconf.h> + +struct amdk8_sysconf_t sysconf; + #define FX_DEVS 8 static device_t __f0_dev[FX_DEVS]; static device_t __f1_dev[FX_DEVS]; @@ -97,8 +101,6 @@ static unsigned int amdk8_nodeid(device_t dev) return (dev->path.u.pci.devfn >> 3) - 0x18; } -unsigned hcdn_reg[4]; // it will be used by get_sblk_pci1234 - static unsigned int amdk8_scan_chain(device_t dev, unsigned nodeid, unsigned link, unsigned sblink, unsigned int max, unsigned offset_unitid) { #if 0 @@ -160,17 +162,17 @@ static unsigned int amdk8_scan_chain(device_t dev, unsigned nodeid, unsigned lin * We have no idea how many busses are behind this bridge yet, * so we set the subordinate bus number to 0xff for the moment. */ -#if K8_SB_HT_CHAIN_ON_BUS0 > 0 +#if SB_HT_CHAIN_ON_BUS0 > 0 // first chain will on bus 0 if((nodeid == 0) && (sblink==link)) { // actually max is 0 here min_bus = max; } - #if K8_SB_HT_CHAIN_ON_BUS0 > 1 + #if SB_HT_CHAIN_ON_BUS0 > 1 // second chain will be on 0x40, third 0x80, forth 0xc0 else { - min_bus = ((max>>6) + 1) * 0x40; + min_bus = ((max>>6) + 1) * 0x40; } - max = min_bus; + max = min_bus; #else //other ... 
else { @@ -248,7 +250,7 @@ static unsigned int amdk8_scan_chain(device_t dev, unsigned nodeid, unsigned lin temp |= (ht_unitid_base[i] & 0xff) << (i*8); } - hcdn_reg[index] = temp; + sysconf.hcdn_reg[index] = temp; } @@ -277,7 +279,7 @@ static unsigned int amdk8_scan_chains(device_t dev, unsigned int max) if(nodeid==0) { sblink = (pci_read_config32(dev, 0x64)>>8) & 3; -#if K8_SB_HT_CHAIN_ON_BUS0 > 0 +#if SB_HT_CHAIN_ON_BUS0 > 0 #if HT_CHAIN_UNITID_BASE != 1 offset_unitid = 1; #endif @@ -286,7 +288,7 @@ static unsigned int amdk8_scan_chains(device_t dev, unsigned int max) } for(link = 0; link < dev->links; link++) { -#if K8_SB_HT_CHAIN_ON_BUS0 > 0 +#if SB_HT_CHAIN_ON_BUS0 > 0 if( (nodeid == 0) && (sblink == link) ) continue; //already done #endif offset_unitid = 0; @@ -773,7 +775,7 @@ static uint32_t find_pci_tolm(struct bus *bus) #define BRIDGE_IO_MASK (IORESOURCE_IO | IORESOURCE_MEM | IORESOURCE_PREFETCH) #endif -#if K8_HW_MEM_HOLE_SIZEK != 0 +#if HW_MEM_HOLE_SIZEK != 0 struct hw_mem_hole_info { unsigned hole_startk; @@ -785,7 +787,7 @@ static struct hw_mem_hole_info get_hw_mem_hole_info(void) struct hw_mem_hole_info mem_hole; int i; - mem_hole.hole_startk = K8_HW_MEM_HOLE_SIZEK; + mem_hole.hole_startk = HW_MEM_HOLE_SIZEK; mem_hole.node_id = -1; for (i = 0; i < 8; i++) { @@ -931,7 +933,7 @@ static void pci_domain_set_resources(device_t dev) unsigned long mmio_basek; uint32_t pci_tolm; int i, idx; -#if K8_HW_MEM_HOLE_SIZEK != 0 +#if HW_MEM_HOLE_SIZEK != 0 struct hw_mem_hole_info mem_hole; unsigned reset_memhole = 1; #endif @@ -1015,12 +1017,14 @@ static void pci_domain_set_resources(device_t dev) mmio_basek &= ~((64*1024) - 1); #endif -#if K8_HW_MEM_HOLE_SIZEK != 0 +#if HW_MEM_HOLE_SIZEK != 0 /* if the hw mem hole is already set in raminit stage, here we will compare mmio_basek and hole_basek * if mmio_basek is bigger that hole_basek and will use hole_basek as mmio_basek and we don't need to reset hole. 
* otherwise We reset the hole to the mmio_basek */ + #if K8_REV_F_SUPPORT == 0 if (!is_cpu_pre_e0()) { + #endif mem_hole = get_hw_mem_hole_info(); @@ -1032,13 +1036,13 @@ static void pci_domain_set_resources(device_t dev) //mmio_basek = 3*1024*1024; // for debug to meet boundary if(reset_memhole) { - if(mem_hole.node_id!=-1) { // We need to select K8_HW_MEM_HOLE_SIZEK for raminit, it can not make hole_startk to some basek too....! + if(mem_hole.node_id!=-1) { // We need to select HW_MEM_HOLE_SIZEK for raminit, it can not make hole_startk to some basek too....! // We need to reset our Mem Hole, because We want more big HOLE than we already set //Before that We need to disable mem hole at first, becase memhole could already be set on i+1 instead disable_hoist_memory(mem_hole.hole_startk, mem_hole.node_id); } - #if K8_HW_MEM_HOLE_SIZE_AUTO_INC == 1 + #if HW_MEM_HOLE_SIZE_AUTO_INC == 1 //We need to double check if the mmio_basek is valid for hole setting, if it is equal to basek, we need to decrease it some uint32_t basek_pri; for (i = 0; i < 8; i++) { @@ -1059,7 +1063,9 @@ static void pci_domain_set_resources(device_t dev) #endif } +#if K8_REV_F_SUPPORT == 0 } // is_cpu_pre_e0 +#endif #endif @@ -1098,9 +1104,11 @@ static void pci_domain_set_resources(device_t dev) idx += 0x10; sizek -= pre_sizek; } - #if K8_HW_MEM_HOLE_SIZEK != 0 + #if HW_MEM_HOLE_SIZEK != 0 if(reset_memhole) + #if K8_REV_F_SUPPORT == 0 if(!is_cpu_pre_e0() ) + #endif sizek += hoist_memory(mmio_basek,i); #endif @@ -1139,7 +1147,6 @@ static unsigned int pci_domain_scan_bus(device_t dev, unsigned int max) f0_dev = __f0_dev[i]; if (f0_dev && f0_dev->enabled) { uint32_t httc; - int j; httc = pci_read_config32(f0_dev, HT_TRANSACTION_CONTROL); httc &= ~HTTC_RSP_PASS_PW; if (!dev->link[0].disable_relaxed_ordering) { @@ -1169,24 +1176,20 @@ static unsigned int cpu_bus_scan(device_t dev, unsigned int max) struct bus *cpu_bus; device_t dev_mc; int bsp_apicid; - int apicid_offset; int i,j; - int nodes; unsigned 
nb_cfg_54; - int enable_apic_ext_id; unsigned siblings; int e0_later_single_core; int disable_siblings; - unsigned lift_bsp_apicid; nb_cfg_54 = 0; - enable_apic_ext_id = 0; - lift_bsp_apicid = 0; + sysconf.enabled_apic_ext_id = 0; + sysconf.lift_bsp_apicid = 0; siblings = 0; /* Find the bootstrap processors apicid */ bsp_apicid = lapicid(); - apicid_offset = bsp_apicid; + sysconf.apicid_offset = bsp_apicid; disable_siblings = !CONFIG_LOGICAL_CPUS; #if CONFIG_LOGICAL_CPUS == 1 @@ -1203,24 +1206,25 @@ static unsigned int cpu_bus_scan(device_t dev, unsigned int max) die("0:18.0 not found?"); } - nodes = ((pci_read_config32(dev_mc, 0x60)>>4) & 7) + 1; + sysconf.nodes = ((pci_read_config32(dev_mc, 0x60)>>4) & 7) + 1; + if (pci_read_config32(dev_mc, 0x68) & (HTTC_APIC_EXT_ID|HTTC_APIC_EXT_BRD_CST)) { - enable_apic_ext_id = 1; + sysconf.enabled_apic_ext_id = 1; if(bsp_apicid == 0) { /* bsp apic id is not changed */ - apicid_offset = APIC_ID_OFFSET; + sysconf.apicid_offset = APIC_ID_OFFSET; } else { - lift_bsp_apicid = 1; + sysconf.lift_bsp_apicid = 1; } } /* Find which cpus are present */ cpu_bus = &dev->link[0]; - for(i = 0; i < nodes; i++) { + for(i = 0; i < sysconf.nodes; i++) { device_t dev, cpu; struct device_path cpu_path; @@ -1262,7 +1266,11 @@ static unsigned int cpu_bus_scan(device_t dev, unsigned int max) // That is the typical case if(j == 0 ){ + #if K8_REV_F_SUPPORT == 0 e0_later_single_core = is_e0_later_in_bsp(i); // single core + #else + e0_later_single_core = is_cpu_f0_in_bsp(i); // We can read cpuid(1) from Func3 + #endif } else { e0_later_single_core = 0; } @@ -1321,13 +1329,13 @@ static unsigned int cpu_bus_scan(device_t dev, unsigned int max) if (cpu) { cpu->path.u.apic.node_id = i; cpu->path.u.apic.core_id = j; - if(enable_apic_ext_id) { - if(lift_bsp_apicid) { - cpu->path.u.apic.apic_id += apicid_offset; + if(sysconf.enabled_apic_ext_id) { + if(sysconf.lift_bsp_apicid) { + cpu->path.u.apic.apic_id += sysconf.apicid_offset; } else { if 
(cpu->path.u.apic.apic_id != 0) - cpu->path.u.apic.apic_id += apicid_offset; + cpu->path.u.apic.apic_id += sysconf.apicid_offset; } } printk_debug("CPU: %s %s\n", diff --git a/src/northbridge/amd/amdk8/raminit.c b/src/northbridge/amd/amdk8/raminit.c index 2141b31843..f7a12e77f8 100644 --- a/src/northbridge/amd/amdk8/raminit.c +++ b/src/northbridge/amd/amdk8/raminit.c @@ -18,7 +18,7 @@ #define K8_4RANK_DIMM_SUPPORT 0 #endif -#if USE_DCACHE_RAM == 1 +#if defined (__GNUC__) static void hard_reset(void); #endif @@ -44,8 +44,8 @@ static void setup_resource_map(const unsigned int *register_values, int max) print_debug("\r\n"); #endif #endif - dev = register_values[i] & ~0xff; - where = register_values[i] & 0xff; + dev = register_values[i] & ~0xfff; + where = register_values[i] & 0xfff; reg = pci_read_config32(dev, where); reg &= register_values[i+1]; reg |= register_values[i+2]; @@ -555,8 +555,8 @@ static void sdram_set_registers(const struct mem_controller *ctrl) print_spew("\r\n"); #endif #endif - dev = (register_values[i] & ~0xff) - PCI_DEV(0, 0x18, 0) + ctrl->f0; - where = register_values[i] & 0xff; + dev = (register_values[i] & ~0xfff) - PCI_DEV(0, 0x18, 0) + ctrl->f0; + where = register_values[i] & 0xfff; reg = pci_read_config32(dev, where); reg &= register_values[i+1]; reg |= register_values[i+2]; @@ -886,7 +886,7 @@ static void set_top_mem(unsigned tom_k, unsigned hole_startk) /* Now set top of memory */ msr_t msr; - if(tom_k>(4*1024*1024)) { + if(tom_k > (4*1024*1024)) { msr.lo = (tom_k & 0x003fffff) << 10; msr.hi = (tom_k & 0xffc00000) >> 22; wrmsr(TOP_MEM2, msr); @@ -896,7 +896,7 @@ static void set_top_mem(unsigned tom_k, unsigned hole_startk) * so I can see my rom chip and other I/O devices. 
*/ if (tom_k >= 0x003f0000) { -#if K8_HW_MEM_HOLE_SIZEK != 0 +#if HW_MEM_HOLE_SIZEK != 0 if(hole_startk != 0) { tom_k = hole_startk; } else @@ -2183,7 +2183,7 @@ static void sdram_set_spd_registers(const struct mem_controller *ctrl) return; } -#if K8_HW_MEM_HOLE_SIZEK != 0 +#if HW_MEM_HOLE_SIZEK != 0 static uint32_t hoist_memory(int controllers, const struct mem_controller *ctrl,unsigned hole_startk, int i) { int ii; @@ -2242,9 +2242,9 @@ static void set_hw_mem_hole(int controllers, const struct mem_controller *ctrl) uint32_t hole_startk; int i; - hole_startk = 4*1024*1024 - K8_HW_MEM_HOLE_SIZEK; + hole_startk = 4*1024*1024 - HW_MEM_HOLE_SIZEK; -#if K8_HW_MEM_HOLE_SIZE_AUTO_INC == 1 +#if HW_MEM_HOLE_SIZE_AUTO_INC == 1 //We need to double check if the hole_startk is valid, if it is equal to basek, we need to decrease it some uint32_t basek_pri; for(i=0; i<controllers; i++) { @@ -2388,7 +2388,7 @@ static void sdram_enable(int controllers, const struct mem_controller *ctrl) print_debug(" done\r\n"); } -#if K8_HW_MEM_HOLE_SIZEK != 0 +#if HW_MEM_HOLE_SIZEK != 0 // init hw mem hole here /* DramHoleValid bit only can be set after MemClrStatus is set by Hardware */ if(!is_cpu_pre_e0()) @@ -2450,6 +2450,10 @@ static int mem_inited(int controllers, const struct mem_controller *ctrl) } #if USE_DCACHE_RAM == 1 +static void set_sysinfo_in_ram(unsigned val) +{ +} + static void fill_mem_ctrl(int controllers, struct mem_controller *ctrl_a, const uint16_t *spd_addr) { int i; diff --git a/src/northbridge/amd/amdk8/raminit.h b/src/northbridge/amd/amdk8/raminit.h index 157dd13ee3..f46e58defe 100644 --- a/src/northbridge/amd/amdk8/raminit.h +++ b/src/northbridge/amd/amdk8/raminit.h @@ -1,6 +1,8 @@ #ifndef RAMINIT_H #define RAMINIT_H +#define NODE_NUMS 8 + #define DIMM_SOCKETS 4 struct mem_controller { unsigned node_id; @@ -9,5 +11,4 @@ struct mem_controller { uint16_t channel1[DIMM_SOCKETS]; }; - #endif /* RAMINIT_H */ diff --git a/src/northbridge/amd/amdk8/raminit_f.c 
b/src/northbridge/amd/amdk8/raminit_f.c new file mode 100644 index 0000000000..3c8341050e --- /dev/null +++ b/src/northbridge/amd/amdk8/raminit_f.c @@ -0,0 +1,3065 @@ +/* This should be done by Eric + 2004.11 yhlu add 4 rank DIMM support + 2004.12 yhlu add D0 support + 2005.02 yhlu add E0 memory hole support + 2005.10 yhlu make it support DDR2 only +*/ + +#include <cpu/x86/mem.h> +#include <cpu/x86/cache.h> +#include <cpu/x86/mtrr.h> +#include <cpu/x86/tsc.h> + +#include "raminit.h" +#include "amdk8_f.h" +#include "spd_ddr2.h" + +#ifndef QRANK_DIMM_SUPPORT +#define QRANK_DIMM_SUPPORT 0 +#endif + +static inline void print_raminit(const char *strval, uint32_t val) +{ +#if CONFIG_USE_INIT + printk_debug("%s:%08x\r\n", strval, val); +#else + print_debug(strval); print_debug_hex32(val); print_debug("\r\n"); +#endif +} + +#define RAM_TIMING_DEBUG 0 + +static inline void print_tx(const char *strval, uint32_t val) +{ +#if RAM_TIMING_DEBUG == 1 + print_raminit(strval, val); +#endif +} + + +static inline void print_t(const char *strval) +{ +#if RAM_TIMING_DEBUG == 1 + print_debug(strval); +#endif +} + + + +#if (CONFIG_LB_MEM_TOPK & (CONFIG_LB_MEM_TOPK -1)) != 0 +# error "CONFIG_LB_MEM_TOPK must be a power of 2" +#endif + +#include "amdk8_f_pci.c" + + + // for PCI_ADDR(0, 0x18, 2, 0x98) index, and PCI_ADDR(0x, 0x18, 2, 0x9c) data + /* + index: + [29: 0] DctOffset (Dram Controller Offset) + [30:30] DctAccessWrite (Dram Controller Read/Write Select) + 0 = read access + 1 = write access + [31:31] DctAccessDone (Dram Controller Access Done) + 0 = Access in progress + 1 = No access is progress + + Data: + [31: 0] DctOffsetData (Dram Controller Offset Data) + + Read: + - Write the register num to DctOffset with DctAccessWrite = 0 + - poll the DctAccessDone until it = 1 + - Read the data from DctOffsetData + Write: + - Write the data to DctOffsetData + - Write register num to DctOffset with DctAccessWrite = 1 + - poll the DctAccessDone untio it = 1 + + */ + + +#if 1 +static void 
setup_resource_map(const unsigned int *register_values, int max) +{ + int i; + + for(i = 0; i < max; i += 3) { + device_t dev; + unsigned where; + unsigned long reg; + + dev = register_values[i] & ~0xff; + where = register_values[i] & 0xff; + reg = pci_read_config32(dev, where); + reg &= register_values[i+1]; + reg |= register_values[i+2]; + pci_write_config32(dev, where, reg); + } +} +#endif + +static int controller_present(const struct mem_controller *ctrl) +{ + return pci_read_config32(ctrl->f0, 0) == 0x11001022; +} + +static void sdram_set_registers(const struct mem_controller *ctrl, struct sys_info *sysinfo) +{ + static const unsigned int register_values[] = { + + /* Careful set limit registers before base registers which contain the enables */ + /* DRAM Limit i Registers + * F1:0x44 i = 0 + * F1:0x4C i = 1 + * F1:0x54 i = 2 + * F1:0x5C i = 3 + * F1:0x64 i = 4 + * F1:0x6C i = 5 + * F1:0x74 i = 6 + * F1:0x7C i = 7 + * [ 2: 0] Destination Node ID + * 000 = Node 0 + * 001 = Node 1 + * 010 = Node 2 + * 011 = Node 3 + * 100 = Node 4 + * 101 = Node 5 + * 110 = Node 6 + * 111 = Node 7 + * [ 7: 3] Reserved + * [10: 8] Interleave select + * specifies the values of A[14:12] to use with interleave enable. + * [15:11] Reserved + * [31:16] DRAM Limit Address i Bits 39-24 + * This field defines the upper address bits of a 40 bit address + * that define the end of the DRAM region. 
+ */ + PCI_ADDR(0, 0x18, 1, 0x44), 0x0000f8f8, 0x00000000, + PCI_ADDR(0, 0x18, 1, 0x4C), 0x0000f8f8, 0x00000001, + PCI_ADDR(0, 0x18, 1, 0x54), 0x0000f8f8, 0x00000002, + PCI_ADDR(0, 0x18, 1, 0x5C), 0x0000f8f8, 0x00000003, + PCI_ADDR(0, 0x18, 1, 0x64), 0x0000f8f8, 0x00000004, + PCI_ADDR(0, 0x18, 1, 0x6C), 0x0000f8f8, 0x00000005, + PCI_ADDR(0, 0x18, 1, 0x74), 0x0000f8f8, 0x00000006, + PCI_ADDR(0, 0x18, 1, 0x7C), 0x0000f8f8, 0x00000007, + /* DRAM Base i Registers + * F1:0x40 i = 0 + * F1:0x48 i = 1 + * F1:0x50 i = 2 + * F1:0x58 i = 3 + * F1:0x60 i = 4 + * F1:0x68 i = 5 + * F1:0x70 i = 6 + * F1:0x78 i = 7 + * [ 0: 0] Read Enable + * 0 = Reads Disabled + * 1 = Reads Enabled + * [ 1: 1] Write Enable + * 0 = Writes Disabled + * 1 = Writes Enabled + * [ 7: 2] Reserved + * [10: 8] Interleave Enable + * 000 = No interleave + * 001 = Interleave on A[12] (2 nodes) + * 010 = reserved + * 011 = Interleave on A[12] and A[14] (4 nodes) + * 100 = reserved + * 101 = reserved + * 110 = reserved + * 111 = Interleave on A[12] and A[13] and A[14] (8 nodes) + * [15:11] Reserved + * [31:16] DRAM Base Address i Bits 39-24 + * This field defines the upper address bits of a 40-bit address + * that define the start of the DRAM region. 
+ */ + PCI_ADDR(0, 0x18, 1, 0x40), 0x0000f8fc, 0x00000000, + PCI_ADDR(0, 0x18, 1, 0x48), 0x0000f8fc, 0x00000000, + PCI_ADDR(0, 0x18, 1, 0x50), 0x0000f8fc, 0x00000000, + PCI_ADDR(0, 0x18, 1, 0x58), 0x0000f8fc, 0x00000000, + PCI_ADDR(0, 0x18, 1, 0x60), 0x0000f8fc, 0x00000000, + PCI_ADDR(0, 0x18, 1, 0x68), 0x0000f8fc, 0x00000000, + PCI_ADDR(0, 0x18, 1, 0x70), 0x0000f8fc, 0x00000000, + PCI_ADDR(0, 0x18, 1, 0x78), 0x0000f8fc, 0x00000000, + + /* DRAM CS Base Address i Registers + * F2:0x40 i = 0 + * F2:0x44 i = 1 + * F2:0x48 i = 2 + * F2:0x4C i = 3 + * F2:0x50 i = 4 + * F2:0x54 i = 5 + * F2:0x58 i = 6 + * F2:0x5C i = 7 + * [ 0: 0] Chip-Select Bank Enable + * 0 = Bank Disabled + * 1 = Bank Enabled + * [ 1: 1] Spare Rank + * [ 2: 2] Memory Test Failed + * [ 4: 3] Reserved + * [13: 5] Base Address (21-13) + * An optimization used when all DIMM are the same size... + * [18:14] Reserved + * [28:19] Base Address (36-27) + * This field defines the top 11 addresses bit of a 40-bit + * address that define the memory address space. These + * bits decode 32-MByte blocks of memory. + * [31:29] Reserved + */ + PCI_ADDR(0, 0x18, 2, 0x40), 0xe007c018, 0x00000000, + PCI_ADDR(0, 0x18, 2, 0x44), 0xe007c018, 0x00000000, + PCI_ADDR(0, 0x18, 2, 0x48), 0xe007c018, 0x00000000, + PCI_ADDR(0, 0x18, 2, 0x4C), 0xe007c018, 0x00000000, + PCI_ADDR(0, 0x18, 2, 0x50), 0xe007c018, 0x00000000, + PCI_ADDR(0, 0x18, 2, 0x54), 0xe007c018, 0x00000000, + PCI_ADDR(0, 0x18, 2, 0x58), 0xe007c018, 0x00000000, + PCI_ADDR(0, 0x18, 2, 0x5C), 0xe007c018, 0x00000000, + /* DRAM CS Mask Address i Registers + * F2:0x60 i = 0,1 + * F2:0x64 i = 2,3 + * F2:0x68 i = 4,5 + * F2:0x6C i = 6,7 + * Select bits to exclude from comparison with the DRAM Base address register. 
+ * [ 4: 0] Reserved + * [13: 5] Address Mask (21-13) + * Address to be excluded from the optimized case + * [18:14] Reserved + * [28:19] Address Mask (36-27) + * The bits with an address mask of 1 are excluded from address comparison + * [31:29] Reserved + * + */ + PCI_ADDR(0, 0x18, 2, 0x60), 0xe007c01f, 0x00000000, + PCI_ADDR(0, 0x18, 2, 0x64), 0xe007c01f, 0x00000000, + PCI_ADDR(0, 0x18, 2, 0x68), 0xe007c01f, 0x00000000, + PCI_ADDR(0, 0x18, 2, 0x6C), 0xe007c01f, 0x00000000, + + /* DRAM Control Register + * F2:0x78 + * [ 3: 0] RdPtrInit ( Read Pointer Initial Value) + * 0x03-0x00: reserved + * [ 6: 4] RdPadRcvFifoDly (Read Delay from Pad Receive FIFO) + * 000 = reserved + * 001 = reserved + * 010 = 1.5 Memory Clocks + * 011 = 2 Memory Clocks + * 100 = 2.5 Memory Clocks + * 101 = 3 Memory Clocks + * 110 = 3.5 Memory Clocks + * 111 = Reseved + * [15: 7] Reserved + * [16:16] AltVidC3MemClkTriEn (AltVID Memory Clock Tristate Enable) + * Enables the DDR memory clocks to be tristated when alternate VID mode is enabled. This bit has no effect if the DisNbClkRamp bit (F3, 0x88) is set + * [17:17] DllTempAdjTime (DLL Temperature Adjust Cycle Time) + * 0 = 5 ms + * 1 = 1 ms + * [18:18] DqsRcvEnTrain (DQS Receiver Enable Training Mode) + * 0 = Normal DQS Receiver enable operation + * 1 = DQS receiver enable training mode + * [31:19] reverved + */ + PCI_ADDR(0, 0x18, 2, 0x78), 0xfff80000, (6<<4)|(6<<0), + + /* DRAM Initialization Register + * F2:0x7C + * [15: 0] MrsAddress (Address for MRS/EMRS Commands) + * this field specifies the dsata driven on the DRAM address pins 15-0 for MRS and EMRS commands + * [18:16] MrsBank (Bank Address for MRS/EMRS Commands) + * this files specifies the data driven on the DRAM bank pins for the MRS and EMRS commands + * [23:19] reverved + * [24:24] SendPchgAll (Send Precharge All Command) + * Setting this bit causes the DRAM controller to send a precharge all command. 
This bit is cleared by the hardware after the command completes + * [25:25] SendAutoRefresh (Send Auto Refresh Command) + * Setting this bit causes the DRAM controller to send an auto refresh command. This bit is cleared by the hardware after the command completes + * [26:26] SendMrsCmd (Send MRS/EMRS Command) + * Setting this bit causes the DRAM controller to send the MRS or EMRS command defined by the MrsAddress and MrsBank fields. This bit is cleared by the hardware after the command completes + * [27:27] DeassertMemRstX (De-assert Memory Reset) + * Setting this bit causes the DRAM controller to de-assert the memory reset pin. This bit cannot be used to assert the memory reset pin + * [28:28] AssertCke (Assert CKE) + * Setting this bit causes the DRAM controller to assert the CKE pins. This bit cannot be used to de-assert the CKE pins + * [30:29] reserved + * [31:31] EnDramInit (Enable DRAM Initialization) + * Setting this bit puts the DRAM controller in a BIOS controlled DRAM initialization mode. BIOS must clear this bit after DRAM initialization is complete.
+ */ +// PCI_ADDR(0, 0x18, 2, 0x7C), 0x60f80000, 0, + + + /* DRAM Bank Address Mapping Register + * F2:0x80 + * Specify the memory module size + * [ 3: 0] CS1/0 + * [ 7: 4] CS3/2 + * [11: 8] CS5/4 + * [15:12] CS7/6 + * [31:16] + row col bank + 0: 13 9 2 :128M + 1: 13 10 2 :256M + 2: 14 10 2 :512M + 3: 13 11 2 :512M + 4: 13 10 3 :512M + 5: 14 10 3 :1G + 6: 14 11 2 :1G + 7: 15 10 3 :2G + 8: 14 11 3 :2G + 9: 15 11 3 :4G + 10: 16 10 3 :4G + 11: 16 11 3 :8G + */ + PCI_ADDR(0, 0x18, 2, 0x80), 0xffff0000, 0x00000000, + /* DRAM Timing Low Register + * F2:0x88 + * [ 2: 0] Tcl (Cas# Latency, Cas# to read-data-valid) + * 000 = reserved + * 001 = reserved + * 010 = CL 3 + * 011 = CL 4 + * 100 = CL 5 + * 101 = CL 6 + * 110 = reserved + * 111 = reserved + * [ 3: 3] Reserved + * [ 5: 4] Trcd (Ras#-active to Cas# read/write delay) + * 00 = 3 clocks + * 01 = 4 clocks + * 10 = 5 clocks + * 11 = 6 clocks + * [ 7: 6] Reserved + * [ 9: 8] Trp (Row Precharge Time, Precharge-to-Active or Auto-Refresh) + * 00 = 3 clocks + * 01 = 4 clocks + * 10 = 5 clocks + * 11 = 6 clocks + * [10:10] Reserved + * [11:11] Trtp (Read to Precharge Time, read Cas# to precharge time) + * 0 = 2 clocks for Burst Length of 32 Bytes + * 4 clocks for Burst Length of 64 Bytes + * 1 = 3 clocks for Burst Length of 32 Bytes + * 5 clocks for Burst Length of 64 Bytes + * [15:12] Tras (Minimum Ras# Active Time) + * 0000 = reserved + * 0001 = reserved + * 0010 = 5 bus clocks + * ... + * 1111 = 18 bus clocks + * [19:16] Trc (Row Cycle Time, Ras#-active to Ras#-active or auto refresh of the same bank) + * 0000 = 11 bus clocks + * 0010 = 12 bus clocks + * ... 
+ * 1110 = 25 bus clocks + * 1111 = 26 bus clocks + * [21:20] Twr (Write Recovery Time, From the last data to precharge, writes can go back-to-back) + * 00 = 3 bus clocks + * 01 = 4 bus clocks + * 10 = 5 bus clocks + * 11 = 6 bus clocks + * [23:22] Trrd (Active-to-active (Ras#-to-Ras#) Delay of different banks) + * 00 = 2 bus clocks + * 01 = 3 bus clocks + * 10 = 4 bus clocks + * 11 = 5 bus clocks + * [31:24] MemClkDis ( Disable the MEMCLK outputs for DRAM channel A, BIOS should set it to reduce the power consumption) + * Bit F(1207) M2 Package S1g1 Package + * 0 N/A MA1_CLK1 N/A + * 1 N/A MA0_CLK1 MA0_CLK1 + * 2 MA3_CLK N/A N/A + * 3 MA2_CLK N/A N/A + * 4 MA1_CLK MA1_CLK0 N/A + * 5 MA0_CLK MA0_CLK0 MA0_CLK0 + * 6 N/A MA1_CLK2 N/A + * 7 N/A MA0_CLK2 MA0_CLK2 + */ + PCI_ADDR(0, 0x18, 2, 0x88), 0x000004c8, 0xff000002 /* 0x03623125 */ , + /* DRAM Timing High Register + * F2:0x8C + * [ 3: 0] Reserved + * [ 6: 4] TrwtTO (Read-to-Write Turnaround for Data, DQS Contention) + * 000 = 2 bus clocks + * 001 = 3 bus clocks + * 010 = 4 bus clocks + * 011 = 5 bus clocks + * 100 = 6 bus clocks + * 101 = 7 bus clocks + * 110 = 8 bus clocks + * 111 = 9 bus clocks + * [ 7: 7] Reserved + * [ 9: 8] Twtr (Internal DRAM Write-to-Read Command Delay, minium write-to-read delay when both access the same chip select) + * 00 = Reserved + * 01 = 1 bus clocks + * 10 = 2 bus clocks + * 11 = 3 bus clocks + * [11:10] Twrrd (Write to Read DIMM Termination Turnaround, minimum write-to-read delay when accessing two different DIMMs) + * 00 = 0 bus clocks + * 01 = 1 bus clocks + * 10 = 2 bus clocks + * 11 = 3 bus clocks + * [13:12] Twrwr (Write to Write Timing) + * 00 = 1 bus clocks ( 0 idle cycle on the bus) + * 01 = 2 bus clocks ( 1 idle cycle on the bus) + * 10 = 3 bus clocks ( 2 idle cycles on the bus) + * 11 = Reserved + * [15:14] Trdrd ( Read to Read Timing) + * 00 = 2 bus clocks ( 1 idle cycle on the bus) + * 01 = 3 bus clocks ( 2 idle cycles on the bus) + * 10 = 4 bus clocks ( 3 idle cycles on 
the bus) + * 11 = 5 bus clocks ( 4 idel cycles on the bus) + * [17:16] Tref (Refresh Rate) + * 00 = Undefined behavior + * 01 = Reserved + * 10 = Refresh interval of 7.8 microseconds + * 11 = Refresh interval of 3.9 microseconds + * [19:18] Reserved + * [22:20] Trfc0 ( Auto-Refresh Row Cycle Time for the Logical DIMM0, based on DRAM density and speed) + * 000 = 75 ns (all speeds, 256Mbit) + * 001 = 105 ns (all speeds, 512Mbit) + * 010 = 127.5 ns (all speeds, 1Gbit) + * 011 = 195 ns (all speeds, 2Gbit) + * 100 = 327.5 ns (all speeds, 4Gbit) + * 101 = reserved + * 110 = reserved + * 111 = reserved + * [25:23] Trfc1 ( Auto-Refresh Row Cycle Time for the Logical DIMM1, based on DRAM density and speed) + * [28:26] Trfc2 ( Auto-Refresh Row Cycle Time for the Logical DIMM2, based on DRAM density and speed) + * [31:29] Trfc3 ( Auto-Refresh Row Cycle Time for the Logical DIMM3, based on DRAM density and speed) + */ + PCI_ADDR(0, 0x18, 2, 0x8c), 0x000c008f, (2 << 16)|(1 << 8), + /* DRAM Config Low Register + * F2:0x90 + * [ 0: 0] InitDram (Initialize DRAM) + * 1 = write 1 cause DRAM controller to execute the DRAM initialization, when done it read to 0 + * [ 1: 1] ExitSelfRef ( Exit Self Refresh Command ) + * 1 = write 1 causes the DRAM controller to bring the DRAMs out fo self refresh mode + * [ 3: 2] Reserved + * [ 5: 4] DramTerm (DRAM Termination) + * 00 = On die termination disabled + * 01 = 75 ohms + * 10 = 150 ohms + * 11 = 50 ohms + * [ 6: 6] Reserved + * [ 7: 7] DramDrvWeak ( DRAM Drivers Weak Mode) + * 0 = Normal drive strength mode. 
+ * 1 = Weak drive strength mode + * [ 8: 8] ParEn (Parity Enable) + * 1 = Enable address parity computation output, PAR, and enables the parity error input, ERR + * [ 9: 9] SelfRefRateEn (Faster Self Refresh Rate Enable) + * 1 = Enable high temperature ( two times normal ) self refresh rate + * [10:10] BurstLength32 ( DRAM Burst Length Set for 32 Bytes) + * 0 = 64-byte mode + * 1 = 32-byte mode + * [11:11] Width128 ( Width of DRAM interface) + * 0 = the controller DRAM interface is 64-bits wide + * 1 = the controller DRAM interface is 128-bits wide + * [12:12] X4Dimm (DIMM 0 is x4) + * [13:13] X4Dimm (DIMM 1 is x4) + * [14:14] X4Dimm (DIMM 2 is x4) + * [15:15] X4Dimm (DIMM 3 is x4) + * 0 = DIMM is not x4 + * 1 = x4 DIMM present + * [16:16] UnBuffDimm ( Unbuffered DIMMs) + * 0 = Buffered DIMMs + * 1 = Unbuffered DIMMs + * [18:17] Reserved + * [19:19] DimmEccEn ( DIMM ECC Enable ) + 1 = ECC checking is being enabled for all DIMMs on the DRAM controller ( Through F3 0x44[EccEn]) + * [31:20] Reserved + */ + PCI_ADDR(0, 0x18, 2, 0x90), 0xfff6004c, 0x00000010, + /* DRAM Config High Register + * F2:0x94 + * [ 0: 2] MemClkFreq ( Memory Clock Frequency) + * 000 = 200MHz + * 001 = 266MHz + * 010 = 333MHz + * 011 = reserved + * 1xx = reserved + * [ 3: 3] MemClkFreqVal (Memory Clock Freqency Valid) + * 1 = BIOS need to set the bit when setting up MemClkFreq to the proper value + * [ 7: 4] MaxAsyncLat ( Maximum Asynchronous Latency) + * 0000 = 0 ns + * ... 
+ * 1111 = 15 ns + * [11: 8] Reserved + * [12:12] RDqsEn ( Read DQS Enable) This bit is only to be set if x8 registered DIMMs are present in the system + * 0 = DM pins function as data mask pins + * 1 = DM pins function as read DQS pins + * [13:13] Reserved + * [14:14] DisDramInterface ( Disable the DRAM interface ) When this bit is set, the DRAM controller is disabled, and the interface is in a low power state + * 0 = Enabled (default) + * 1 = Disabled + * [15:15] PowerDownEn ( Power Down Mode Enable ) + * 0 = Disabled (default) + * 1 = Enabled + * [16:16] PowerDown ( Power Down Mode ) + * 0 = Channel CKE Control + * 1 = Chip Select CKE Control + * [17:17] FourRankSODimm (Four Rank SO-DIMM) + * 1 = this bit is set by BIOS to indicate that a four rank SO-DIMM is present + * [18:18] FourRankRDimm (Four Rank Registered DIMM) + * 1 = this bit is set by BIOS to indicate that a four rank registered DIMM is present + * [19:19] Reserved + * [20:20] SlowAccessMode (Slow Access Mode (2T Mode)) + * 0 = DRAM address and control signals are driven for one MEMCLK cycle + * 1 = One additional MEMCLK of setup time is provided on all DRAM address and control signals except CS, CKE, and ODT; i.e., these signals are driven for two MEMCLK cycles rather than one + * [21:21] Reserved + * [22:22] BankSwizzleMode ( Bank Swizzle Mode), + * 0 = Disabled (default) + * 1 = Enabled + * [23:23] Reserved + * [27:24] DcqBypassMax ( DRAM Controller Queue Bypass Maximum) + * 0000 = No bypass; the oldest request is never bypassed + * 0001 = The oldest request may be bypassed no more than 1 time + * ... + * 1111 = The oldest request may be bypassed no more than 15 times + * [31:28] FourActWindow ( Four Bank Activate Window) , not more than 4 banks in an 8 bank device are activated + * 0000 = No tFAW window restriction + * 0001 = 8 MEMCLK cycles + * 0010 = 9 MEMCLK cycles + * ...
+ * 1101 = 20 MEMCLK cycles + * 111x = reserved + */ + PCI_ADDR(0, 0x18, 2, 0x94), 0x00a82f00,0x00008000, + /* DRAM Delay Line Register + * F2:0xa0 + * [ 0: 0] MemClrStatus (Memory Clear Status) : ---------Readonly + * when set, this bit indicates that the memory clear function is complete. Only clear by reset. BIOS should not write or read the DRAM until this bit is set by hardware + * [ 1: 1] DisableJitter ( Disable Jitter) + * When set the DDR compensation circuit will not change the values unless the change is more than one step from the current value + * [ 3: 2] RdWrQByp ( Read/Write Queue Bypass Count) + * 00 = 2 + * 01 = 4 + * 10 = 8 + * 11 = 16 + * [ 4: 4] Mode64BitMux (Mismatched DIMM Support Enable) + * 1 When bit enables support for mismatched DIMMs when using 128-bit DRAM interface, the Width128 no effect, only for M2 and s1g1 + * [ 5: 5] DCC_EN ( Dynamica Idle Cycle Counter Enable) + * When set to 1, indicates that each entry in the page tables dynamically adjusts the idle cycle limit based on page Conflict/Page Miss (PC/PM) traffic + * [ 8: 6] ILD_lmt ( Idle Cycle Limit) + * 000 = 0 cycles + * 001 = 4 cycles + * 010 = 8 cycles + * 011 = 16 cycles + * 100 = 32 cycles + * 101 = 64 cycles + * 110 = 128 cycles + * 111 = 256 cycles + * [ 9: 9] DramEnabled ( DRAM Enabled) + * When Set, this bit indicates that the DRAM is enabled, this bit is set by hardware after DRAM initialization or on an exit from self refresh. 
The DRAM controller is intialized after the + * hardware-controlled initialization process ( initiated by the F2 0x90[DramInit]) completes or when the BIOS-controlled initialization process completes (F2 0x7c(EnDramInit] is + * written from 1 to 0) + * [23:10] Reserved + * [31:24] MemClkDis ( Disable the MEMCLK outputs for DRAM channel B, BIOS should set it to reduce the power consumption) + * Bit F(1207) M2 Package S1g1 Package + * 0 N/A MA1_CLK1 N/A + * 1 N/A MA0_CLK1 MA0_CLK1 + * 2 MA3_CLK N/A N/A + * 3 MA2_CLK N/A N/A + * 4 MA1_CLK MA1_CLK0 N/A + * 5 MA0_CLK MA0_CLK0 MA0_CLK0 + * 6 N/A MA1_CLK2 N/A + * 7 N/A MA0_CLK2 MA0_CLK2 + */ + PCI_ADDR(0, 0x18, 2, 0xa0), 0x00fffc00, 0xff000000, + + /* DRAM Scrub Control Register + * F3:0x58 + * [ 4: 0] DRAM Scrube Rate + * [ 7: 5] reserved + * [12: 8] L2 Scrub Rate + * [15:13] reserved + * [20:16] Dcache Scrub + * [31:21] reserved + * Scrub Rates + * 00000 = Do not scrub + * 00001 = 40.00 ns + * 00010 = 80.00 ns + * 00011 = 160.00 ns + * 00100 = 320.00 ns + * 00101 = 640.00 ns + * 00110 = 1.28 us + * 00111 = 2.56 us + * 01000 = 5.12 us + * 01001 = 10.20 us + * 01011 = 41.00 us + * 01100 = 81.90 us + * 01101 = 163.80 us + * 01110 = 327.70 us + * 01111 = 655.40 us + * 10000 = 1.31 ms + * 10001 = 2.62 ms + * 10010 = 5.24 ms + * 10011 = 10.49 ms + * 10100 = 20.97 ms + * 10101 = 42.00 ms + * 10110 = 84.00 ms + * All Others = Reserved + */ + PCI_ADDR(0, 0x18, 3, 0x58), 0xffe0e0e0, 0x00000000, + /* DRAM Scrub Address Low Register + * F3:0x5C + * [ 0: 0] DRAM Scrubber Redirect Enable + * 0 = Do nothing + * 1 = Scrubber Corrects errors found in normal operation + * [ 5: 1] Reserved + * [31: 6] DRAM Scrub Address 31-6 + */ + PCI_ADDR(0, 0x18, 3, 0x5C), 0x0000003e, 0x00000000, + /* DRAM Scrub Address High Register + * F3:0x60 + * [ 7: 0] DRAM Scrubb Address 39-32 + * [31: 8] Reserved + */ + PCI_ADDR(0, 0x18, 3, 0x60), 0xffffff00, 0x00000000, + }; + // for PCI_ADDR(0, 0x18, 2, 0x98) index, and PCI_ADDR(0x, 0x18, 2, 0x9c) data + /* + 
index: + [29: 0] DctOffset (Dram Controller Offset) + [30:30] DctAccessWrite (Dram Controller Read/Write Select) + 0 = read access + 1 = write access + [31:31] DctAccessDone (Dram Controller Access Done) + 0 = Access in progress + 1 = No access is progress + + Data: + [31: 0] DctOffsetData (Dram Controller Offset Data) + + Read: + - Write the register num to DctOffset with DctAccessWrite = 0 + - poll the DctAccessDone until it = 1 + - Read the data from DctOffsetData + Write: + - Write the data to DctOffsetData + - Write register num to DctOffset with DctAccessWrite = 1 + - poll the DctAccessDone untio it = 1 + + */ +#if 0 + static const unsigned int index_register_values[] = { + /* Output Driver Compensation Control Register + * Index: 0x00 + * [ 1: 0] CkeDrvStren (CKE Drive Strength) + * 00 = 1.0x + * 01 = 1.25x + * 10 = 1.5x (Default) + * 11 = 2.0x + * [ 3: 2] reserved + * [ 5: 4] CsOdtDrvStren (CS/ODT Drive Strength) + * 00 = 1.0x + * 01 = 1.25x + * 10 = 1.5x (Default) + * 11 = 2.0x + * [ 7: 6] reserved + * [ 9: 8] AddrCmdDrvStren (Address/Command Drive Strength) + * 00 = 1.0x + * 01 = 1.25x + * 10 = 1.5x (Default) + * 11 = 2.0x + * [11:10] reserved + * [13:12] ClkDrvStren (MEMCLK Drive Strength) + * 00 = 0.75x + * 01 = 1.0x Default) + * 10 = 1.25x + * 11 = 1.5x + * [15:14] reserved + * [17:16] DataDrvStren (Data Drive Strength) + * 00 = 0.75x + * 01 = 1.0x Default) + * 10 = 1.25x + * 11 = 1.5x + * [19:18] reserved + * [21:20] DqsDrvStren (DQS Drive Strength) + * 00 = 0.75x + * 01 = 1.0x Default) + * 10 = 1.25x + * 11 = 1.5x + * [27:22] reserved + * [29:28] ProcOdt ( Processor On-die Termination) + * 00 = 300 ohms +/- 20% + * 01 = 150 ohms +/- 20% + * 10 = 75 ohms +/- 20% + * 11 = reserved + * [31:30] reserved + */ + 0x00, 0xcfcccccc, 0x00000000, + 0x20, 0xcfcccccc, 0x00000000, + /* Write Data Timing Low Control Register + * Index 0x01 + * [ 5: 0] WrDatTimeByte0 (Write Data Byte 0 Timing Control) + * 000000 = no delay + * 000001 = 1/96 MEMCLK delay + * 000010 = 
2/96 MEMCLK delay + * ... + * 101111 = 47/96 MEMCLK delay + * 11xxxx = reserved + * [ 7: 6] reserved + * [13: 8] WrDatTimeByte1 (Write Data Byte 1 Timing Control) + * [15:14] reserved + * [21:16] WrDatTimeByte2 (Write Data Byte 2 Timing Control) + * [23:22] reserved + * [29:24] WrDatTimeByte3 (Write Data Byte 3 Timing Control) + * [31:30] reserved + */ + 0x01, 0xc0c0c0c0, 0x00000000, + 0x21, 0xc0c0c0c0, 0x00000000, + /* Write Data Timing High Control Register + * Index 0x02 + * [ 5: 0] WrDatTimeByte4 (Write Data Byte 4 Timing Control) + * [ 7: 6] reserved + * [13: 8] WrDatTimeByte5 (Write Data Byte 5 Timing Control) + * [15:14] reserved + * [21:16] WrDatTimeByte6 (Write Data Byte 6 Timing Control) + * [23:22] reserved + * [29:24] WrDatTimeByte7 (Write Data Byte 7 Timing Control) + * [31:30] reserved + */ + 0x02, 0xc0c0c0c0, 0x00000000, + 0x22, 0xc0c0c0c0, 0x00000000, + + /* Write Data ECC Timing Control Register + * Index 0x03 + * [ 5: 0] WrChkTime (Write Data ECC Timing Control) + * 000000 = no delay + * 000001 = 1/96 MEMCLK delay + * 000010 = 2/96 MEMCLK delay + * ... + * 101111 = 47/96 MEMCLK delay + * 11xxxx = reserved + * [31: 6] reserved + */ + 0x03, 0x000000c0, 0x00000000, + 0x23, 0x000000c0, 0x00000000, + + /* Address Timing Control Register + * Index 0x04 + * [ 4: 0] CkeFineDelay (CKE Fine Delay) + * 00000 = no delay + * 00001 = 1/64 MEMCLK delay + * 00010 = 2/64 MEMCLK delay + * ... + * 11111 = 31/64 MEMCLK delay + * [ 5: 5] CkeSetup (CKE Setup Time) + * 0 = 1/2 MEMCLK + * 1 = 1 MEMCLK + * [ 7: 6] reserved + * [12: 8] CsOdtFineDelay (CS/ODT Fine Delay) + * 00000 = no delay + * 00001 = 1/64 MEMCLK delay + * 00010 = 2/64 MEMCLK delay + * ... + * 11111 = 31/64 MEMCLK delay + * [13:13] CsOdtSetup (CS/ODT Setup Time) + * 0 = 1/2 MEMCLK + * 1 = 1 MEMCLK + * [15:14] reserved + * [20:16] AddrCmdFineDelay (Address/Command Fine Delay) + * 00000 = no delay + * 00001 = 1/64 MEMCLK delay + * 00010 = 2/64 MEMCLK delay + * ... 
+ * 11111 = 31/64 MEMCLK delay + * [21:21] AddrCmdSetup (Address/Command Setup Time) + * 0 = 1/2 MEMCLK + * 1 = 1 MEMCLK + * [31:22] reserved + */ + 0x04, 0xffc0c0c0, 0x00000000, + 0x24, 0xffc0c0c0, 0x00000000, + + /* Read DQS Timing Low Control Register + * Index 0x05 + * [ 5: 0] RdDqsTimeByte0 (Read DQS Byte 0 Timing Control) + * 000000 = no delay + * 000001 = 1/96 MEMCLK delay + * 000010 = 2/96 MEMCLK delay + * ... + * 101111 = 47/96 MEMCLK delay + * 11xxxx = reserved + * [ 7: 6] reserved + * [13: 8] RdDqsTimeByte1 (Read DQS Byte 1 Timing Control) + * [15:14] reserved + * [21:16] RdDqsTimeByte2 (Read DQS Byte 2 Timing Control) + * [23:22] reserved + * [29:24] RdDqsTimeByte3 (Read DQS Byte 3 Timing Control) + * [31:30] reserved + */ + 0x05, 0xc0c0c0c0, 0x00000000, + 0x25, 0xc0c0c0c0, 0x00000000, + + /* Read DQS Timing High Control Register + * Index 0x06 + * [ 5: 0] RdDqsTimeByte4 (Read DQS Byte 4 Timing Control) + * [ 7: 6] reserved + * [13: 8] RdDqsTimeByte5 (Read DQS Byte 5 Timing Control) + * [15:14] reserved + * [21:16] RdDqsTimeByte6 (Read DQS Byte 6 Timing Control) + * [23:22] reserved + * [29:24] RdDqsTimeByte7 (Read DQS Byte 7 Timing Control) + * [31:30] reserved + */ + 0x06, 0xc0c0c0c0, 0x00000000, + 0x26, 0xc0c0c0c0, 0x00000000, + + /* Read DQS ECC Timing Control Register + * Index 0x07 + * [ 5: 0] RdDqsTimeCheck (Read DQS ECC Timing Control) + * 000000 = no delay + * 000001 = 1/96 MEMCLK delay + * 000010 = 2/96 MEMCLK delay + * ... + * 101111 = 47/96 MEMCLK delay + * 11xxxx = reserved + * [31: 6] reserved + */ + 0x07, 0x000000c0, 0x00000000, + 0x27, 0x000000c0, 0x00000000, + + /* DQS Receiver Enable Timing Control Register + * Index 0x10, 0x13, 0x16, 0x19, + * [ 7: 0] Dqs RcvEnDelay (DQS Receiver Enable Delay) + * 0x00 = 0 ps + * 0x01 = 50 ps + * 0x02 = 100 ps + * ... 
+ * 0xae = 8.7 ns + * 0xaf-0xff = reserved + * [31: 6] reserved + */ + 0x10, 0x000000ff, 0x00000000, + 0x13, 0x000000ff, 0x00000000, + 0x16, 0x000000ff, 0x00000000, + 0x19, 0x000000ff, 0x00000000, + 0x30, 0x000000ff, 0x00000000, + 0x33, 0x000000ff, 0x00000000, + 0x36, 0x000000ff, 0x00000000, + 0x39, 0x000000ff, 0x00000000, + }; +#endif + + int i; + int max; + +#if 1 + if (!controller_present(ctrl)) { +// print_debug("No memory controller present\r\n"); + sysinfo->ctrl_present[ctrl->node_id] = 0; + return; + } +#endif + sysinfo->ctrl_present[ctrl->node_id] = 1; + + print_spew("setting up CPU"); + print_spew_hex8(ctrl->node_id); + print_spew(" northbridge registers\r\n"); + max = sizeof(register_values)/sizeof(register_values[0]); + for(i = 0; i < max; i += 3) { + device_t dev; + unsigned where; + unsigned long reg; + dev = (register_values[i] & ~0xff) - PCI_DEV(0, 0x18, 0) + ctrl->f0; + where = register_values[i] & 0xff; + reg = pci_read_config32(dev, where); + reg &= register_values[i+1]; + reg |= register_values[i+2]; + pci_write_config32(dev, where, reg); + } + +#if 0 + // for index regs + max = sizeof(index_register_values)/sizeof(index_register_values[0]); + for(i = 0; i < max; i += 3) { + unsigned long reg; + unsigned index; + index = register_values[i]; + reg = pci_read_config32_index_wait(ctrl->f2, DRAM_CTRL_ADDI_DATA_OFFSET, index); + reg &= register_values[i+1]; + reg |= register_values[i+2]; + pci_write_config32_index_wait(ctrl->f2, DRAM_CTRL_ADDI_DATA_OFFSET, index, reg); + } +#endif + + print_spew("done.\r\n"); +} + +static int is_dual_channel(const struct mem_controller *ctrl) +{ + uint32_t dcl; + dcl = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW); + return dcl & DCL_Width128; +} + +static int is_registered(const struct mem_controller *ctrl) +{ + /* Test to see if we are dealing with registered SDRAM. + * If we are not registered we are unbuffered. + * This function must be called after spd_handle_unbuffered_dimms. 
+ */ + uint32_t dcl; + dcl = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW); + return !(dcl & DCL_UnBuffDimm); +} + +static void spd_get_dimm_size(unsigned device, struct dimm_size *sz) +{ + /* Calculate the log base 2 size of a DIMM in bits */ + int value; + sz->per_rank = 0; + sz->rows = 0; + sz->col = 0; + sz->rank = 0; + + value = spd_read_byte(device, SPD_ROW_NUM); /* rows */ + if (value < 0) goto hw_err; + if ((value & 0xff) == 0) goto val_err; // max is 16 ? + sz->per_rank += value & 0xff; + sz->rows = value & 0xff; + + value = spd_read_byte(device, SPD_COL_NUM); /* columns */ + if (value < 0) goto hw_err; + if ((value & 0xff) == 0) goto val_err; //max is 11 + sz->per_rank += value & 0xff; + sz->col = value & 0xff; + + value = spd_read_byte(device, SPD_BANK_NUM); /* banks */ + if (value < 0) goto hw_err; + if ((value & 0xff) == 0) goto val_err; + sz->bank = log2(value & 0xff); // convert 4 to 2, and 8 to 3 + sz->per_rank += sz->bank; + + /* Get the module data width and convert it to a power of two */ + value = spd_read_byte(device, SPD_DATA_WIDTH); + if (value < 0) goto hw_err; + value &= 0xff; + if ((value != 72) && (value != 64)) goto val_err; + sz->per_rank += log2(value) - 3; //64 bit So another 3 lines + + /* How many ranks? */ + value = spd_read_byte(device, SPD_MOD_ATTRIB_RANK); /* number of physical banks */ + if (value < 0) goto hw_err; +// value >>= SPD_MOD_ATTRIB_RANK_NUM_SHIFT; + value &= SPD_MOD_ATTRIB_RANK_NUM_MASK; + value += SPD_MOD_ATTRIB_RANK_NUM_BASE; // 0-->1, 1-->2, 3-->4 + /* + rank == 1 only one rank or say one side + rank == 2 two side , and two ranks + rank == 4 two side , and four ranks total + Some one side two ranks, because of stacked + */ + if ((value != 1) && (value != 2) && (value != 4 )) { + goto val_err; + } + sz->rank = value; + + /* verify if per_rank is equal byte 31 + it has the DIMM size as a multiple of 128MB. 
+ */ + value = spd_read_byte(device, SPD_RANK_SIZE); + if (value < 0) goto hw_err; + value &= 0xff; + value = log2(value); + if(value <=4 ) value += 8; // add back to 1G to high + value += (27-5); // make 128MB to the real lines + if( value != (sz->per_rank)) { + print_err("Bad RANK Size --\r\n"); + goto val_err; + } + + goto out; + + val_err: + die("Bad SPD value\r\n"); + /* If an hw_error occurs report that I have no memory */ +hw_err: + sz->per_rank = 0; + sz->rows = 0; + sz->col = 0; + sz->bank = 0; + sz->rank = 0; + out: + return; +} + +static void set_dimm_size(const struct mem_controller *ctrl, struct dimm_size *sz, unsigned index, int is_Width128) +{ + uint32_t base0, base1; + + + /* For each base register. + * Place the dimm size in 32 MB quantities in the bits 31 - 21. + * The initialize dimm size is in bits. + * Set the base enable bit0. + */ + + base0 = base1 = 0; + + /* Make certain side1 of the dimm is at least 128MB */ + if (sz->per_rank >= 27) { + base0 = (1 << ((sz->per_rank - 27 ) + 19)) | 1; + } + + /* Make certain side2 of the dimm is at least 128MB */ + if (sz->rank > 1) { // 2 ranks or 4 ranks + base1 = (1 << ((sz->per_rank - 27 ) + 19)) | 1; + } + + /* Double the size if we are using dual channel memory */ + if (is_Width128) { + base0 = (base0 << 1) | (base0 & 1); + base1 = (base1 << 1) | (base1 & 1); + } + + /* Clear the reserved bits */ + base0 &= ~0xe007fffe; + base1 &= ~0xe007fffe; + + /* Set the appropriate DIMM base address register */ + pci_write_config32(ctrl->f2, DRAM_CSBASE + (((index << 1)+0)<<2), base0); + pci_write_config32(ctrl->f2, DRAM_CSBASE + (((index << 1)+1)<<2), base1); +#if QRANK_DIMM_SUPPORT == 1 + if(sz->rank == 4) { + pci_write_config32(ctrl->f2, DRAM_CSBASE + (((index << 1)+4)<<2), base0); + pci_write_config32(ctrl->f2, DRAM_CSBASE + (((index << 1)+5)<<2), base1); + } +#endif + + /* Enable the memory clocks for this DIMM by Clear the MemClkDis bit*/ + if (base0) { + uint32_t dword; + uint32_t ClkDis0; +#if 
CPU_SOCKET_TYPE == 0x10 /* L1 */ + ClkDis0 = DTL_MemClkDis0; +#else + #if CPU_SOCKET_TYPE == 0x11 /* AM2 */ + ClkDis0 = DTL_MemClkDis0_AM2; + #endif +#endif + + dword = pci_read_config32(ctrl->f2, DRAM_TIMING_LOW); //Channel A + dword &= ~(ClkDis0 >> index); +#if QRANK_DIMM_SUPPORT == 1 + if(sz->rank == 4) { + dword &= ~(ClkDis0 >> (index+2)); + } +#endif + pci_write_config32(ctrl->f2, DRAM_TIMING_LOW, dword); + + if (is_Width128) { //Channel B + dword = pci_read_config32(ctrl->f2, DRAM_CTRL_MISC); + dword &= ~(ClkDis0 >> index); +#if QRANK_DIMM_SUPPORT == 1 + if(sz->rank == 4) { + dword &= ~(ClkDis0 >> (index+2)); + } +#endif + pci_write_config32(ctrl->f2, DRAM_CTRL_MISC, dword); + } + + } +} + +/* row col bank for 64 bit + 0: 13 9 2 :128M + 1: 13 10 2 :256M + 2: 14 10 2 :512M + 3: 13 11 2 :512M + 4: 13 10 3 :512M + 5: 14 10 3 :1G + 6: 14 11 2 :1G + 7: 15 10 3 :2G + 8: 14 11 3 :2G + 9: 15 11 3 :4G + 10: 16 10 3 :4G + 11: 16 11 3 :8G +*/ + +static void set_dimm_cs_map(const struct mem_controller *ctrl, struct dimm_size *sz, unsigned index) +{ + static const uint8_t cs_map_aaa[24] = { + /* (bank=2, row=13, col=9)(3, 16, 11) ---> (0, 0, 0) (1, 3, 2) */ + //Bank2 + 0, 1, 3, + 0, 2, 6, + 0, 0, 0, + 0, 0, 0, + //Bank3 + 0, 4, 0, + 0, 5, 8, + 0, 7, 9, + 0,10,11, + }; + + uint32_t map; + + map = pci_read_config32(ctrl->f2, DRAM_BANK_ADDR_MAP); + map &= ~(0xf << (index * 4)); +#if QRANK_DIMM_SUPPORT == 1 + if(sz->rank == 4) { + map &= ~(0xf << ( (index + 2) * 4)); + } +#endif + + /* Make certain side1 of the dimm is at least 128MB */ + if (sz->per_rank >= 27) { + unsigned temp_map; + temp_map = cs_map_aaa[(sz->bank-2)*3*4 + (sz->rows - 13)*3 + (sz->col - 9) ]; + map |= temp_map << (index*4); +#if QRANK_DIMM_SUPPORT == 1 + if(sz->rank == 4) { + map |= temp_map << ( (index + 2) * 4); + } +#endif + } + + pci_write_config32(ctrl->f2, DRAM_BANK_ADDR_MAP, map); + +} + +static long spd_set_ram_size(const struct mem_controller *ctrl, long dimm_mask, struct mem_info *meminfo) +{ + 
int i; + + for(i = 0; i < DIMM_SOCKETS; i++) { + struct dimm_size *sz = &(meminfo->sz[i]); + if (!(dimm_mask & (1 << i))) { + continue; + } + spd_get_dimm_size(ctrl->channel0[i], sz); + if (sz->per_rank == 0) { + return -1; /* Report SPD error */ + } + set_dimm_size(ctrl, sz, i, meminfo->is_Width128); + set_dimm_cs_map (ctrl, sz, i); + } + return dimm_mask; +} + +static void route_dram_accesses(const struct mem_controller *ctrl, + unsigned long base_k, unsigned long limit_k) +{ + /* Route the addresses to the controller node */ + unsigned node_id; + unsigned limit; + unsigned base; + unsigned index; + unsigned limit_reg, base_reg; + device_t device; + + node_id = ctrl->node_id; + index = (node_id << 3); + limit = (limit_k << 2); + limit &= 0xffff0000; + limit -= 0x00010000; + limit |= ( 0 << 8) | (node_id << 0); + base = (base_k << 2); + base &= 0xffff0000; + base |= (0 << 8) | (1<<1) | (1<<0); + + limit_reg = 0x44 + index; + base_reg = 0x40 + index; + for(device = PCI_DEV(0, 0x18, 1); device <= PCI_DEV(0, 0x1f, 1); device += PCI_DEV(0, 1, 0)) { + pci_write_config32(device, limit_reg, limit); + pci_write_config32(device, base_reg, base); + } +} + +static void set_top_mem(unsigned tom_k, unsigned hole_startk) +{ + /* Error if I don't have memory */ + if (!tom_k) { + die("No memory?"); + } + + /* Report the amount of memory. */ + print_debug("RAM: 0x"); + print_debug_hex32(tom_k); + print_debug(" KB\r\n"); + + msr_t msr; + if(tom_k > (4*1024*1024)) { + /* Now set top of memory */ + msr.lo = (tom_k & 0x003fffff) << 10; + msr.hi = (tom_k & 0xffc00000) >> 22; + wrmsr(TOP_MEM2, msr); + } + + /* Leave a 64M hole between TOP_MEM and TOP_MEM2 + * so I can see my rom chip and other I/O devices. 
+ */ + if (tom_k >= 0x003f0000) { +#if HW_MEM_HOLE_SIZEK != 0 + if(hole_startk != 0) { + tom_k = hole_startk; + } else +#endif + tom_k = 0x3f0000; + } + msr.lo = (tom_k & 0x003fffff) << 10; + msr.hi = (tom_k & 0xffc00000) >> 22; + wrmsr(TOP_MEM, msr); +} + +static unsigned long interleave_chip_selects(const struct mem_controller *ctrl, int is_Width128) +{ + /* 35 - 27 */ + + static const uint8_t csbase_low_f0_shift[] = { + /* 128MB */ (14 - (13-5)), + /* 256MB */ (15 - (13-5)), + /* 512MB */ (15 - (13-5)), + /* 512MB */ (16 - (13-5)), + /* 512MB */ (16 - (13-5)), + /* 1GB */ (16 - (13-5)), + /* 1GB */ (16 - (13-5)), + /* 2GB */ (16 - (13-5)), + /* 2GB */ (17 - (13-5)), + /* 4GB */ (17 - (13-5)), + /* 4GB */ (16 - (13-5)), + /* 8GB */ (17 - (13-5)), + }; + + /* cs_base_high is not changed */ + + uint32_t csbase_inc; + int chip_selects, index; + int bits; + unsigned common_size; + unsigned common_cs_mode; + uint32_t csbase, csmask; + + /* See if all of the memory chip selects are the same size + * and if so count them. + */ + chip_selects = 0; + common_size = 0; + common_cs_mode = 0xff; + for(index = 0; index < 8; index++) { + unsigned size; + unsigned cs_mode; + uint32_t value; + + value = pci_read_config32(ctrl->f2, DRAM_CSBASE + (index << 2)); + + /* Is it enabled? */ + if (!(value & 1)) { + continue; + } + chip_selects++; + size = (value >> 19) & 0x3ff; + if (common_size == 0) { + common_size = size; + } + /* The size differed fail */ + if (common_size != size) { + return 0; + } + + value = pci_read_config32(ctrl->f2, DRAM_BANK_ADDR_MAP); + cs_mode =( value >> ((index>>1)*4)) & 0xf; + if(common_cs_mode == 0xff) { + common_cs_mode = cs_mode; + } + /* The cs_mode differed fail */ + if(common_cs_mode != cs_mode) { + return 0; + } + } + + /* Chip selects can only be interleaved when there is + * more than one and their is a power of two of them. 
+ */ + bits = log2(chip_selects); + if (((1 << bits) != chip_selects) || (bits < 1) || (bits > 3)) { //chip_selects max = 8 + return 0; + } + + /* Find the bits of csbase that we need to interleave on */ + csbase_inc = 1 << (csbase_low_f0_shift[common_cs_mode]); + if(is_Width128) { + csbase_inc <<=1; + } + + + /* Compute the initial values for csbase and csbask. + * In csbase just set the enable bit and the base to zero. + * In csmask set the mask bits for the size and page level interleave. + */ + csbase = 0 | 1; + csmask = (((common_size << bits) - 1) << 19); + csmask |= 0x3fe0 & ~((csbase_inc << bits) - csbase_inc); + for(index = 0; index < 8; index++) { + uint32_t value; + + value = pci_read_config32(ctrl->f2, DRAM_CSBASE + (index << 2)); + /* Is it enabled? */ + if (!(value & 1)) { + continue; + } + pci_write_config32(ctrl->f2, DRAM_CSBASE + (index << 2), csbase); + if((index & 1) == 0) { //only have 4 CSMASK + pci_write_config32(ctrl->f2, DRAM_CSMASK + ((index>>1) << 2), csmask); + } + csbase += csbase_inc; + } + + print_debug("Interleaved\r\n"); + + /* Return the memory size in K */ + return common_size << ((27-10) + bits); +} +static unsigned long order_chip_selects(const struct mem_controller *ctrl) +{ + unsigned long tom; + + /* Remember which registers we have used in the high 8 bits of tom */ + tom = 0; + for(;;) { + /* Find the largest remaining canidate */ + unsigned index, canidate; + uint32_t csbase, csmask; + unsigned size; + csbase = 0; + canidate = 0; + for(index = 0; index < 8; index++) { + uint32_t value; + value = pci_read_config32(ctrl->f2, DRAM_CSBASE + (index << 2)); + + /* Is it enabled? */ + if (!(value & 1)) { + continue; + } + + /* Is it greater? 
*/ + if (value <= csbase) { + continue; + } + + /* Has it already been selected */ + if (tom & (1 << (index + 24))) { + continue; + } + /* I have a new canidate */ + csbase = value; + canidate = index; + } + /* See if I have found a new canidate */ + if (csbase == 0) { + break; + } + + /* Remember the dimm size */ + size = csbase >> 19; + + /* Remember I have used this register */ + tom |= (1 << (canidate + 24)); + + /* Recompute the cs base register value */ + csbase = (tom << 19) | 1; + + /* Increment the top of memory */ + tom += size; + + /* Compute the memory mask */ + csmask = ((size -1) << 19); + csmask |= 0x3fe0; /* For now don't optimize */ + + /* Write the new base register */ + pci_write_config32(ctrl->f2, DRAM_CSBASE + (canidate << 2), csbase); + /* Write the new mask register */ + if((canidate & 1) == 0) { //only have 4 CSMASK + pci_write_config32(ctrl->f2, DRAM_CSMASK + ((canidate>>1) << 2), csmask); + } + + } + /* Return the memory size in K */ + return (tom & ~0xff000000) << (27-10); +} + +unsigned long memory_end_k(const struct mem_controller *ctrl, int max_node_id) +{ + unsigned node_id; + unsigned end_k; + /* Find the last memory address used */ + end_k = 0; + for(node_id = 0; node_id < max_node_id; node_id++) { + uint32_t limit, base; + unsigned index; + index = node_id << 3; + base = pci_read_config32(ctrl->f1, 0x40 + index); + /* Only look at the limit if the base is enabled */ + if ((base & 3) == 3) { + limit = pci_read_config32(ctrl->f1, 0x44 + index); + end_k = ((limit + 0x00010000) & 0xffff0000) >> 2; + } + } + return end_k; +} + +static void order_dimms(const struct mem_controller *ctrl, struct mem_info *meminfo) +{ + unsigned long tom_k, base_k; + + if (read_option(CMOS_VSTART_interleave_chip_selects, CMOS_VLEN_interleave_chip_selects, 1) != 0) { + tom_k = interleave_chip_selects(ctrl, meminfo->is_Width128); + } else { + print_debug("Interleaving disabled\r\n"); + tom_k = 0; + } + if (!tom_k) { + tom_k = order_chip_selects(ctrl); + } + 
/* Compute the memory base address */ + base_k = memory_end_k(ctrl, ctrl->node_id); + tom_k += base_k; + route_dram_accesses(ctrl, base_k, tom_k); + set_top_mem(tom_k, 0); +} + +static long disable_dimm(const struct mem_controller *ctrl, unsigned index, long dimm_mask, struct mem_info *meminfo) +{ + print_debug("disabling dimm"); + print_debug_hex8(index); + print_debug("\r\n"); + pci_write_config32(ctrl->f2, DRAM_CSBASE + (((index << 1)+0)<<2), 0); + pci_write_config32(ctrl->f2, DRAM_CSBASE + (((index << 1)+1)<<2), 0); +#if QRANK_DIMM_SUPPORT == 1 + if(meminfo->sz[index].rank == 4) { + pci_write_config32(ctrl->f2, DRAM_CSBASE + (((index << 1)+4)<<2), 0); + pci_write_config32(ctrl->f2, DRAM_CSBASE + (((index << 1)+5)<<2), 0); + } +#endif + + dimm_mask &= ~(1 << index); + return dimm_mask; +} + +static long spd_handle_unbuffered_dimms(const struct mem_controller *ctrl, long dimm_mask, struct mem_info *meminfo) +{ + int i; + uint32_t registered; + uint32_t dcl; + registered = 0; + for(i = 0; (i < DIMM_SOCKETS); i++) { + int value; + if (!(dimm_mask & (1 << i))) { + continue; + } + value = spd_read_byte(ctrl->channel0[i], SPD_DIMM_TYPE); + if (value < 0) { + return -1; + } + /* Registered dimm ? */ + value &= 0x3f; + if ((value == SPD_DIMM_TYPE_RDIMM) || (value == SPD_DIMM_TYPE_mRDIMM)) { + //check SPD_MOD_ATTRIB to verify it is SPD_MOD_ATTRIB_REGADC (0x11)? 
+ registered |= (1<<i); + } + } + + dcl = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW); + dcl &= ~DCL_UnBuffDimm; + meminfo->is_registered = 1; + if (!registered) { + dcl |= DCL_UnBuffDimm; + meminfo->is_registered = 0; + } + pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dcl); + +#if 1 + if (meminfo->is_registered) { + print_debug("Registered\r\n"); + } else { + print_debug("Unbuffered\r\n"); + } +#endif + return dimm_mask; +} + +static unsigned int spd_detect_dimms(const struct mem_controller *ctrl) +{ + unsigned dimm_mask; + int i; + dimm_mask = 0; + for(i = 0; i < DIMM_SOCKETS; i++) { + int byte; + unsigned device; + device = ctrl->channel0[i]; + if (device) { + byte = spd_read_byte(ctrl->channel0[i], SPD_MEM_TYPE); /* Type */ + if (byte == SPD_MEM_TYPE_SDRAM_DDR2) { + dimm_mask |= (1 << i); + } + } + device = ctrl->channel1[i]; + if (device) { + byte = spd_read_byte(ctrl->channel1[i], SPD_MEM_TYPE); + if (byte == SPD_MEM_TYPE_SDRAM_DDR2) { + dimm_mask |= (1 << (i + DIMM_SOCKETS)); + } + } + } + return dimm_mask; +} + +static long spd_enable_2channels(const struct mem_controller *ctrl, long dimm_mask, struct mem_info *meminfo) +{ + int i; + uint32_t nbcap; + /* SPD addresses to verify are identical */ + static const uint8_t addresses[] = { + 2, /* Type should be DDR2 SDRAM */ + 3, /* *Row addresses */ + 4, /* *Column addresses */ + 5, /* *Number of DIMM Ranks */ + 6, /* *Module Data Width*/ + 9, /* *Cycle time at highest CAS Latency CL=X */ + 11, /* *DIMM Conf Type */ + 13, /* *Pri SDRAM Width */ + 17, /* *Logical Banks */ + 18, /* *Supported CAS Latencies */ + 20, /* *DIMM Type Info */ + 21, /* *SDRAM Module Attributes */ + 23, /* *Cycle time at CAS Latnecy (CLX - 1) */ + 26, /* *Cycle time at CAS Latnecy (CLX - 2) */ + 27, /* *tRP Row precharge time */ + 28, /* *Minimum Row Active to Row Active Delay (tRRD) */ + 29, /* *tRCD RAS to CAS */ + 30, /* *tRAS Activate to Precharge */ + 36, /* *Write recovery time (tWR) */ + 37, /* *Internal write to read command 
delay (tRDP) */ + 38, /* *Internal read to precharge commanfd delay (tRTP) */ + 41, /* *Extension of Byte 41 tRC and Byte 42 tRFC */ + 41, /* *Minimum Active to Active/Auto Refresh Time(Trc) */ + 42, /* *Minimum Auto Refresh Command Time(Trfc) */ + }; + /* If the dimms are not in pairs do not do dual channels */ + if ((dimm_mask & ((1 << DIMM_SOCKETS) - 1)) != + ((dimm_mask >> DIMM_SOCKETS) & ((1 << DIMM_SOCKETS) - 1))) { + goto single_channel; + } + /* If the cpu is not capable of doing dual channels don't do dual channels */ + nbcap = pci_read_config32(ctrl->f3, NORTHBRIDGE_CAP); + if (!(nbcap & NBCAP_128Bit)) { + goto single_channel; + } + for(i = 0; (i < 4) && (ctrl->channel0[i]); i++) { + unsigned device0, device1; + int value0, value1; + int j; + /* If I don't have a dimm skip this one */ + if (!(dimm_mask & (1 << i))) { + continue; + } + device0 = ctrl->channel0[i]; + device1 = ctrl->channel1[i]; + for(j = 0; j < sizeof(addresses)/sizeof(addresses[0]); j++) { + unsigned addr; + addr = addresses[j]; + value0 = spd_read_byte(device0, addr); + if (value0 < 0) { + return -1; + } + value1 = spd_read_byte(device1, addr); + if (value1 < 0) { + return -1; + } + if (value0 != value1) { + goto single_channel; + } + } + } + print_spew("Enabling dual channel memory\r\n"); + uint32_t dcl; + dcl = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW); + dcl &= ~DCL_BurstLength32; /* 32byte mode may be preferred in platforms that include graphics controllers that generate a lot of 32-bytes system memory accesses + 32byte mode is not supported when the DRAM interface is 128 bits wides, even 32byte mode is set, system still use 64 byte mode */ + dcl |= DCL_Width128; + pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dcl); + meminfo->is_Width128 = 1; + return dimm_mask; + single_channel: + dimm_mask &= ~((1 << (DIMM_SOCKETS *2)) - (1 << DIMM_SOCKETS)); + meminfo->is_Width128 = 0; + return dimm_mask; +} + +struct mem_param { + uint16_t cycle_time; + uint8_t divisor; /* In 1/40 ns 
increments */ + uint8_t TrwtTO; + uint8_t Twrrd; + uint8_t Twrwr; + uint8_t Trdrd; + uint8_t DcqByPassMax; + uint32_t dch_memclk; + char name[9]; +}; + + static const struct mem_param speed[] = { + { + .name = "200Mhz\r\n", + .cycle_time = 0x500, + .divisor = 200, // how many 1/40ns per clock + .dch_memclk = DCH_MemClkFreq_200MHz, //0 + .TrwtTO = 7, + .Twrrd = 2, + .Twrwr = 2, + .Trdrd = 3, + .DcqByPassMax = 4, + + }, + { + .name = "266Mhz\r\n", + .cycle_time = 0x375, + .divisor = 150, //???? + .dch_memclk = DCH_MemClkFreq_266MHz, //1 + .TrwtTO = 7, + .Twrrd = 2, + .Twrwr = 2, + .Trdrd = 3, + .DcqByPassMax = 4, + }, + { + .name = "333Mhz\r\n", + .cycle_time = 0x300, + .divisor = 120, + .dch_memclk = DCH_MemClkFreq_333MHz, //2 + .TrwtTO = 7, + .Twrrd = 2, + .Twrwr = 2, + .Trdrd = 3, + .DcqByPassMax = 4, + + }, + { + .name = "400Mhz\r\n", + .cycle_time = 0x250, + .divisor = 100, + .dch_memclk = DCH_MemClkFreq_400MHz,//3 + .TrwtTO = 7, + .Twrrd = 2, + .Twrwr = 2, + .Trdrd = 3, + .DcqByPassMax = 4, + }, + { + .cycle_time = 0x000, + }, + }; + +static const struct mem_param *get_mem_param(unsigned min_cycle_time) +{ + + const struct mem_param *param; + for(param = &speed[0]; param->cycle_time ; param++) { + if (min_cycle_time > (param+1)->cycle_time) { + break; + } + } + if (!param->cycle_time) { + die("min_cycle_time to low"); + } + print_spew(param->name); +#ifdef DRAM_MIN_CYCLE_TIME + print_debug(param->name); +#endif + return param; +} + +static uint8_t get_exact_divisor(int i, uint8_t divisor) +{ + //input divisor could be 200(200), 150(266), 120(333), 100 (400) + static const uint8_t dv_a[] = { + /* 200 266 333 400 */ + /*4 */ 250, 250, 250, 250, + /*5 */ 200, 200, 200, 100, + /*6 */ 200, 166, 166, 100, + /*7 */ 200, 171, 142, 100, + + /*8 */ 200, 150, 125, 100, + /*9 */ 200, 156, 133, 100, + /*10*/ 200, 160, 120, 100, + /*11*/ 200, 163, 127, 100, + + /*12*/ 200, 150, 133, 100, + /*13*/ 200, 153, 123, 100, + /*14*/ 200, 157, 128, 100, + /*15*/ 200, 160, 120, 100, + 
}; + + unsigned fid_cur; + int index; + + msr_t msr; + msr = rdmsr(0xc0010042); + fid_cur = msr.lo & 0x3f; + + index = fid_cur>>1; + + if(index>12) return divisor; + + if(i>3) return divisor; + + return dv_a[index * 4+i]; + +} + +struct spd_set_memclk_result { + const struct mem_param *param; + long dimm_mask; +}; + +static unsigned convert_to_linear(unsigned value) +{ + static const unsigned fraction[] = { 0x25, 0x33, 0x66, 0x75 }; + unsigned valuex; + + /* We need to convert value to more readable */ + if((value & 0xf) < 10) { //no .25, .33, .66, .75 + value <<= 4; + } else { + valuex = ((value & 0xf0) << 4) | fraction [(value & 0xf)-10]; + value = valuex; + } + return value; +} + +static struct spd_set_memclk_result spd_set_memclk(const struct mem_controller *ctrl, long dimm_mask, struct mem_info *meminfo) +{ + /* Compute the minimum cycle time for these dimms */ + struct spd_set_memclk_result result; + unsigned min_cycle_time, min_latency, bios_cycle_time; + int i; + uint32_t value; + + static const uint8_t latency_indicies[] = { 25, 23, 9 }; + + static const uint16_t min_cycle_times[] = { // use full speed to compare + [NBCAP_MEMCLK_NOLIMIT] = 0x250, /*2.5ns */ + [NBCAP_MEMCLK_333MHZ] = 0x300, /* 3.0ns */ + [NBCAP_MEMCLK_266MHZ] = 0x375, /* 3.75ns */ + [NBCAP_MEMCLK_200MHZ] = 0x500, /* 5.0s */ + }; + + + value = pci_read_config32(ctrl->f3, NORTHBRIDGE_CAP); + min_cycle_time = min_cycle_times[(value >> NBCAP_MEMCLK_SHIFT) & NBCAP_MEMCLK_MASK]; + bios_cycle_time = min_cycle_times[ + read_option(CMOS_VSTART_max_mem_clock, CMOS_VLEN_max_mem_clock, 0)]; + if (bios_cycle_time > min_cycle_time) { + min_cycle_time = bios_cycle_time; + } + min_latency = 3; + + print_tx("1 min_cycle_time:", min_cycle_time); + + /* Compute the least latency with the fastest clock supported + * by both the memory controller and the dimms. 
+ */ + for(i = 0; i < DIMM_SOCKETS; i++) { + int new_cycle_time, new_latency; + int index; + int latencies; + int latency; + + if (!(dimm_mask & (1 << i))) { + continue; + } + + /* First find the supported CAS latencies + * Byte 18 for DDR SDRAM is interpreted: + * bit 3 == CAS Latency = 3 + * bit 4 == CAS Latency = 4 + * bit 5 == CAS Latency = 5 + * bit 6 == CAS Latency = 6 + */ + new_cycle_time = 0x500; + new_latency = 6; + + latencies = spd_read_byte(ctrl->channel0[i], SPD_CAS_LAT); + if (latencies <= 0) continue; + + print_tx("i:",i); + print_tx("\tlatencies:", latencies); + /* Compute the lowest cas latency supported */ + latency = log2(latencies) - 2; + + /* Loop through and find a fast clock with a low latency */ + for(index = 0; index < 3; index++, latency++) { + int value; + if ((latency < 3) || (latency > 6) || + (!(latencies & (1 << latency)))) { + continue; + } + value = spd_read_byte(ctrl->channel0[i], latency_indicies[index]); + if (value < 0) { + goto hw_error; + } + print_tx("\tindex:", index); + print_tx("\t\tlatency:", latency); + print_tx("\t\tvalue1:", value); + + value = convert_to_linear(value); + + print_tx("\t\tvalue2:", value); + + /* Only increase the latency if we decreas the clock */ + if (value >= min_cycle_time ) { + if(value < new_cycle_time) { + new_cycle_time = value; + new_latency = latency; + } else if (value == new_cycle_time) { + if(new_latency > latency) { + new_latency = latency; + } + } + } + print_tx("\t\tnew_cycle_time:", new_cycle_time); + print_tx("\t\tnew_latency:", new_latency); + + } + if (new_latency > 6){ + continue; + } + /* Does min_latency need to be increased? */ + if (new_cycle_time > min_cycle_time) { + min_cycle_time = new_cycle_time; + } + /* Does min_cycle_time need to be increased? 
*/ + if (new_latency > min_latency) { + min_latency = new_latency; + } + + print_tx("2 min_cycle_time:", min_cycle_time); + print_tx("2 min_latency:", min_latency); + } + /* Make a second pass through the dimms and disable + * any that cannot support the selected memclk and cas latency. + */ + + print_tx("3 min_cycle_time:", min_cycle_time); + print_tx("3 min_latency:", min_latency); + + for(i = 0; (i < DIMM_SOCKETS) && (ctrl->channel0[i]); i++) { + int latencies; + int latency; + int index; + int value; + if (!(dimm_mask & (1 << i))) { + continue; + } + latencies = spd_read_byte(ctrl->channel0[i], SPD_CAS_LAT); + if (latencies < 0) goto hw_error; + if (latencies == 0) { + continue; +// goto dimm_err; + } + + /* Compute the lowest cas latency supported */ + latency = log2(latencies) -2; + + /* Walk through searching for the selected latency */ + for(index = 0; index < 3; index++, latency++) { + if (!(latencies & (1 << latency))) { + continue; + } + if (latency == min_latency) + break; + } + /* If I can't find the latency or my index is bad error */ + if ((latency != min_latency) || (index >= 3)) { + goto dimm_err; + } + + /* Read the min_cycle_time for this latency */ + value = spd_read_byte(ctrl->channel0[i], latency_indicies[index]); + if (value < 0) goto hw_error; + + value = convert_to_linear(value); + /* All is good if the selected clock speed + * is what I need or slower. 
+ */ + if (value <= min_cycle_time) { + continue; + } + /* Otherwise I have an error, disable the dimm */ + dimm_err: + dimm_mask = disable_dimm(ctrl, i, dimm_mask, meminfo); + } + + print_tx("4 min_cycle_time:", min_cycle_time); + + /* Now that I know the minimum cycle time lookup the memory parameters */ + result.param = get_mem_param(min_cycle_time); + + /* Update DRAM Config High with our selected memory speed */ + value = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH); + value &= ~(DCH_MemClkFreq_MASK << DCH_MemClkFreq_SHIFT); + + value |= result.param->dch_memclk << DCH_MemClkFreq_SHIFT; + pci_write_config32(ctrl->f2, DRAM_CONFIG_HIGH, value); + + print_debug(result.param->name); + + /* Update DRAM Timing Low with our selected cas latency */ + value = pci_read_config32(ctrl->f2, DRAM_TIMING_LOW); + value &= ~(DTL_TCL_MASK << DTL_TCL_SHIFT); + value |= (min_latency - DTL_TCL_BASE) << DTL_TCL_SHIFT; + pci_write_config32(ctrl->f2, DRAM_TIMING_LOW, value); + + result.dimm_mask = dimm_mask; + return result; + hw_error: + result.param = (const struct mem_param *)0; + result.dimm_mask = -1; + return result; +} + +static unsigned convert_to_1_4(unsigned value) +{ + static const uint8_t fraction[] = { 0, 1, 2, 2, 3, 3, 0 }; + unsigned valuex; + + /* We need to convert value to more readable */ + valuex = fraction [value & 0x7]; + return valuex; +} +static int update_dimm_Trc(const struct mem_controller *ctrl, const struct mem_param *param, int i) +{ + unsigned clocks, old_clocks; + uint32_t dtl; + int value; + int value2; + value = spd_read_byte(ctrl->channel0[i], SPD_TRC); + if (value < 0) return -1; + + value2 = spd_read_byte(ctrl->channel0[i], SPD_TRC -1); + value <<= 2; + value += convert_to_1_4(value2>>4); + + value *=10; + + clocks = (value + param->divisor - 1)/param->divisor; + + if (clocks < DTL_TRC_MIN) { + clocks = DTL_TRC_MIN; + } + if (clocks > DTL_TRC_MAX) { + return 0; + } + + dtl = pci_read_config32(ctrl->f2, DRAM_TIMING_LOW); + old_clocks = ((dtl >> 
DTL_TRC_SHIFT) & DTL_TRC_MASK) + DTL_TRC_BASE; + if (old_clocks >= clocks) { //?? someone did it + // clocks = old_clocks; + return 1; + } + dtl &= ~(DTL_TRC_MASK << DTL_TRC_SHIFT); + dtl |= ((clocks - DTL_TRC_BASE) << DTL_TRC_SHIFT); + pci_write_config32(ctrl->f2, DRAM_TIMING_LOW, dtl); + return 1; +} + +static int update_dimm_Trfc(const struct mem_controller *ctrl, const struct mem_param *param, int i, struct mem_info *meminfo) +{ + unsigned clocks, old_clocks; + uint32_t dth; + int value; + +#if 0 + int value2; + + value = spd_read_byte(ctrl->channel0[i], SPD_TRFC); + if (value < 0) return -1; + + value2 = spd_read_byte(ctrl->channel0[i], SPD_TRC -1); + if(value2 & 1) value += 256; + value <<= 2; + value += convert_to_1_4(value2>>1); + + if (value == 0) { + value = param->tRFC; + } + value *= 10; + clocks = (value + param->divisor - 1)/param->divisor; +#endif + //get the cs_size --> logic dimm size + value = spd_read_byte(ctrl->channel0[i], SPD_PRI_WIDTH); + if (value < 0) { + return -1; + } + + value = 6 - log2(value); //4-->4, 8-->3, 16-->2 + + clocks = meminfo->sz[i].per_rank - 27 + 2 - value; + + dth = pci_read_config32(ctrl->f2, DRAM_TIMING_HIGH); + + old_clocks = ((dth >> (DTH_TRFC0_SHIFT+i*3)) & DTH_TRFC_MASK); + if (old_clocks >= clocks) { // some one did it? 
+// clocks = old_clocks; + return 1; + } + dth &= ~(DTH_TRFC_MASK << (DTH_TRFC0_SHIFT+i*3)); + dth |= clocks << (DTH_TRFC0_SHIFT+i*3); + pci_write_config32(ctrl->f2, DRAM_TIMING_HIGH, dth); + return 1; +} + +static int update_dimm_TT_1_4(const struct mem_controller *ctrl, const struct mem_param *param, int i, + unsigned TT_REG, + unsigned SPD_TT, unsigned TT_SHIFT, unsigned TT_MASK, unsigned TT_BASE, unsigned TT_MIN, unsigned TT_MAX ) +{ + unsigned clocks, old_clocks; + uint32_t dtl; + int value; + value = spd_read_byte(ctrl->channel0[i], SPD_TT); //already in 1/4 ns + if (value < 0) return -1; + value *=10; + clocks = (value + param->divisor -1)/param->divisor; + if (clocks < TT_MIN) { + clocks = TT_MIN; + } + if (clocks > TT_MAX) { + return 0; + } + dtl = pci_read_config32(ctrl->f2, TT_REG); + + old_clocks = ((dtl >> TT_SHIFT) & TT_MASK) + TT_BASE; + if (old_clocks >= clocks) { //some one did it? +// clocks = old_clocks; + return 1; + } + dtl &= ~(TT_MASK << TT_SHIFT); + dtl |= ((clocks - TT_BASE) << TT_SHIFT); + pci_write_config32(ctrl->f2, TT_REG, dtl); + return 1; +} + +static int update_dimm_Trcd(const struct mem_controller *ctrl, const struct mem_param *param, int i) +{ + return update_dimm_TT_1_4(ctrl, param, i, DRAM_TIMING_LOW, SPD_TRCD, DTL_TRCD_SHIFT, DTL_TRCD_MASK, DTL_TRCD_BASE, DTL_TRCD_MIN, DTL_TRCD_MAX); +} + +static int update_dimm_Trrd(const struct mem_controller *ctrl, const struct mem_param *param, int i) +{ + return update_dimm_TT_1_4(ctrl, param, i, DRAM_TIMING_LOW, SPD_TRRD, DTL_TRRD_SHIFT, DTL_TRRD_MASK, DTL_TRRD_BASE, DTL_TRRD_MIN, DTL_TRRD_MAX); +} + +static int update_dimm_Tras(const struct mem_controller *ctrl, const struct mem_param *param, int i) +{ + unsigned clocks, old_clocks; + uint32_t dtl; + int value; + value = spd_read_byte(ctrl->channel0[i], SPD_TRAS); //in 1 ns + if (value < 0) return -1; + print_tx("update_dimm_Tras: 0 value=", value); + + value<<=2; //convert it to in 1/4ns + + value *= 10; + print_tx("update_dimm_Tras: 1 
value=", value); + + clocks = (value + param->divisor - 1)/param->divisor; + print_tx("update_dimm_Tras: divisor=", param->divisor); + print_tx("update_dimm_Tras: clocks=", clocks); + if (clocks < DTL_TRAS_MIN) { + clocks = DTL_TRAS_MIN; + } + if (clocks > DTL_TRAS_MAX) { + return 0; + } + dtl = pci_read_config32(ctrl->f2, DRAM_TIMING_LOW); + old_clocks = ((dtl >> DTL_TRAS_SHIFT) & DTL_TRAS_MASK) + DTL_TRAS_BASE; + if (old_clocks >= clocks) { // someone did it? +// clocks = old_clocks; + return 1; + } + dtl &= ~(DTL_TRAS_MASK << DTL_TRAS_SHIFT); + dtl |= ((clocks - DTL_TRAS_BASE) << DTL_TRAS_SHIFT); + pci_write_config32(ctrl->f2, DRAM_TIMING_LOW, dtl); + return 1; +} + +static int update_dimm_Trp(const struct mem_controller *ctrl, const struct mem_param *param, int i) +{ + return update_dimm_TT_1_4(ctrl, param, i, DRAM_TIMING_LOW, SPD_TRP, DTL_TRP_SHIFT, DTL_TRP_MASK, DTL_TRP_BASE, DTL_TRP_MIN, DTL_TRP_MAX); +} + +static int update_dimm_Trtp(const struct mem_controller *ctrl, const struct mem_param *param, int i, struct mem_info *meminfo) +{ + //need to figure if it is 32 byte burst or 64 bytes burst + int offset = 2; + if(!meminfo->is_Width128) { + uint32_t dword; + dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW); + if((dword & DCL_BurstLength32)) offset = 0; + } + return update_dimm_TT_1_4(ctrl, param, i, DRAM_TIMING_LOW, SPD_TRTP, DTL_TRTP_SHIFT, DTL_TRTP_MASK, DTL_TRTP_BASE+offset, DTL_TRTP_MIN+offset, DTL_TRTP_MAX+offset); +} + + +static int update_dimm_Twr(const struct mem_controller *ctrl, const struct mem_param *param, int i) +{ + return update_dimm_TT_1_4(ctrl, param, i, DRAM_TIMING_LOW, SPD_TWR, DTL_TWR_SHIFT, DTL_TWR_MASK, DTL_TWR_BASE, DTL_TWR_MIN, DTL_TWR_MAX); +} + + +static int update_dimm_Tref(const struct mem_controller *ctrl, const struct mem_param *param, int i) +{ + uint32_t dth, dth_old; + int value; + value = spd_read_byte(ctrl->channel0[i], SPD_TREF); // 0: 15.625us, 1: 3.9us 2: 7.8 us.... 
+ if (value < 0) return -1; + + if(value == 1 ) { + value = 3; + } else { + value = 2; + } + + dth = pci_read_config32(ctrl->f2, DRAM_TIMING_HIGH); + + dth_old = dth; + dth &= ~(DTH_TREF_MASK << DTH_TREF_SHIFT); + dth |= (value << DTH_TREF_SHIFT); + if(dth_old != dth) { + pci_write_config32(ctrl->f2, DRAM_TIMING_HIGH, dth); + } + return 1; +} + +static void set_4RankRDimm(const struct mem_controller *ctrl, const struct mem_param *param, struct mem_info *meminfo) +{ +#if QRANK_DIMM_SUPPRT == 1 + int value; + int i; + + + if(!(meminfo->is_registered)) return; + + value = 0; + + for(i = 0; i < DIMM_SOCKETS; i++) { + if (!(dimm_mask & (1 << i))) { + continue; + } + + if(meminfo->sz.rank == 4) { + value = 1; + break; + } + } + + if(value == 1) { + uint32_t dch; + dch = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH); + dch |= DCH_FourRankRDimm; + pci_write_config32(ctrl->f2, DRAM_CONFIG_HIGH, dch); + } +#endif +} + + +static uint32_t get_extra_dimm_mask(const struct mem_controller *ctrl, struct mem_info *meminfo) +{ + int i; + + uint32_t mask_x4; + uint32_t mask_x16; + uint32_t mask_single_rank; + uint32_t mask_page_1k; + int value; +#if QRANK_DIMM_SUPPORT == 1 + int rank; +#endif + + long dimm_mask = meminfo->dimm_mask; + + + mask_x4 = 0; + mask_x16 = 0; + mask_single_rank = 0; + mask_page_1k = 0; + + for(i = 0; i < DIMM_SOCKETS; i++) { + + if (!(dimm_mask & (1 << i))) { + continue; + } + + if(meminfo->sz[i].rank == 1) { + mask_single_rank |= 1<<i; + } + + if(meminfo->sz[i].col==10) { + mask_page_1k |= 1<<i; + } + + + value = spd_read_byte(ctrl->channel0[i], SPD_PRI_WIDTH); + + #if QRANK_DIMM_SUPPORT == 1 + rank = meminfo->sz[i].rank; + #endif + + if(value==4) { + mask_x4 |= (1<<i); + #if QRANK_DIMM_SUPPORT == 1 + if(rank==4) { + mask_x4 |= 1<<(i+2); + } + #endif + } else if(value==16) { + mask_x16 |= (1<<i); + #if QRANK_DIMM_SUPPORT == 1 + if(rank==4) { + mask_x16 |= 1<<(i+2); + } + #endif + } + + } + + meminfo->x4_mask= mask_x4; + meminfo->x16_mask = mask_x16; + + 
meminfo->single_rank_mask = mask_single_rank; + meminfo->page_1k_mask = mask_page_1k; + + return mask_x4; + +} + + +static void set_dimm_x4(const struct mem_controller *ctrl, const struct mem_param *param, struct mem_info *meminfo) +{ + uint32_t dcl; + dcl = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW); + dcl &= ~(DCL_X4Dimm_MASK<<DCL_X4Dimm_SHIFT); + dcl |= ((meminfo->x4_mask) & 0xf) << (DCL_X4Dimm_SHIFT); + pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dcl); +} + +static int count_ones(uint32_t dimm_mask) +{ + int dimms; + unsigned index; + dimms = 0; + for(index = 0; index < DIMM_SOCKETS; index++, dimm_mask>>=1) { + if (dimm_mask & 1) { + dimms++; + } + } + return dimms; +} + + +static void set_DramTerm(const struct mem_controller *ctrl, const struct mem_param *param, struct mem_info *meminfo) +{ + uint32_t dcl; + unsigned odt; + odt = 1; // 75 ohms + + if(param->divisor == 100) { //DDR2 800 + if(meminfo->is_Width128) { + if(count_ones(meminfo->dimm_mask & 0x0f)==2) { + odt = 3; //50 ohms + } + } + + } + dcl = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW); + dcl &= ~(DCL_DramTerm_MASK<<DCL_DramTerm_SHIFT); + dcl |= (odt & DCL_DramTerm_MASK) << (DCL_DramTerm_SHIFT); + pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dcl); +} + + +static void set_ecc(const struct mem_controller *ctrl,const struct mem_param *param, long dimm_mask, struct mem_info *meminfo) +{ + int i; + int value; + + uint32_t dcl, nbcap; + nbcap = pci_read_config32(ctrl->f3, NORTHBRIDGE_CAP); + dcl = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW); + dcl &= ~DCL_DimmEccEn; + if (nbcap & NBCAP_ECC) { + dcl |= DCL_DimmEccEn; + } + if (read_option(CMOS_VSTART_ECC_memory, CMOS_VLEN_ECC_memory, 1) == 0) { + dcl &= ~DCL_DimmEccEn; + } + pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dcl); + + meminfo->is_ecc = 1; + if(!(dcl & DCL_DimmEccEn)) { + meminfo->is_ecc = 0; + return; // already disabled the ECC, so don't need to read SPD any more + } + + for(i = 0; i < DIMM_SOCKETS; i++) { + + if (!(dimm_mask & (1 << 
i))) { + continue; + } + + value = spd_read_byte(ctrl->channel0[i], SPD_DIMM_CONF_TYPE); + + if(!(value & SPD_DIMM_CONF_TYPE_ECC)) { + dcl &= ~DCL_DimmEccEn; + pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dcl); + meminfo->is_ecc = 0; + return; + } + + } +} + +static int update_dimm_Twtr(const struct mem_controller *ctrl, const struct mem_param *param, int i) +{ + + return update_dimm_TT_1_4(ctrl, param, i, DRAM_TIMING_HIGH, SPD_TWTR, DTH_TWTR_SHIFT, DTH_TWTR_MASK, DTH_TWTR_BASE, DTH_TWTR_MIN, DTH_TWTR_MAX); + +} + +static void set_TT(const struct mem_controller *ctrl, const struct mem_param *param, unsigned TT_REG, + unsigned TT_SHIFT, unsigned TT_MASK, unsigned TT_BASE, unsigned TT_MIN, unsigned TT_MAX, unsigned val, const char *str) +{ + uint32_t reg; + + if ((val < TT_MIN) || (val > TT_MAX)) { + print_err(str); + die(" Unknown\r\n"); + } + + reg = pci_read_config32(ctrl->f2, TT_REG); + reg &= ~(TT_MASK << TT_SHIFT); + reg |= ((val - TT_BASE) << TT_SHIFT); + pci_write_config32(ctrl->f2, TT_REG, reg); + return; +} + +static void set_TrwtTO(const struct mem_controller *ctrl, const struct mem_param *param) +{ + set_TT(ctrl, param, DRAM_TIMING_HIGH, DTH_TRWTTO_SHIFT, DTH_TRWTTO_MASK,DTH_TRWTTO_BASE, DTH_TRWTTO_MIN, DTH_TRWTTO_MAX, param->TrwtTO, "TrwtTO"); +} + +static void set_Twrrd(const struct mem_controller *ctrl, const struct mem_param *param) +{ + set_TT(ctrl, param, DRAM_TIMING_HIGH, DTH_TWRRD_SHIFT, DTH_TWRRD_MASK,DTH_TWRRD_BASE, DTH_TWRRD_MIN, DTH_TWRRD_MAX, param->Twrrd, "Twrrd"); +} + +static void set_Twrwr(const struct mem_controller *ctrl, const struct mem_param *param) +{ + set_TT(ctrl, param, DRAM_TIMING_HIGH, DTH_TWRWR_SHIFT, DTH_TWRWR_MASK,DTH_TWRWR_BASE, DTH_TWRWR_MIN, DTH_TWRWR_MAX, param->Twrwr, "Twrwr"); +} + +static void set_Trdrd(const struct mem_controller *ctrl, const struct mem_param *param) +{ + set_TT(ctrl, param, DRAM_TIMING_HIGH, DTH_TRDRD_SHIFT, DTH_TRDRD_MASK,DTH_TRDRD_BASE, DTH_TRDRD_MIN, DTH_TRDRD_MAX, param->Trdrd, "Trdrd"); +} + 
+static void set_DcqBypassMax(const struct mem_controller *ctrl, const struct mem_param *param) +{ + set_TT(ctrl, param, DRAM_CONFIG_HIGH, DCH_DcqBypassMax_SHIFT, DCH_DcqBypassMax_MASK,DCH_DcqBypassMax_BASE, DCH_DcqBypassMax_MIN, DCH_DcqBypassMax_MAX, param->DcqByPassMax, "DcqBypassMax"); // value need to be in CMOS +} + +static void set_Tfaw(const struct mem_controller *ctrl, const struct mem_param *param, struct mem_info *meminfo) +{ + static const uint8_t faw_1k[] = {8, 10, 13, 14}; + static const uint8_t faw_2k[] = {10, 14, 17, 18}; + unsigned memclkfreq_index; + unsigned faw; + + + memclkfreq_index = param->dch_memclk; + + if(meminfo->page_1k_mask != 0) { //1k page + faw = faw_1k[memclkfreq_index]; + } + else { + faw = faw_2k[memclkfreq_index]; + } + + set_TT(ctrl, param, DRAM_CONFIG_HIGH, DCH_FourActWindow_SHIFT, DCH_FourActWindow_MASK, DCH_FourActWindow_BASE, DCH_FourActWindow_MIN, DCH_FourActWindow_MAX, faw, "FourActWindow"); + +} + + +static void set_max_async_latency(const struct mem_controller *ctrl, const struct mem_param *param) +{ + uint32_t dch; + unsigned async_lat; + + + dch = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH); + dch &= ~(DCH_MaxAsyncLat_MASK << DCH_MaxAsyncLat_SHIFT); + + async_lat = 6+6; + + + dch |= ((async_lat - DCH_MaxAsyncLat_BASE) << DCH_MaxAsyncLat_SHIFT); + pci_write_config32(ctrl->f2, DRAM_CONFIG_HIGH, dch); +} + +static void set_SlowAccessMode(const struct mem_controller *ctrl) +{ + uint32_t dch; + + dch = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH); + + dch |= (1<<20); + + pci_write_config32(ctrl->f2, DRAM_CONFIG_HIGH, dch); +} + + +/* + DRAM_OUTPUT_DRV_COMP_CTRL 0, 0x20 + DRAM_ADDR_TIMING_CTRL 04, 0x24 +*/ +static void set_misc_timing(const struct mem_controller *ctrl, struct mem_info *meminfo) +{ + uint32_t dword; + uint32_t dwordx; + unsigned SlowAccessMode = 0; + + long dimm_mask = meminfo->dimm_mask & 0x0f; + +#if DIMM_SUPPORT==0x0104 /* DDR2 and REG */ + /* for REG DIMM */ + dword = 0x00111222; + dwordx = 0x002f0000; 
+ switch (meminfo->memclk_set) { + case DCH_MemClkFreq_266MHz: + if( (dimm_mask == 0x03) || (dimm_mask == 0x02) || (dimm_mask == 0x01)) { + dwordx = 0x002f2700; + } + break; + case DCH_MemClkFreq_333MHz: + if( (dimm_mask == 0x03) || (dimm_mask == 0x02) || (dimm_mask == 0x01)) { + if ((meminfo->single_rank_mask & 0x03)!=0x03) { //any double rank there? + dwordx = 0x002f2f00; + } + } + break; + case DCH_MemClkFreq_400MHz: + dwordx = 0x002f3300; + break; + } + +#endif + +#if DIMM_SUPPORT==0x0004 /* DDR2 and unbuffered */ + /* for UNBUF DIMM */ + dword = 0x00111222; + dwordx = 0x002f2f00; + switch (meminfo->memclk_set) { + case DCH_MemClkFreq_200MHz: + if(dimm_mask == 0x03) { + SlowAccessMode = 1; + dword = 0x00111322; + } + break; + case DCH_MemClkFreq_266MHz: + if(dimm_mask == 0x03) { + SlowAccessMode = 1; + dword = 0x00111322; + if((meminfo->x4_mask == 0 ) && (meminfo->x16_mask == 0)) { + switch (meminfo->single_rank_mask) { + case 0x03: + dwordx = 0x00002f00; //x8 single Rank + break; + case 0x00: + dwordx = 0x00342f00; //x8 double Rank + break; + default: + dwordx = 0x00372f00; //x8 single Rank and double Rank mixed + } + } else if((meminfo->x4_mask == 0 ) && (meminfo->x16_mask == 0x01) && (meminfo->single_rank_mask == 0x01)) { + dwordx = 0x00382f00; //x8 Double Rank and x16 single Rank mixed + } else if((meminfo->x4_mask == 0 ) && (meminfo->x16_mask == 0x02) && (meminfo->single_rank_mask == 0x02)) { + dwordx = 0x00382f00; //x16 single Rank and x8 double Rank mixed + } + + } + else { + if((meminfo->x4_mask == 0 ) && (meminfo->x16_mask == 0x00) && ((meminfo->single_rank_mask == 0x01)||(meminfo->single_rank_mask == 0x02))) { //x8 single rank + dwordx = 0x002f2f00; + } else { + dwordx = 0x002b2f00; + } + } + break; + case DCH_MemClkFreq_333MHz: + dwordx = 0x00202220; + if(dimm_mask == 0x03) { + SlowAccessMode = 1; + dword = 0x00111322; + if((meminfo->x4_mask == 0 ) && (meminfo->x16_mask == 0)) { + switch (meminfo->single_rank_mask) { + case 0x03: + dwordx = 
0x00302220; //x8 single Rank + break; + case 0x00: + dwordx = 0x002b2220; //x8 double Rank + break; + defalut: + dwordx = 0x002a2220; //x8 single Rank and double Rank mixed + } + } else if((meminfo->x4_mask == 0) && (meminfo->x16_mask == 0x01) && (meminfo->single_rank_mask == 0x01)) { + dwordx = 0x002c2220; //x8 Double Rank and x16 single Rank mixed + } else if((meminfo->x4_mask == 0) && (meminfo->x16_mask == 0x02) && (meminfo->single_rank_mask == 0x02)) { + dwordx = 0x002c2220; //x16 single Rank and x8 double Rank mixed + } + } + break; + case DCH_MemClkFreq_400MHz: + dwordx = 0x00202520; + SlowAccessMode = 1; + if(dimm_mask == 0x03) { + dword = 0x00113322; + } else { + dword = 0x00113222; + } + break; + } + + print_raminit("\tdimm_mask = ", meminfo->dimm_mask); + print_raminit("\tx4_mask = ", meminfo->x4_mask); + print_raminit("\tx16_mask = ", meminfo->x16_mask); + print_raminit("\tsingle_rank_mask = ", meminfo->single_rank_mask); + print_raminit("\tODC = ", dword); + print_raminit("\tAddr Timing= ", dwordx); +#endif + +#if (DIMM_SUPPORT & 0x0100)==0x0000 /* 2T mode only used for unbuffered DIMM */ + if(SlowAccessMode) { + set_SlowAccessMode(ctrl); + } +#endif + + /* Program the Output Driver Compensation Control Registers (Function 2:Offset 0x9c, index 0, 0x20) */ + pci_write_config32_index_wait(ctrl->f2, 0x98, 0, dword); + if(meminfo->is_Width128) { + pci_write_config32_index_wait(ctrl->f2, 0x98, 0x20, dword); + } + + /* Program the Address Timing Control Registers (Function 2:Offset 0x9c, index 4, 0x24) */ + pci_write_config32_index_wait(ctrl->f2, 0x98, 4, dwordx); + if(meminfo->is_Width128) { + pci_write_config32_index_wait(ctrl->f2, 0x98, 0x24, dwordx); + } + +} + + +static void set_RDqsEn(const struct mem_controller *ctrl, const struct mem_param *param, struct mem_info *meminfo) +{ +#if CPU_SOCKET_TYPE==0x10 + //only need to set for reg and x8 + uint32_t dch; + + dch = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH); + + dch &= ~DCH_RDqsEn; + 
if((!meminfo->x4_mask) && (!meminfo->x16_mask)) { + dch |= DCH_RDqsEn; + } + + pci_write_config32(ctrl->f2, DRAM_CONFIG_HIGH, dch); +#endif +} + + +static void set_idle_cycle_limit(const struct mem_controller *ctrl, const struct mem_param *param) +{ + uint32_t dcm; + /* AMD says to Hardcode this */ + dcm = pci_read_config32(ctrl->f2, DRAM_CTRL_MISC); + dcm &= ~(DCM_ILD_lmt_MASK << DCM_ILD_lmt_SHIFT); + dcm |= DCM_ILD_lmt_16 << DCM_ILD_lmt_SHIFT; + dcm |= DCM_DCC_EN; + pci_write_config32(ctrl->f2, DRAM_CTRL_MISC, dcm); +} + +static void set_RdWrQByp(const struct mem_controller *ctrl, const struct mem_param *param) +{ + set_TT(ctrl, param, DRAM_CTRL_MISC, DCM_RdWrQByp_SHIFT, DCM_RdWrQByp_MASK,0, 0, 3, 2, "RdWrQByp"); +} + + + +static long spd_set_dram_timing(const struct mem_controller *ctrl, const struct mem_param *param, long dimm_mask, struct mem_info *meminfo) +{ + int i; + + for(i = 0; i < DIMM_SOCKETS; i++) { + int rc; + if (!(dimm_mask & (1 << i))) { + continue; + } + print_tx("dimm socket: ", i); + /* DRAM Timing Low Register */ + print_t("\ttrc\r\n"); + if ((rc = update_dimm_Trc (ctrl, param, i)) <= 0) goto dimm_err; + + print_t("\ttrcd\r\n"); + if ((rc = update_dimm_Trcd(ctrl, param, i)) <= 0) goto dimm_err; + + print_t("\ttrrd\r\n"); + if ((rc = update_dimm_Trrd(ctrl, param, i)) <= 0) goto dimm_err; + + print_t("\ttras\r\n"); + if ((rc = update_dimm_Tras(ctrl, param, i)) <= 0) goto dimm_err; + + print_t("\ttrp\r\n"); + if ((rc = update_dimm_Trp (ctrl, param, i)) <= 0) goto dimm_err; + + print_t("\ttrtp\r\n"); + if ((rc = update_dimm_Trtp(ctrl, param, i, meminfo)) <= 0) goto dimm_err; + + print_t("\ttwr\r\n"); + if ((rc = update_dimm_Twr (ctrl, param, i)) <= 0) goto dimm_err; + + /* DRAM Timing High Register */ + print_t("\ttref\r\n"); + if ((rc = update_dimm_Tref(ctrl, param, i)) <= 0) goto dimm_err; + + print_t("\ttwtr\r\n"); + if ((rc = update_dimm_Twtr(ctrl, param, i)) <= 0) goto dimm_err; + + print_t("\ttrfc\r\n"); + if ((rc = update_dimm_Trfc(ctrl, 
param, i, meminfo)) <= 0) goto dimm_err; + + /* DRAM Config Low */ + + continue; + dimm_err: + if (rc < 0) { + return -1; + } + dimm_mask = disable_dimm(ctrl, i, dimm_mask, meminfo); + } + + meminfo->dimm_mask = dimm_mask; // store final dimm_mask + + get_extra_dimm_mask(ctrl, meminfo); // will be used by RDqsEn and dimm_x4 + /* DRAM Timing Low Register */ + + /* DRAM Timing High Register */ + set_TrwtTO(ctrl, param); + set_Twrrd (ctrl, param); + set_Twrwr (ctrl, param); + set_Trdrd (ctrl, param); + + set_4RankRDimm(ctrl, param, meminfo); + + /* DRAM Config High */ + set_Tfaw(ctrl, param, meminfo); + set_DcqBypassMax(ctrl, param); + set_max_async_latency(ctrl, param); + set_RDqsEn(ctrl, param, meminfo); + + /* DRAM Config Low */ + set_ecc(ctrl, param, dimm_mask, meminfo); + set_dimm_x4(ctrl, param, meminfo); + set_DramTerm(ctrl, param, meminfo); + + /* DRAM Control Misc */ + set_idle_cycle_limit(ctrl, param); + set_RdWrQByp(ctrl, param); + + return dimm_mask; +} + +static void sdram_set_spd_registers(const struct mem_controller *ctrl, struct sys_info *sysinfo) +{ + struct spd_set_memclk_result result; + const struct mem_param *param; + struct mem_param paramx; + struct mem_info *meminfo; + long dimm_mask; +#if 1 + if (!sysinfo->ctrl_present[ctrl->node_id]) { +// print_debug("No memory controller present\r\n"); + return; + } +#endif + meminfo = &sysinfo->meminfo[ctrl->node_id]; + + print_debug_addr("sdram_set_spd_registers: paramx :", ¶mx); + + activate_spd_rom(ctrl); + dimm_mask = spd_detect_dimms(ctrl); + if (!(dimm_mask & ((1 << DIMM_SOCKETS) - 1))) { + print_debug("No memory for this cpu\r\n"); + return; + } + dimm_mask = spd_enable_2channels(ctrl, dimm_mask, meminfo); + if (dimm_mask < 0) + goto hw_spd_err; + dimm_mask = spd_set_ram_size(ctrl , dimm_mask, meminfo); + if (dimm_mask < 0) + goto hw_spd_err; + dimm_mask = spd_handle_unbuffered_dimms(ctrl, dimm_mask, meminfo); + if (dimm_mask < 0) + goto hw_spd_err; + result = spd_set_memclk(ctrl, dimm_mask, 
meminfo); + param = result.param; + dimm_mask = result.dimm_mask; + if (dimm_mask < 0) + goto hw_spd_err; + + //store memclk set to sysinfo, incase we need rebuilt param again + meminfo->memclk_set = param->dch_memclk; + + memcpy(¶mx, param, sizeof(paramx)); + + paramx.divisor = get_exact_divisor(param->dch_memclk, paramx.divisor); + + dimm_mask = spd_set_dram_timing(ctrl, ¶mx , dimm_mask, meminfo); // dimm_mask will be stored to meminfo->dimm_mask + if (dimm_mask < 0) + goto hw_spd_err; + + order_dimms(ctrl, meminfo); + + return; + hw_spd_err: + /* Unrecoverable error reading SPD data */ + print_err("SPD error - reset\r\n"); + hard_reset(); + return; +} + +#define TIMEOUT_LOOPS 300000 + +#include "raminit_f_dqs.c" + +#if HW_MEM_HOLE_SIZEK != 0 +static uint32_t hoist_memory(int controllers, const struct mem_controller *ctrl,unsigned hole_startk, int i) +{ + int ii; + uint32_t carry_over; + device_t dev; + uint32_t base, limit; + uint32_t basek; + uint32_t hoist; + int j; + + carry_over = (4*1024*1024) - hole_startk; + + for(ii=controllers - 1;ii>i;ii--) { + base = pci_read_config32(ctrl[0].f1, 0x40 + (ii << 3)); + if ((base & ((1<<1)|(1<<0))) != ((1<<1)|(1<<0))) { + continue; + } + limit = pci_read_config32(ctrl[0].f1, 0x44 + (ii << 3)); + limit += (carry_over << 2 ); + base += (carry_over << 2 ); + for(j = 0; j < controllers; j++) { + pci_write_config32(ctrl[j].f1, 0x44 + (ii << 3), limit); + pci_write_config32(ctrl[j].f1, 0x40 + (ii << 3), base ); + } + } + limit = pci_read_config32(ctrl[0].f1, 0x44 + (i << 3)); + limit += (carry_over << 2); + for(j = 0; j < controllers; j++) { + pci_write_config32(ctrl[j].f1, 0x44 + (i << 3), limit); + } + dev = ctrl[i].f1; + base = pci_read_config32(dev, 0x40 + (i << 3)); + basek = (base & 0xffff0000) >> 2; + if(basek == hole_startk) { + //don't need set memhole here, because hole off set will be 0, overflow + //so need to change base reg instead, new basek will be 4*1024*1024 + base &= 0x0000ffff; + base |= (4*1024*1024)<<2; + 
for(j = 0; j < controllers; j++) { + pci_write_config32(ctrl[j].f1, 0x40 + (i<<3), base); + } + } + else + { + hoist = /* hole start address */ + ((hole_startk << 10) & 0xff000000) + + /* hole address to memory controller address */ + (((basek + carry_over) >> 6) & 0x0000ff00) + + /* enable */ + 1; + pci_write_config32(dev, 0xf0, hoist); + } + + return carry_over; +} + +static void set_hw_mem_hole(int controllers, const struct mem_controller *ctrl) +{ + + uint32_t hole_startk; + int i; + + hole_startk = 4*1024*1024 - HW_MEM_HOLE_SIZEK; + +#if HW_MEM_HOLE_SIZE_AUTO_INC == 1 + //We need to double check if the hole_startk is valid, if it is equal to basek, we need to decrease it some + uint32_t basek_pri; + for(i=0; i<controllers; i++) { + uint32_t base; + unsigned base_k; + base = pci_read_config32(ctrl[0].f1, 0x40 + (i << 3)); + if ((base & ((1<<1)|(1<<0))) != ((1<<1)|(1<<0))) { + continue; + } + base_k = (base & 0xffff0000) >> 2; + if(base_k == hole_startk) { + hole_startk -= (base_k - basek_pri)>>1; // decrease mem hole startk to make sure it is on middle of previous node + break; //only one hole + } + basek_pri = base_k; + } +#endif + //find node index that need do set hole + for(i=0; i<controllers; i++) { + uint32_t base, limit; + unsigned base_k, limit_k; + base = pci_read_config32(ctrl[0].f1, 0x40 + (i << 3)); + if ((base & ((1<<1)|(1<<0))) != ((1<<1)|(1<<0))) { + continue; + } + limit = pci_read_config32(ctrl[0].f1, 0x44 + (i << 3)); + base_k = (base & 0xffff0000) >> 2; + limit_k = ((limit + 0x00010000) & 0xffff0000) >> 2; + if ((base_k <= hole_startk) && (limit_k > hole_startk)) { + unsigned end_k; + hoist_memory(controllers, ctrl, hole_startk, i); + end_k = memory_end_k(ctrl, controllers); + set_top_mem(end_k, hole_startk); + break; //only one hole + } + } + +} + +#endif + +static void sdram_enable(int controllers, const struct mem_controller *ctrl, struct sys_info *sysinfo) +{ + int i; + + +#if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1 + unsigned 
cpu_f0_f1[8]; + tsc_t tsc, tsc0[8]; + + print_debug_addr("sdram_enable: tsc0[8]: ", &tsc0[0]); +#endif + uint32_t dword; + + /* Error if I don't have memory */ + if (memory_end_k(ctrl, controllers) == 0) { + die("No memory\r\n"); + } + + /* Before enabling memory start the memory clocks */ + for(i = 0; i < controllers; i++) { + uint32_t dtl, dch; + if (!sysinfo->ctrl_present[ i ]) + continue; + dch = pci_read_config32(ctrl[i].f2, DRAM_CONFIG_HIGH); + + // if no memory installed, disabled the interface + if(sysinfo->meminfo[i].dimm_mask==0x00){ + dch |= DCH_DisDramInterface; + pci_write_config32(ctrl[i].f2, DRAM_CONFIG_HIGH, dch); + + } + else { + dch |= DCH_MemClkFreqVal; + pci_write_config32(ctrl[i].f2, DRAM_CONFIG_HIGH, dch); + /* address timing and Output driver comp Control */ + set_misc_timing(ctrl+i, sysinfo->meminfo+i ); + } + } + + /* We need to wait a mimmium of 20 MEMCLKS to enable the InitDram */ + memreset(controllers, ctrl); + + for(i = 0; i < controllers; i++) { + uint32_t dcl, dch; + if (!sysinfo->ctrl_present[ i ]) + continue; + /* Skip everything if I don't have any memory on this controller */ + dch = pci_read_config32(ctrl[i].f2, DRAM_CONFIG_HIGH); + if (!(dch & DCH_MemClkFreqVal)) { + continue; + } + + /* ChipKill */ + dcl = pci_read_config32(ctrl[i].f2, DRAM_CONFIG_LOW); + if (dcl & DCL_DimmEccEn) { + uint32_t mnc; + print_spew("ECC enabled\r\n"); + mnc = pci_read_config32(ctrl[i].f3, MCA_NB_CONFIG); + mnc |= MNC_ECC_EN; + if (dcl & DCL_Width128) { + mnc |= MNC_CHIPKILL_EN; + } + pci_write_config32(ctrl[i].f3, MCA_NB_CONFIG, mnc); + } + +#if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1 + cpu_f0_f1[i] = is_cpu_pre_f2_in_bsp(i); + if(cpu_f0_f1[i]) { + //Rev F0/F1 workaround +#if 1 + /* Set the DqsRcvEnTrain bit */ + dword = pci_read_config32(ctrl[i].f2, DRAM_CTRL); + dword |= DC_DqsRcvEnTrain; + pci_write_config32(ctrl[i].f2, DRAM_CTRL, dword); +#endif + tsc0[i] = rdtsc(); + } +#endif + +#if 0 + /* Set the DqsRcvEnTrain bit */ + dword = 
pci_read_config32(ctrl[i].f2, DRAM_CTRL); + dword |= DC_DqsRcvEnTrain; + pci_write_config32(ctrl[i].f2, DRAM_CTRL, dword); +#endif + + pci_write_config32(ctrl[i].f2, DRAM_CONFIG_LOW, dcl); + dcl |= DCL_InitDram; + pci_write_config32(ctrl[i].f2, DRAM_CONFIG_LOW, dcl); + + } + + for(i = 0; i < controllers; i++) { + uint32_t dcl, dch, dcm; + if (!sysinfo->ctrl_present[ i ]) + continue; + /* Skip everything if I don't have any memory on this controller */ + if(sysinfo->meminfo[i].dimm_mask==0x00) continue; + + print_debug("Initializing memory: "); + int loops = 0; + do { + dcl = pci_read_config32(ctrl[i].f2, DRAM_CONFIG_LOW); + loops++; + if ((loops & 1023) == 0) { + print_debug("."); + } + } while(((dcl & DCL_InitDram) != 0) && (loops < TIMEOUT_LOOPS)); + if (loops >= TIMEOUT_LOOPS) { + print_debug(" failed\r\n"); + continue; + } + + /* Wait until it is safe to touch memory */ + do { + dcm = pci_read_config32(ctrl[i].f2, DRAM_CTRL_MISC); + } while(((dcm & DCM_MemClrStatus) == 0) /* || ((dcm & DCM_DramEnabled) == 0)*/ ); + +#if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1 + if(cpu_f0_f1[i]) { + tsc= rdtsc(); + + print_debug_dqs_tsc("\r\nbegin tsc0", i, tsc0[i].hi, tsc0[i].lo, 2); + print_debug_dqs_tsc("end tsc ", i, tsc.hi, tsc.lo, 2); + + if(tsc.lo<tsc0[i].lo) { + tsc.hi--; + } + tsc.lo -= tsc0[i].lo; + tsc.hi -= tsc0[i].hi; + + tsc0[i].lo = tsc.lo; + tsc0[i].hi = tsc.hi; + + print_debug_dqs_tsc(" dtsc0", i, tsc0[i].hi, tsc0[i].lo, 2); + } +#endif + print_debug(" done\r\n"); + } + +#if HW_MEM_HOLE_SIZEK != 0 + // init hw mem hole here + /* DramHoleValid bit only can be set after MemClrStatus is set by Hardware */ + set_hw_mem_hole(controllers, ctrl); +#endif + + //store tom to sysinfo, and it will be used by dqs_timing + { + msr_t msr; + //[1M, TOM) + msr = rdmsr(TOP_MEM); + sysinfo->tom_k = ((msr.hi<<24) | (msr.lo>>8))>>2; + + //[4G, TOM2) + msr = rdmsr(TOP_MEM2); + sysinfo->tom2_k = ((msr.hi<<24)| (msr.lo>>8))>>2; + } + + for(i = 0; i < controllers; i++) { + 
sysinfo->mem_trained[i] = 0; + } + +#if MEM_TRAIN_SEQ == 0 + #if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1 + dqs_timing(controllers, ctrl, tsc0, sysinfo); + #else + dqs_timing(controllers, ctrl, sysinfo); + #endif +#else + + #if MEM_TRAIN_SEQ == 2 + //need to enable mtrr, so dqs training could access the test address + setup_mtrr_dqs(sysinfo->tom_k, sysinfo->tom2_k); + #endif + + for(i = 0; i < controllers; i++) { + if (!sysinfo->ctrl_present[ i ]) + continue; + + /* Skip everything if I don't have any memory on this controller */ + if(sysinfo->meminfo[i].dimm_mask==0x00) continue; + + dqs_timing(i, ctrl, sysinfo, 1); + + #if MEM_TRAIN_SEQ == 1 + break; // only train the first node with ram + #endif + } + + #if MEM_TRAIN_SEQ == 2 + clear_mtrr_dqs(sysinfo->tom2_k); + #endif + +#endif + + +} +static void fill_mem_ctrl(int controllers, struct mem_controller *ctrl_a, const uint16_t *spd_addr) +{ + int i; + int j; + struct mem_controller *ctrl; + for(i=0;i<controllers; i++) { + ctrl = &ctrl_a[i]; + ctrl->node_id = i; + ctrl->f0 = PCI_DEV(0, 0x18+i, 0); + ctrl->f1 = PCI_DEV(0, 0x18+i, 1); + ctrl->f2 = PCI_DEV(0, 0x18+i, 2); + ctrl->f3 = PCI_DEV(0, 0x18+i, 3); + + if(spd_addr == (void *)0) continue; + + for(j=0;j<DIMM_SOCKETS;j++) { + ctrl->channel0[j] = spd_addr[(i*2+0)*DIMM_SOCKETS + j]; + ctrl->channel1[j] = spd_addr[(i*2+1)*DIMM_SOCKETS + j]; + } + } +} diff --git a/src/northbridge/amd/amdk8/raminit_f_dqs.c b/src/northbridge/amd/amdk8/raminit_f_dqs.c new file mode 100644 index 0000000000..f77b6d6c86 --- /dev/null +++ b/src/northbridge/amd/amdk8/raminit_f_dqs.c @@ -0,0 +1,2036 @@ +/* + yhlu 2005.10 dqs training +*/ +//0: mean no debug info +#define DQS_TRAIN_DEBUG 0 + +static inline void print_debug_dqs(const char *str, unsigned val, unsigned level) +{ +#if DQS_TRAIN_DEBUG > 0 + if(DQS_TRAIN_DEBUG > level) { + #if CONFIG_USE_INIT == 1 + printk_debug("%s%x\r\n", str, val); + #else + print_debug(str); print_debug_hex32(val); print_debug("\r\n"); + #endif + } +#endif +} + 
+static inline void print_debug_dqs_pair(const char *str, unsigned val, const char *str2, unsigned val2, unsigned level) +{ +#if DQS_TRAIN_DEBUG > 0 + if(DQS_TRAIN_DEBUG > level) { + #if CONFIG_USE_INIT == 1 + printk_debug("%s%08x%s%08x\r\n", str, val, str2, val2); + #else + print_debug(str); print_debug_hex32(val); print_debug(str2); print_debug_hex32(val2); print_debug("\r\n"); + #endif + } +#endif +} + +static inline void print_debug_dqs_tsc(const char *str, unsigned i, unsigned val, unsigned val2, unsigned level) +{ +#if DQS_TRAIN_DEBUG > 0 + if(DQS_TRAIN_DEBUG > level) { + #if CONFIG_USE_INIT == 1 + printk_debug("%s[%02x]=%08x%08x\r\n", str, i, val, val2); + #else + print_debug(str); print_debug("["); print_debug_hex8(i); print_debug("]="); print_debug_hex32(val); print_debug_hex32(val2); print_debug("\r\n"); + #endif + } +#endif +} + +static inline void print_debug_dqs_tsc_x(const char *str, unsigned i, unsigned val, unsigned val2) +{ + #if CONFIG_USE_INIT == 1 + printk_debug("%s[%02x]=%08x%08x\r\n", str, i, val, val2); + #else + print_debug(str); print_debug("["); print_debug_hex8(i); print_debug("]="); print_debug_hex32(val); print_debug_hex32(val2); print_debug("\r\n"); + #endif + +} + +static void fill_mem_cs_sysinfo(unsigned nodeid, const struct mem_controller *ctrl, struct sys_info *sysinfo) +{ + + int i; + sysinfo->mem_base[nodeid] = pci_read_config32(ctrl->f1, 0x40 + (nodeid<<3)); + + for(i=0;i<8; i++) { + sysinfo->cs_base[nodeid*8+i] = pci_read_config32(ctrl->f2, 0x40 + (i<<2)); + } + + sysinfo->hole_reg[nodeid] = pci_read_config32(ctrl->f1, 0xf0); + +} +static unsigned Get_MCTSysAddr(const struct mem_controller *ctrl, unsigned cs_idx, struct sys_info *sysinfo) +{ + uint32_t dword; + uint32_t mem_base; + unsigned nodeid = ctrl->node_id; + +#if HW_MEM_HOLE_SIZEK != 0 + uint32_t hole_reg; +#endif + + //get the local base addr of the chipselect + dword = sysinfo->cs_base[nodeid * 8 + cs_idx]; + dword &= 0xfffffff0; + + //sys addr= node base + local cs 
base + mem_base = sysinfo->mem_base[nodeid]; + mem_base &= 0xffff0000; + + dword += mem_base; +#if HW_MEM_HOLE_SIZEK != 0 + hole_reg = sysinfo->hole_reg[nodeid]; + if(hole_reg & 1) { + unsigned hole_startk; + hole_startk = (hole_reg & (0xff<<24)) >> 10; + if( (dword >= (hole_startk<<2)) && (dword < ((4*1024*1024)<<2))) { + dword += ((4*1024*1024 - hole_startk)<<2); + } + } +#endif + + //add 1MB offset to avoid compat area + dword += (1<<(20-8)); + + //So final result is upper 32 bit addr + + return dword; + +} + +static unsigned Get_RcvrSysAddr(const struct mem_controller * ctrl, unsigned channel, unsigned cs_idx, struct sys_info *sysinfo) +{ +#if 0 + //get SB_64MuxedMode + uint32_t dword; + dword = pci_read_config32(ctrl->f2, DRAM_CTRL_MISC); + if((dword & DCM_Mode64BitMux) == DCM_Mode64BitMux) { + if(channel) cs_idx += 4; // translate Receiver number to Chipsel + } +#endif + + return Get_MCTSysAddr(ctrl, cs_idx, sysinfo); + +} + +static inline unsigned long read_cr4(void) +{ + unsigned long cr4; + asm volatile ("movl %%cr4, %0" : "=r" (cr4)); + return cr4; +} + +static inline void write_cr4(unsigned long cr4) +{ + asm volatile ("movl %0, %%cr4" : : "r" (cr4)); +} + + +static inline void enable_sse2() +{ + unsigned long cr4; + cr4 = read_cr4(); + cr4 |= (1<<9); + write_cr4(cr4); +} + +static inline void disable_sse2() +{ + unsigned long cr4; + cr4 = read_cr4(); + cr4 &= ~(1<<9); + write_cr4(cr4); +} + + +static void set_wrap32dis(void) { + msr_t msr; + + msr = rdmsr(0xc0010015); + msr.lo |= (1<<17); + + wrmsr(0xc0010015, msr); + +} + +static void clear_wrap32dis(void) { + msr_t msr; + + msr = rdmsr(0xc0010015); + msr.lo &= ~(1<<17); + + wrmsr(0xc0010015, msr); + +} + +static void set_FSBASE(uint32_t addr_hi) +{ + msr_t msr; + + //set fs and use fs prefix to access the mem + msr.hi = addr_hi; + msr.lo = 0; + wrmsr(0xc0000100, msr); //FS_BASE + +} + +#if 0 +static void write_mem(uint32_t addr_hi, uint32_t addr_lo, uint32_t value) +{ + if(addr_hi == 0) { + 
*((uint32_t *)addr_lo) = value; + return; + } + + set_FSBASE(addr_hi); + + __asm__ volatile ( + "movl %1, %%fs:(%0)\n\t" + :: "a" (addr_lo), "b" (value) + ); + +} + +static uint32_t read_mem(uint32_t addr_hi, uint32_t addr_lo) +{ + unsigned value; + if(addr_hi == 0) { + value = *((uint32_t *)addr_lo); + return value; + } + + set_FSBASE(addr_hi); + + __asm__ volatile ( + "movl %%fs:(%1), %0\n\t" + :"=b"(value): "a" (addr_lo) + ); + + return value; + +} +#endif + +static unsigned ChipSelPresent(const struct mem_controller *ctrl, unsigned cs_idx, struct sys_info *sysinfo) +{ + unsigned enabled; + unsigned nodeid = ctrl->node_id; + + + enabled = sysinfo->cs_base[nodeid * 8 + cs_idx]; + enabled &= 1; + + return enabled; + +} + +static unsigned RcvrRankEnabled(const struct mem_controller *ctrl, int channel, int cs_idx, unsigned is_Width128, struct sys_info *sysinfo) +{ + if(!is_Width128) { + if(channel) return 0; // no channel b + } + + return ChipSelPresent(ctrl, cs_idx, sysinfo); +} + +static void WriteLNTestPattern(unsigned addr_lo, uint8_t *buf_a, unsigned line_num) +{ + __asm__ volatile ( + "1:\n\t" + "movdqa (%3), %%xmm0\n\t" + "movntdq %%xmm0, %%fs:(%0)\n\t" /* xmm0 is 128 bit */ + "addl %1, %0\n\t" + "addl %1, %3\n\t" + "loop 1b\n\t" + + :: "a" (addr_lo), "d" (16), "c" (line_num * 4), "b"(buf_a) + ); + + +} + +static void Write1LTestPattern(unsigned addr, unsigned p, uint8_t *buf_a, uint8_t *buf_b) +{ + uint8_t *buf; + if(p==1) { buf = buf_b; } + else { buf = buf_a; } + + set_FSBASE (addr>>24); + + WriteLNTestPattern(addr<<8, buf, 1); +} + +static void Read1LTestPattern(unsigned addr) +{ + unsigned value; + + set_FSBASE(addr>>24); + + /* 1st move causes read fill (to exclusive or shared)*/ + __asm__ volatile ( + "movl %%fs:(%1), %0\n\t" + :"=b"(value): "a" (addr<<8) + ); + +} + +#define DQS_PASS 0 +#define DQS_FAIL 1 + +#define DQS_FIRST_PASS 1 +#define DQS_SECOND_PASS 2 + +#define SB_NORCVREN 11 +#define RCVREN_MARGIN 6 +#define SB_SmallRCVR 13 +#define 
SB_CHA2BRCVREN 12 +#define SB_NODQSPOS 14 +#define MIN_DQS_WNDW 3 +#define SB_SMALLDQS 15 + + +static unsigned CompareTestPatternQW0(unsigned channel, unsigned addr, unsigned pattern, const uint32_t *TestPattern0, const uint32_t *TestPattern1, const uint32_t *TestPattern2, unsigned Pass, unsigned is_Width128) +{ + uint32_t addr_lo; + uint32_t *test_buf; + uint32_t value; + uint32_t value_test; + unsigned result = DQS_FAIL; + + if(Pass == DQS_FIRST_PASS) { + if(pattern==1) { + test_buf = (uint32_t *)TestPattern1; + } + else { + test_buf = (uint32_t *)TestPattern0; + } + } + else { + test_buf = (uint32_t *)TestPattern2; + } + + set_FSBASE(addr>>24); + + addr_lo = addr<<8; + + if(is_Width128 && (channel == 1)) { + addr_lo += 8; //second channel + test_buf += 2; + } + + __asm__ volatile ( + "movl %%fs:(%1), %0\n\t" + :"=b"(value): "a" (addr_lo) + ); + + value_test = *test_buf; + + + print_debug_dqs_pair("\t\t\t\t\t\tQW0.lo : test_buf= ", (unsigned)test_buf, " value = ", value_test, 4); + print_debug_dqs_pair("\t\t\t\t\t\tQW0.lo : addr_lo = ", addr_lo, " value = ", value, 4); + + if(value == value_test) { + addr_lo += 4; + test_buf++; + __asm__ volatile ( + "movl %%fs:(%1), %0\n\t" + :"=b"(value): "a" (addr_lo) + ); + value_test = *test_buf; + print_debug_dqs_pair("\t\t\t\t\t\tQW0.hi : test_buf= ", (unsigned)test_buf, " value = ", value_test, 4); + print_debug_dqs_pair("\t\t\t\t\t\tQW0.hi : addr_lo = ", addr_lo, " value = ", value, 4); + + if(value == value_test){ + result = DQS_PASS; + } + } + + if(Pass == DQS_SECOND_PASS) { // second pass need to be inverted + if(result==DQS_PASS) { + result = DQS_FAIL; + } + else { + result = DQS_PASS; + } + } + + return result; + +} + +static void SetMaxAL_RcvrDly(const struct mem_controller *ctrl, unsigned dly) +{ + uint32_t reg; + + dly += (20-1); // round it + dly /= 20; // convert from unit 50ps to 1ns + + dly += 6; + + + reg = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH); + reg &= ~(DCH_MaxAsyncLat_MASK 
<<DCH_MaxAsyncLat_SHIFT); + reg |= ((dly - DCH_MaxAsyncLat_BASE) << DCH_MaxAsyncLat_SHIFT); + pci_write_config32(ctrl->f2, DRAM_CONFIG_HIGH, reg); + +} + +/* + Set the Target range to WT IO (using an IORR overlapping the already existing + WB dram type). Use IORR0 +*/ +static void SetTargetWTIO(unsigned addr) +{ + msr_t msr; + msr.hi = addr>>24; + msr.lo = addr<<8; + wrmsr(0xc0010016, msr); //IORR0 BASE + + msr.hi = 0xff; + msr.lo = 0xfc000800; // 64MB Mask + wrmsr(0xc0010017, msr); // IORR0 Mask +} + +static void ResetTargetWTIO(void) +{ + msr_t msr; + + msr.hi = 0; + msr.lo = 0; + wrmsr(0xc0010017, msr); // IORR0 Mask +} + +static void proc_CLFLUSH(unsigned addr) +{ + + set_FSBASE(addr>>24); + + /* 1st move causes read fill (to exclusive or shared)*/ + __asm__ volatile ( + /* clflush fs:[eax] */ + "clflush %%fs:(%0)\n\t" + ::"a" (addr<<8) + ); + +} +static void proc_IOCLFLUSH(unsigned addr) +{ + SetTargetWTIO(addr); + proc_CLFLUSH(addr); + ResetTargetWTIO(); +} + +static void ResetDCTWrPtr(const struct mem_controller *ctrl) +{ + uint32_t dword; + unsigned index = 0x10; + + dword = pci_read_config32_index_wait(ctrl->f2, 0x98, index); + pci_write_config32_index_wait(ctrl->f2, 0x98, index, dword); + +} + + +static uint16_t get_exact_T1000(unsigned i) +{ + // 200 266, 333, 400 + const static uint16_t T1000_a[]= { 5000, 3759, 3003, 2500 }; + + static const uint16_t TT_a[] = { + /*200 266 333 400 */ + /*4 */ 6250, 6250, 6250, 6250, + /*5 */ 5000, 5000, 5000, 2500, + /*6 */ 5000, 4166, 4166, 2500, + /*7 */ 5000, 4285, 3571, 2500, + + /*8 */ 5000, 3750, 3125, 2500, + /*9 */ 5000, 3888, 3333, 2500, + /*10*/ 5000, 4000, 3000, 2500, + /*11*/ 5000, 4090, 3181, 2500, + + /*12*/ 5000, 3750, 3333, 2500, + /*13*/ 5000, 3846, 3076, 2500, + /*14*/ 5000, 3928, 3214, 2500, + /*15*/ 5000, 4000, 3000, 2500, + }; + + unsigned fid_cur; + int index; + + msr_t msr; + msr = rdmsr(0xc0010042); + fid_cur = msr.lo & 0x3f; + + index = fid_cur>>1; + + if(index>12) return T1000_a[i]; + + return 
TT_a[index * 4+i]; + +} + +static void InitDQSPos4RcvrEn(const struct mem_controller *ctrl) +{ + int i; + uint32_t dword; + + dword = 0x00000000; + for(i=1; i<=3; i++) { + /* Program the DQS Write Timing Control Registers (Function 2:Offset 0x9c, index 0x01-0x03, 0x21-0x23) to 0x00 for all bytes */ + pci_write_config32_index_wait(ctrl->f2, 0x98, i, dword); + pci_write_config32_index_wait(ctrl->f2, 0x98, i+0x20, dword); + } + + dword = 0x2f2f2f2f; + for(i=5; i<=7; i++) { + /* Program the DQS Write Timing Control Registers (Function 2:Offset 0x9c, index 0x05-0x07, 0x25-0x27) to 0x2f for all bytes */ + pci_write_config32_index_wait(ctrl->f2, 0x98, i, dword); + pci_write_config32_index_wait(ctrl->f2, 0x98, i+0x20, dword); + } + + +} +#ifndef K8_REV_F_SUPPORT_F0_F1_WORKAROUND +#define K8_REV_F_SUPPORT_F0_F1_WORKAROUND 1 +#endif + +static void TrainRcvrEn(const struct mem_controller *ctrl, unsigned Pass, struct sys_info *sysinfo) +{ + + const static uint32_t TestPattern0[] = { + 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, + 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, + 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, + 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, + }; + const static uint32_t TestPattern1[] = { + 0x55555555, 0x55555555, 0x55555555, 0x55555555, + 0x55555555, 0x55555555, 0x55555555, 0x55555555, + 0x55555555, 0x55555555, 0x55555555, 0x55555555, + 0x55555555, 0x55555555, 0x55555555, 0x55555555, + }; + const static uint32_t TestPattern2[] = { + 0x12345678, 0x87654321, 0x23456789, 0x98765432, + 0x59385824, 0x30496724, 0x24490795, 0x99938733, + 0x40385642, 0x38465245, 0x29432163, 0x05067894, + 0x12349045, 0x98723467, 0x12387634, 0x34587623, + }; + + uint8_t pattern_buf_x[64 * 4 + 16]; // We need to two cache line So have more 16 bytes to keep 16 byte alignment */ + uint8_t *buf_a, *buf_b; + uint32_t ecc_bit; + uint32_t dword; + uint8_t *dqs_rcvr_dly_a = &sysinfo->dqs_rcvr_dly_a[ctrl->node_id * 2* 8] ; //8 node, channel 2, receiver 8 + + int i; + + 
unsigned channel, receiver; + + unsigned Errors; + unsigned CTLRMaxDelay; + unsigned T1000; + + unsigned LastTest; + unsigned CurrTest; + unsigned Test0, Test1; + + unsigned RcvrEnDlyRmin; + + unsigned two_ranks; + unsigned RcvrEnDly; + + unsigned PatternA; + unsigned PatternB; + + unsigned TestAddr0, TestAddr0B, TestAddr1, TestAddr1B; + + unsigned CurrRcvrCHADelay; + + unsigned tmp; + + unsigned is_Width128 = sysinfo->meminfo[ctrl->node_id].is_Width128; + + unsigned cpu_f0_f1; + + if(Pass == DQS_FIRST_PASS) { + InitDQSPos4RcvrEn(ctrl); + } + + //enable SSE2 + enable_sse2(); + + //wrap32dis + set_wrap32dis(); + + //disable ECC temp + dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW); + ecc_bit = dword & DCL_DimmEccEn; + dword &= ~(DCL_DimmEccEn); + pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword); + + + if(Pass == DQS_FIRST_PASS) { +#if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1 + cpu_f0_f1 = is_cpu_pre_f2_in_bsp(ctrl->node_id); + if(!cpu_f0_f1) +#endif + { +#if 1 + /* Set the DqsRcvEnTrain bit */ + dword = pci_read_config32(ctrl->f2, DRAM_CTRL); + dword |= DC_DqsRcvEnTrain; + pci_write_config32(ctrl->f2, DRAM_CTRL, dword); +#endif + } + } + + //get T1000 figures (cycle time (ns)) * 1K + dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH); + dword &= DCH_MemClkFreq_MASK; + + T1000 = get_exact_T1000(dword); + + // SetupRcvrPattern + buf_a = (uint8_t *)(((uint32_t)(&pattern_buf_x[0]) + 0x10) & (0xfffffff0)); + buf_b = buf_a + 128; //?? 
+ if(Pass==DQS_FIRST_PASS) { + for(i=0;i<16;i++) { + *((uint32_t *)(buf_a + i*4)) = TestPattern0[i]; + *((uint32_t *)(buf_b + i*4)) = TestPattern1[i]; + } + } + else { + for(i=0;i<16;i++) { + *((uint32_t *)(buf_a + i*4)) = TestPattern2[i]; + *((uint32_t *)(buf_b + i*4)) = TestPattern2[i]; + } + } + + print_debug_dqs("\r\nTrainRcvEn: 0 ctrl", ctrl->node_id, 0); + + print_debug_addr("TrainRcvEn: buf_a:", buf_a); + + Errors = 0; + /* for each channel */ + CTLRMaxDelay = 0; + for(channel = 0; (channel < 2) && (!Errors); channel++) + { + print_debug_dqs("\tTrainRcvEn51: channel ",channel, 1); + + /* for each rank */ + /* there are four recriver pairs, loosely associated with CS */ + for( receiver = 0; (receiver < 8) && (!Errors); receiver+=2) + { + + unsigned index=(receiver>>1) * 3 + 0x10; + + print_debug_dqs("\t\tTrainRcvEn52: index ", index, 2); + + if(is_Width128) { + if(channel) { + dword = pci_read_config32_index_wait(ctrl->f2, 0x98, index); + CurrRcvrCHADelay= dword & 0xff; + } + } + else { + if(channel) { + index += 0x20; + } + } + + LastTest = DQS_FAIL; + RcvrEnDlyRmin = 0xaf; + + if(!RcvrRankEnabled(ctrl, channel, receiver, is_Width128, sysinfo)) continue; + + /* for each DQS receiver enable setting */ + + TestAddr0 = Get_RcvrSysAddr(ctrl, channel, receiver, sysinfo); + + TestAddr0B = TestAddr0 + (1<<(20+2-8)); // 4MB + + if(RcvrRankEnabled(ctrl, channel, receiver+1, is_Width128, sysinfo)) { + TestAddr1 = Get_RcvrSysAddr(ctrl, channel, receiver+1, sysinfo); + TestAddr1B = TestAddr1 + (1<<(20+2-8)); //4MB + two_ranks = 1; + } + else { + two_ranks = 0; + } + + print_debug_dqs("\t\tTrainRcvEn53: TestAddr0B ", TestAddr0B, 2); + + Write1LTestPattern(TestAddr0, 0, buf_a, buf_b); // rank0 of dimm, test p0 + Write1LTestPattern(TestAddr0B, 1, buf_a, buf_b); //rank0 of dimm, test p1 + + if(two_ranks == 1) { + Write1LTestPattern(TestAddr1, 0, buf_a, buf_b); //rank 1 of dimm + Write1LTestPattern(TestAddr1B, 1, buf_a, buf_b);//rank 1 of dimm + } + + if(Pass == 
DQS_FIRST_PASS) { + RcvrEnDly = 0; + } else { + RcvrEnDly = dqs_rcvr_dly_a[channel * 8 + receiver]; + } + + while ( RcvrEnDly < 0xaf) { // Sweep Delay value here + print_debug_dqs("\t\t\tTrainRcvEn541: RcvrEnDly ", RcvrEnDly, 3); + + if(RcvrEnDly & 1) { + /* Odd steps get another pattern such that even + and odd steps alternate. + The pointers to the patterns will be swapped + at the end of the loop so they are correspond + */ + PatternA = 1; + PatternB = 0; + } + else { + /* Even step */ + PatternA = 0; + PatternB = 1; + } + + /* Program current Receiver enable delay */ + pci_write_config32_index_wait(ctrl->f2, 0x98, index, RcvrEnDly); + + if(is_Width128) { + /* Program current Receiver enable delay chaannel b */ + pci_write_config32_index_wait(ctrl->f2, 0x98, index+ 0x20, RcvrEnDly); + } + + /* Program the MaxAsyncLat filed with the + current DQS receiver enable setting plus 6ns + */ + /*Porgram MaxAsyncLat to correspond with current delay */ + SetMaxAL_RcvrDly(ctrl, RcvrEnDly); + + CurrTest = DQS_FAIL; + + Read1LTestPattern(TestAddr0); //Cache Fill + /* ROM vs cache compare */ + Test0 = CompareTestPatternQW0(channel, TestAddr0, PatternA, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128); + proc_IOCLFLUSH(TestAddr0); + + ResetDCTWrPtr(ctrl); + + print_debug_dqs("\t\t\tTrainRcvEn542: Test0 ", Test0, 3); + + if(Test0 == DQS_PASS) { + + Read1LTestPattern(TestAddr0B); + Test1 = CompareTestPatternQW0(channel, TestAddr0B, PatternB, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128); + proc_IOCLFLUSH(TestAddr0B); + + ResetDCTWrPtr(ctrl); + + print_debug_dqs("\t\t\tTrainRcvEn543: Test1 ", Test1, 3); + + if(Test1 == DQS_PASS) { + if(two_ranks) { + Read1LTestPattern(TestAddr1); + Test0 = CompareTestPatternQW0(channel, TestAddr1, PatternA, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128); + proc_IOCLFLUSH(TestAddr1); + ResetDCTWrPtr(ctrl); + + if(Test0 == DQS_PASS) { + Read1LTestPattern(TestAddr1B); + Test1 = CompareTestPatternQW0(channel, 
TestAddr1B, PatternB, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128); + proc_IOCLFLUSH(TestAddr1B); + ResetDCTWrPtr(ctrl); + + if(Test1 == DQS_PASS) { + CurrTest = DQS_PASS; + } + } + print_debug_dqs("\t\t\tTrainRcvEn544: Test0 ", Test0, 3); + } + else { + CurrTest = DQS_PASS; + } + } + } + + print_debug_dqs("\t\t\tTrainRcvEn55: RcvrEnDly ", RcvrEnDly, 3); + + if(CurrTest == DQS_PASS) { + if(LastTest == DQS_FAIL) { + RcvrEnDlyRmin = RcvrEnDly; + break; + } + } + + LastTest = CurrTest; + + /* swap the rank 0 pointers */ + tmp = TestAddr0; + TestAddr0 = TestAddr0B; + TestAddr0B = tmp; + + /* swap the rank 1 pointers */ + tmp = TestAddr1; + TestAddr1 = TestAddr1B; + TestAddr1B = tmp; + + print_debug_dqs("\t\t\tTrainRcvEn56: RcvrEnDly ", RcvrEnDly, 3); + + RcvrEnDly++; + + } // while RcvrEnDly + + print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDly ", RcvrEnDly, 2); + + if(RcvrEnDlyRmin == 0xaf) { + //no passing window + Errors |= SB_NORCVREN; + } + + if(Pass == DQS_FIRST_PASS) { + // We need a better value for DQSPos trainning + RcvrEnDly = RcvrEnDlyRmin /* + RCVREN_MARGIN * T1000/64/50 */; + } else { + RcvrEnDly = RcvrEnDlyRmin; + } + + if(RcvrEnDly > 0xae) { + //passing window too narrow, too far delayed + Errors |= SB_SmallRCVR; + RcvrEnDly = 0xae; + } + + if(Pass == DQS_SECOND_PASS) { //second pass must average vales + RcvrEnDly += dqs_rcvr_dly_a[channel * 8 + receiver] /* - (RCVREN_MARGIN * T1000/64/50)*/; + RcvrEnDly >>= 1; + } + + dqs_rcvr_dly_a[channel * 8 + receiver] = RcvrEnDly; + + //Set final RcvrEnDly for this DIMM and Channel + pci_write_config32_index_wait(ctrl->f2, 0x98, index, RcvrEnDly); + + if(is_Width128) { + pci_write_config32_index_wait(ctrl->f2, 0x98, index+0x20, RcvrEnDly); // channel B + if(channel) { + pci_write_config32_index_wait(ctrl->f2, 0x98, index, CurrRcvrCHADelay); + if(RcvrEnDly > CurrRcvrCHADelay) { + dword = RcvrEnDly - CurrRcvrCHADelay; + } + else { + dword = CurrRcvrCHADelay - RcvrEnDly; + } + dword *= 50; + if(dword > 
T1000) { + Errors |= SB_CHA2BRCVREN; + } + } + } + + print_debug_dqs("\t\tTrainRcvEn63: RcvrEnDly ", RcvrEnDly, 2); + + if(RcvrEnDly > CTLRMaxDelay) { + CTLRMaxDelay = RcvrEnDly; + } + + print_debug_dqs("\t\tTrainRcvEn64: CTLRMaxDelay ", CTLRMaxDelay, 2); + + } /* receiver */ + } /* channel */ + + print_debug_dqs("\tTrainRcvEn65: CTLRMaxDelay ", CTLRMaxDelay, 1); + + /* Program the MaxAsysncLat field with the largest DQS Receiver Enable setting */ + SetMaxAL_RcvrDly(ctrl, CTLRMaxDelay); + ResetDCTWrPtr(ctrl); + + //Enable ECC again + dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW); + dword &= ~(DCL_DimmEccEn); + dword |= ecc_bit; + pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword); + + if(Pass == DQS_FIRST_PASS) { +#if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1 + if(!cpu_f0_f1) +#endif + { + dword = pci_read_config32(ctrl->f2, DRAM_CTRL); + dword &= ~DC_DqsRcvEnTrain; + pci_write_config32(ctrl->f2, DRAM_CTRL, dword); + } + } + + //Clear wrap32dis + + clear_wrap32dis(); + + //restore SSE2 setting + disable_sse2(); + +#if MEM_TRAIN_SEQ != 1 + /* We need tidy output for type 1 */ + #if CONFIG_USE_INIT == 1 + printk_debug(" CTLRMaxDelay=%02x", CTLRMaxDelay); + #else + print_debug(" CTLRMaxDelay="); print_debug_hex8(CTLRMaxDelay); + #endif +#endif + + if(CTLRMaxDelay==0xae) { + soft_reset(); // try more or downgrade? 
+ } + +} + +#define DQS_READDIR 1 +#define DQS_WRITEDIR 0 + + +static void SetDQSDelayCSR(const struct mem_controller *ctrl, unsigned channel, unsigned bytelane, unsigned direction, unsigned dqs_delay) +{ //ByteLane could be 0-8, last is for ECC + unsigned index; + uint32_t dword; + unsigned shift; + + dqs_delay &= 0xff; + + index = (bytelane>>2) + 1 + channel * 0x20 + (direction << 2); + shift = bytelane; + while(shift>3) { + shift-=4; + } + shift <<= 3; // 8 bit + + dword = pci_read_config32_index_wait(ctrl->f2, 0x98, index); + dword &= ~(0x3f<<shift); + dword |= (dqs_delay<<shift); + pci_write_config32_index_wait(ctrl->f2, 0x98, index, dword); + +} + +static void SetDQSDelayAllCSR(const struct mem_controller *ctrl, unsigned channel, unsigned direction, unsigned dqs_delay) +{ + unsigned index; + uint32_t dword; + int i; + + dword = 0; + dqs_delay &= 0xff; + for(i=0;i<4;i++) { + dword |= dqs_delay<<(i*8); + } + + index = 1 + channel * 0x20 + direction * 4; + + for(i=0; i<2; i++) { + pci_write_config32_index_wait(ctrl->f2, 0x98, index + i, dword); + } + +} + +static unsigned MiddleDQS(unsigned min_d, unsigned max_d) +{ + unsigned size_d; + size_d = max_d-min_d; + if(size_d & 1) { //need round up + min_d++; + } + return ( min_d + (size_d>>1)); +} + +static inline void save_dqs_delay(unsigned channel, unsigned bytelane, unsigned direction, uint8_t *dqs_delay_a, uint8_t dqs_delay) +{ + dqs_delay_a[channel * 2*9 + direction * 9 + bytelane] = dqs_delay; +} + +static void WriteDQSTestPattern(unsigned addr_lo, unsigned pattern , uint8_t *buf_a) +{ + WriteLNTestPattern(addr_lo, buf_a, (pattern+1) * 9); +} + +static void ReadL18TestPattern(unsigned addr_lo) +{ + //set fs and use fs prefix to access the mem + __asm__ volatile ( + "movl %%fs:-128(%%esi), %%eax\n\t" //TestAddr cache line + "movl %%fs:-64(%%esi), %%eax\n\t" //+1 + "movl %%fs:(%%esi), %%eax\n\t" //+2 + "movl %%fs:64(%%esi), %%eax\n\t" //+3 + + "movl %%fs:-128(%%edi), %%eax\n\t" //+4 + "movl %%fs:-64(%%edi), 
%%eax\n\t" //+5 + "movl %%fs:(%%edi), %%eax\n\t" //+6 + "movl %%fs:64(%%edi), %%eax\n\t" //+7 + + "movl %%fs:-128(%%ebx), %%eax\n\t" //+8 + "movl %%fs:-64(%%ebx), %%eax\n\t" //+9 + "movl %%fs:(%%ebx), %%eax\n\t" //+10 + "movl %%fs:64(%%ebx), %%eax\n\t" //+11 + + "movl %%fs:-128(%%ecx), %%eax\n\t" //+12 + "movl %%fs:-64(%%ecx), %%eax\n\t" //+13 + "movl %%fs:(%%ecx), %%eax\n\t" //+14 + "movl %%fs:64(%%ecx), %%eax\n\t" //+15 + + "movl %%fs:-128(%%edx), %%eax\n\t" //+16 + "movl %%fs:-64(%%edx), %%eax\n\t" //+17 + + :: "a"(0), "b" (addr_lo+128+8*64), "c" (addr_lo+128+12*64), "d" (addr_lo +128+16*64), "S"(addr_lo+128), "D"(addr_lo+128+4*64) + ); + +} + +static void ReadL9TestPattern(unsigned addr_lo) +{ + + //set fs and use fs prefix to access the mem + __asm__ volatile ( + + "movl %%fs:-128(%%ecx), %%eax\n\t" //TestAddr cache line + "movl %%fs:-64(%%ecx), %%eax\n\t" //+1 + "movl %%fs:(%%ecx), %%eax\n\t" //+2 + "movl %%fs:64(%%ecx), %%eax\n\t" //+3 + + "movl %%fs:-128(%%edx), %%eax\n\t" //+4 + "movl %%fs:-64(%%edx), %%eax\n\t" //+5 + "movl %%fs:(%%edx), %%eax\n\t" //+6 + "movl %%fs:64(%%edx), %%eax\n\t" //+7 + + "movl %%fs:-128(%%ebx), %%eax\n\t" //+8 + + :: "a"(0), "b" (addr_lo+128+8*64), "c"(addr_lo+128), "d"(addr_lo+128+4*64) + ); + +} + + +static void ReadDQSTestPattern(unsigned addr_lo, unsigned pattern) +{ + if(pattern == 0) { + ReadL9TestPattern(addr_lo); + } + else { + ReadL18TestPattern(addr_lo); + } +} + +static void FlushDQSTestPattern_L9(unsigned addr_lo) +{ + __asm__ volatile ( + "clflush %%fs:-128(%%ecx)\n\t" + "clflush %%fs:-64(%%ecx)\n\t" + "clflush %%fs:(%%ecx)\n\t" + "clflush %%fs:64(%%ecx)\n\t" + + "clflush %%fs:-128(%%eax)\n\t" + "clflush %%fs:-64(%%eax)\n\t" + "clflush %%fs:(%%eax)\n\t" + "clflush %%fs:64(%%eax)\n\t" + + "clflush %%fs:-128(%%ebx)\n\t" + + :: "b" (addr_lo+128+8*64), "c"(addr_lo+128), "a"(addr_lo+128+4*64) + ); + +} +static __attribute__((noinline)) void FlushDQSTestPattern_L18(unsigned addr_lo) +{ + __asm__ volatile ( + "clflush 
%%fs:-128(%%eax)\n\t" + "clflush %%fs:-64(%%eax)\n\t" + "clflush %%fs:(%%eax)\n\t" + "clflush %%fs:64(%%eax)\n\t" + + "clflush %%fs:-128(%%edi)\n\t" + "clflush %%fs:-64(%%edi)\n\t" + "clflush %%fs:(%%edi)\n\t" + "clflush %%fs:64(%%edi)\n\t" + + "clflush %%fs:-128(%%ebx)\n\t" + "clflush %%fs:-64(%%ebx)\n\t" + "clflush %%fs:(%%ebx)\n\t" + "clflush %%fs:64(%%ebx)\n\t" + + "clflush %%fs:-128(%%ecx)\n\t" + "clflush %%fs:-64(%%ecx)\n\t" + "clflush %%fs:(%%ecx)\n\t" + "clflush %%fs:64(%%ecx)\n\t" + + "clflush %%fs:-128(%%edx)\n\t" + "clflush %%fs:-64(%%edx)\n\t" + + :: "b" (addr_lo+128+8*64), "c" (addr_lo+128+12*64), "d" (addr_lo +128+16*64), "a"(addr_lo+128), "D"(addr_lo+128+4*64) + ); +} + +static void FlushDQSTestPattern(unsigned addr_lo, unsigned pattern ) +{ + + if(pattern == 0){ + FlushDQSTestPattern_L9(addr_lo); + } + else { + FlushDQSTestPattern_L18(addr_lo); + } +} + +static unsigned CompareDQSTestPattern(unsigned channel, unsigned addr_lo, unsigned pattern, uint8_t *buf_a) +{ + uint32_t *test_buf; + unsigned bitmap = 0xff; + unsigned bytelane; + int i; + uint32_t value; + int j; + uint32_t value_test; + + test_buf = (uint32_t *)buf_a; + + + if(pattern && channel) { + addr_lo += 8; //second channel + test_buf+= 2; + } + + bytelane = 0; + for(i=0;i<9*64/4;i++) { + __asm__ volatile ( + "movl %%fs:(%1), %0\n\t" + :"=b"(value): "a" (addr_lo) + ); + value_test = *test_buf; + + print_debug_dqs_pair("\t\t\t\t\t\ttest_buf= ", (unsigned)test_buf, " value = ", value_test, 7); + print_debug_dqs_pair("\t\t\t\t\t\ttaddr_lo = ",addr_lo, " value = ", value, 7); + + for(j=0;j<4*8;j+=8) { + if(((value>>j)&0xff) != ((value_test>>j)& 0xff)) { + bitmap &= ~(1<<bytelane); + } + + bytelane++; + bytelane &= 0x7; + } + print_debug_dqs("\t\t\t\t\t\tbitmap = ", bitmap, 7); + + if(bytelane == 0) { + if(pattern == 1) { //dual channel + addr_lo += 8; //skip over other channel's data + test_buf += 2; + } + } + addr_lo += 4; + test_buf +=1; + + } + + + return bitmap; + +} + +static unsigned 
TrainDQSPos(const struct mem_controller *ctrl, unsigned channel, unsigned Direction, unsigned Pattern, uint8_t *buf_a, uint8_t *dqs_delay_a, struct sys_info *sysinfo) +{ + unsigned ByteLane; + unsigned Errors; + unsigned BanksPresent; + + unsigned MutualCSPassW[48]; + + unsigned ChipSel; + unsigned DQSDelay; + + unsigned TestAddr; + + unsigned LastTest; + unsigned RnkDlyFilterMax, RnkDlyFilterMin; + unsigned RnkDlySeqPassMax, RnkDlySeqPassMin; + + Errors = 0; + BanksPresent = 0; + + print_debug_dqs("\t\t\tTrainDQSPos begin ", 0, 3); + + print_debug_addr("TrainDQSPos: MutualCSPassW[48] :", MutualCSPassW); + + for(DQSDelay=0; DQSDelay<48; DQSDelay++) { + MutualCSPassW[DQSDelay] = 0xff; // Bitmapped status per delay setting, 0xff=All positions passing (1= PASS) + } + + for(ChipSel = 0; ChipSel < 8; ChipSel++) { //logical register chipselects 0..7 + print_debug_dqs("\t\t\t\tTrainDQSPos: 11 ChipSel ", ChipSel, 4); + if(!ChipSelPresent(ctrl, ChipSel, sysinfo)) continue; + BanksPresent = 1; + + TestAddr = Get_MCTSysAddr(ctrl, ChipSel, sysinfo); + + print_debug_dqs("\t\t\t\tTrainDQSPos: 12 TestAddr ", TestAddr, 4); + + //set fs and use fs prefix to access the mem + set_FSBASE(TestAddr>>24); + + if(Direction == DQS_READDIR) { + print_debug_dqs("\t\t\t\tTrainDQSPos: 13 for read so write at first", 0, 4); + WriteDQSTestPattern(TestAddr<<8, Pattern, buf_a); + } + + for(DQSDelay = 0; DQSDelay < 48; DQSDelay++ ){ + print_debug_dqs("\t\t\t\t\tTrainDQSPos: 141 DQSDelay ", DQSDelay, 5); + if(MutualCSPassW[DQSDelay] == 0) continue; //skip current delay value if other chipselects have failed all 8 bytelanes + SetDQSDelayAllCSR(ctrl, channel, Direction, DQSDelay); + print_debug_dqs("\t\t\t\t\tTrainDQSPos: 142 MutualCSPassW ", MutualCSPassW[DQSDelay], 5); + if(Direction == DQS_WRITEDIR) { + print_debug_dqs("\t\t\t\t\tTrainDQSPos: 143 for write", 0, 5); + WriteDQSTestPattern(TestAddr<<8, Pattern, buf_a); + } + print_debug_dqs("\t\t\t\t\tTrainDQSPos: 144 Pattern ", Pattern, 5); + 
ReadDQSTestPattern(TestAddr<<8, Pattern); + print_debug_dqs("\t\t\t\t\tTrainDQSPos: 145 MutualCSPassW ", MutualCSPassW[DQSDelay], 5); + MutualCSPassW[DQSDelay] &= CompareDQSTestPattern(channel, TestAddr<<8, Pattern, buf_a); //0: fail, 1=pass + print_debug_dqs("\t\t\t\t\tTrainDQSPos: 146 MutualCSPassW ", MutualCSPassW[DQSDelay], 5); + SetTargetWTIO(TestAddr); + FlushDQSTestPattern(TestAddr<<8, Pattern); + ResetTargetWTIO(); + } + } + + if(BanksPresent) + for(ByteLane = 0; ByteLane < 8; ByteLane++) { + print_debug_dqs("\t\t\t\tTrainDQSPos: 31 ByteLane ",ByteLane, 4); + + LastTest = DQS_FAIL; + RnkDlySeqPassMax = 0; + RnkDlyFilterMax = 0; + RnkDlyFilterMin = 0; + for(DQSDelay=0; DQSDelay<48; DQSDelay++) { + if(MutualCSPassW[DQSDelay] & (1<<ByteLane)) { + + print_debug_dqs("\t\t\t\t\tTrainDQSPos: 321 DQSDelay ", DQSDelay, 5); + print_debug_dqs("\t\t\t\t\tTrainDQSPos: 322 MutualCSPassW ", MutualCSPassW[DQSDelay], 5); + + RnkDlySeqPassMax = DQSDelay; + if(LastTest == DQS_FAIL) { + RnkDlySeqPassMin = DQSDelay; //start sequential run + } + if((RnkDlySeqPassMax - RnkDlySeqPassMin)>(RnkDlyFilterMax-RnkDlyFilterMin)){ + RnkDlyFilterMin = RnkDlySeqPassMin; + RnkDlyFilterMax = RnkDlySeqPassMax; + } + LastTest = DQS_PASS; + } + else { + LastTest = DQS_FAIL; + } + } + print_debug_dqs("\t\t\t\tTrainDQSPos: 33 RnkDlySeqPassMax ", RnkDlySeqPassMax, 4); + + if(RnkDlySeqPassMax == 0) { + Errors |= SB_NODQSPOS; // no passing window + } + else { + print_debug_dqs("\t\t\t\tTrainDQSPos: 34 RnkDlyFilterMax ", RnkDlyFilterMax, 4); + print_debug_dqs("\t\t\t\tTrainDQSPos: 34 RnkDlyFilterMin ", RnkDlyFilterMin, 4); + if((RnkDlyFilterMax - RnkDlyFilterMin)< MIN_DQS_WNDW){ + Errors |= SB_SMALLDQS; + } + else { + unsigned middle_dqs; + middle_dqs = MiddleDQS(RnkDlyFilterMin, RnkDlyFilterMax); + print_debug_dqs("\t\t\t\tTrainDQSPos: 35 middle_dqs ",middle_dqs, 4); + SetDQSDelayCSR(ctrl, channel, ByteLane, Direction, middle_dqs); + save_dqs_delay(channel, ByteLane, Direction, dqs_delay_a, 
middle_dqs); + } + } + + } + + print_debug_dqs("\t\t\tTrainDQSPos: end", 0xff, 3); + + return Errors; + + +} + +static unsigned TrainReadDQS(const struct mem_controller *ctrl, unsigned channel, unsigned pattern, uint8_t *buf_a, uint8_t *dqs_delay_a, struct sys_info *sysinfo) +{ + print_debug_dqs("\t\tTrainReadPos", 0, 2); + return TrainDQSPos(ctrl, channel, DQS_READDIR, pattern, buf_a, dqs_delay_a, sysinfo); +} + +static unsigned TrainWriteDQS(const struct mem_controller *ctrl, unsigned channel, unsigned pattern, uint8_t *buf_a, uint8_t *dqs_delay_a, struct sys_info *sysinfo) +{ + print_debug_dqs("\t\tTrainWritePos", 0, 2); + return TrainDQSPos(ctrl, channel, DQS_WRITEDIR, pattern, buf_a, dqs_delay_a, sysinfo); +} + + + +static unsigned TrainDQSRdWrPos(const struct mem_controller *ctrl, struct sys_info *sysinfo) +{ + const static uint32_t TestPatternJD1a[] = { + 0x00000000,0x00000000,0xFFFFFFFF,0xFFFFFFFF, // QW0-1, ALL-EVEN + 0x00000000,0x00000000,0x00000000,0x00000000, // QW2-3, ALL-EVEN + 0x00000000,0x00000000,0xFFFFFFFF,0xFFFFFFFF, // QW4-5, ALL-EVEN + 0x00000000,0x00000000,0x00000000,0x00000000, // QW6-7, ALL-EVEN + 0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, // QW0-1, DQ0-ODD + 0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, // QW2-3, DQ0-ODD + 0x01010101,0x01010101,0xFeFeFeFe,0xFeFeFeFe, // QW4-5, DQ0-ODD + 0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, // QW6-7, DQ0-ODD + 0x02020202,0x02020202,0x02020202,0x02020202, // QW0-1, DQ1-ODD + 0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW2-3, DQ1-ODD + 0xFdFdFdFd,0xFdFdFdFd,0x02020202,0x02020202, // QW4-5, DQ1-ODD + 0x02020202,0x02020202,0x02020202,0x02020202, // QW6-7, DQ1-ODD + 0x04040404,0x04040404,0xfBfBfBfB,0xfBfBfBfB, // QW0-1, DQ2-ODD + 0x04040404,0x04040404,0x04040404,0x04040404, // QW2-3, DQ2-ODD + 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW4-5, DQ2-ODD + 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW6-7, DQ2-ODD + 0x08080808,0x08080808,0xF7F7F7F7,0xF7F7F7F7, // QW0-1, DQ3-ODD + 
0x08080808,0x08080808,0x08080808,0x08080808, // QW2-3, DQ3-ODD + 0xF7F7F7F7,0xF7F7F7F7,0x08080808,0x08080808, // QW4-5, DQ3-ODD + 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW6-7, DQ3-ODD + 0x10101010,0x10101010,0x10101010,0x10101010, // QW0-1, DQ4-ODD + 0xeFeFeFeF,0xeFeFeFeF,0x10101010,0x10101010, // QW2-3, DQ4-ODD + 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW4-5, DQ4-ODD + 0xeFeFeFeF,0xeFeFeFeF,0x10101010,0x10101010, // QW6-7, DQ4-ODD + 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW0-1, DQ5-ODD + 0xdFdFdFdF,0xdFdFdFdF,0x20202020,0x20202020, // QW2-3, DQ5-ODD + 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW4-5, DQ5-ODD + 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW6-7, DQ5-ODD + 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW0-1, DQ6-ODD + 0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, // QW2-3, DQ6-ODD + 0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, // QW4-5, DQ6-ODD + 0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, // QW6-7, DQ6-ODD + 0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, // QW0-1, DQ7-ODD + 0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, // QW2-3, DQ7-ODD + 0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, // QW4-5, DQ7-ODD + 0x80808080,0x80808080,0x80808080,0x80808080 // QW6-7, DQ7-ODD + }; + const static uint32_t TestPatternJD1b[] = { + 0x00000000,0x00000000,0x00000000,0x00000000, // QW0,CHA-B, ALL-EVEN + 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, // QW1,CHA-B, ALL-EVEN + 0x00000000,0x00000000,0x00000000,0x00000000, // QW2,CHA-B, ALL-EVEN + 0x00000000,0x00000000,0x00000000,0x00000000, // QW3,CHA-B, ALL-EVEN + 0x00000000,0x00000000,0x00000000,0x00000000, // QW4,CHA-B, ALL-EVEN + 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, // QW5,CHA-B, ALL-EVEN + 0x00000000,0x00000000,0x00000000,0x00000000, // QW6,CHA-B, ALL-EVEN + 0x00000000,0x00000000,0x00000000,0x00000000, // QW7,CHA-B, ALL-EVEN + 0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW0,CHA-B, DQ0-ODD + 0x01010101,0x01010101,0x01010101,0x01010101, // QW1,CHA-B, DQ0-ODD + 
0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW2,CHA-B, DQ0-ODD + 0x01010101,0x01010101,0x01010101,0x01010101, // QW3,CHA-B, DQ0-ODD + 0x01010101,0x01010101,0x01010101,0x01010101, // QW4,CHA-B, DQ0-ODD + 0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW5,CHA-B, DQ0-ODD + 0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW6,CHA-B, DQ0-ODD + 0x01010101,0x01010101,0x01010101,0x01010101, // QW7,CHA-B, DQ0-ODD + 0x02020202,0x02020202,0x02020202,0x02020202, // QW0,CHA-B, DQ1-ODD + 0x02020202,0x02020202,0x02020202,0x02020202, // QW1,CHA-B, DQ1-ODD + 0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW2,CHA-B, DQ1-ODD + 0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW3,CHA-B, DQ1-ODD + 0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW4,CHA-B, DQ1-ODD + 0x02020202,0x02020202,0x02020202,0x02020202, // QW5,CHA-B, DQ1-ODD + 0x02020202,0x02020202,0x02020202,0x02020202, // QW6,CHA-B, DQ1-ODD + 0x02020202,0x02020202,0x02020202,0x02020202, // QW7,CHA-B, DQ1-ODD + 0x04040404,0x04040404,0x04040404,0x04040404, // QW0,CHA-B, DQ2-ODD + 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW1,CHA-B, DQ2-ODD + 0x04040404,0x04040404,0x04040404,0x04040404, // QW2,CHA-B, DQ2-ODD + 0x04040404,0x04040404,0x04040404,0x04040404, // QW3,CHA-B, DQ2-ODD + 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW4,CHA-B, DQ2-ODD + 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW5,CHA-B, DQ2-ODD + 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW6,CHA-B, DQ2-ODD + 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW7,CHA-B, DQ2-ODD + 0x08080808,0x08080808,0x08080808,0x08080808, // QW0,CHA-B, DQ3-ODD + 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW1,CHA-B, DQ3-ODD + 0x08080808,0x08080808,0x08080808,0x08080808, // QW2,CHA-B, DQ3-ODD + 0x08080808,0x08080808,0x08080808,0x08080808, // QW3,CHA-B, DQ3-ODD + 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW4,CHA-B, DQ3-ODD + 0x08080808,0x08080808,0x08080808,0x08080808, // QW5,CHA-B, DQ3-ODD + 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW6,CHA-B, DQ3-ODD 
+ 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW7,CHA-B, DQ3-ODD + 0x10101010,0x10101010,0x10101010,0x10101010, // QW0,CHA-B, DQ4-ODD + 0x10101010,0x10101010,0x10101010,0x10101010, // QW1,CHA-B, DQ4-ODD + 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW2,CHA-B, DQ4-ODD + 0x10101010,0x10101010,0x10101010,0x10101010, // QW3,CHA-B, DQ4-ODD + 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW4,CHA-B, DQ4-ODD + 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW5,CHA-B, DQ4-ODD + 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW6,CHA-B, DQ4-ODD + 0x10101010,0x10101010,0x10101010,0x10101010, // QW7,CHA-B, DQ4-ODD + 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW0,CHA-B, DQ5-ODD + 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW1,CHA-B, DQ5-ODD + 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW2,CHA-B, DQ5-ODD + 0x20202020,0x20202020,0x20202020,0x20202020, // QW3,CHA-B, DQ5-ODD + 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW4,CHA-B, DQ5-ODD + 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW5,CHA-B, DQ5-ODD + 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW6,CHA-B, DQ5-ODD + 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW7,CHA-B, DQ5-ODD + 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW0,CHA-B, DQ6-ODD + 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW1,CHA-B, DQ6-ODD + 0x40404040,0x40404040,0x40404040,0x40404040, // QW2,CHA-B, DQ6-ODD + 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW3,CHA-B, DQ6-ODD + 0x40404040,0x40404040,0x40404040,0x40404040, // QW4,CHA-B, DQ6-ODD + 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW5,CHA-B, DQ6-ODD + 0x40404040,0x40404040,0x40404040,0x40404040, // QW6,CHA-B, DQ6-ODD + 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW7,CHA-B, DQ6-ODD + 0x80808080,0x80808080,0x80808080,0x80808080, // QW0,CHA-B, DQ7-ODD + 0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, // QW1,CHA-B, DQ7-ODD + 0x80808080,0x80808080,0x80808080,0x80808080, // QW2,CHA-B, DQ7-ODD + 0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, // QW3,CHA-B, 
DQ7-ODD + 0x80808080,0x80808080,0x80808080,0x80808080, // QW4,CHA-B, DQ7-ODD + 0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, // QW5,CHA-B, DQ7-ODD + 0x80808080,0x80808080,0x80808080,0x80808080, // QW6,CHA-B, DQ7-ODD + 0x80808080,0x80808080,0x80808080,0x80808080 // QW7,CHA-B, DQ7-ODD + }; + uint8_t pattern_buf_x[64 * 18 + 16]; // We need to two cache line So have more 16 bytes to keep 16 byte alignment */ + uint8_t *buf_a; + + unsigned pattern; + uint32_t dword; + uint32_t ecc_bit; + unsigned Errors; + unsigned channel; + int i; + unsigned DQSWrDelay; + unsigned is_Width128 = sysinfo->meminfo[ctrl->node_id].is_Width128; + uint8_t *dqs_delay_a = &sysinfo->dqs_delay_a[ctrl->node_id * 2*2*9]; //channel 2, direction 2 , bytelane *9 + + //enable SSE2 + enable_sse2(); + + //wrap32dis + set_wrap32dis(); + + //disable ECC temp + dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW); + ecc_bit = dword & DCL_DimmEccEn; + dword &= ~(DCL_DimmEccEn); + pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword); + + //SetupDqsPattern + buf_a = (uint8_t *)(((uint32_t)(&pattern_buf_x[0]) + 0x10) & (~0xf)); + + if(is_Width128){ + pattern = 1; + for(i=0;i<16*18;i++) { + *((uint32_t *)(buf_a + i*4)) = TestPatternJD1b[i]; + } + } + else { + pattern = 0; + for(i=0; i<16*9;i++) { + *((uint32_t *)(buf_a + i*4)) = TestPatternJD1a[i]; + } + + } + + print_debug_dqs("\r\nTrainDQSRdWrPos: 0 ctrl ", ctrl->node_id, 0); + + print_debug_addr("TrainDQSRdWrPos: buf_a:", buf_a); + + Errors = 0; + + channel = 0; + while( (channel<2) && (!Errors)) { + print_debug_dqs("\tTrainDQSRdWrPos: 1 channel ",channel, 1); + for(DQSWrDelay = 0; DQSWrDelay < 48; DQSWrDelay++) { + unsigned err; + SetDQSDelayAllCSR(ctrl, channel, DQS_WRITEDIR, DQSWrDelay); + print_debug_dqs("\t\tTrainDQSRdWrPos: 21 DQSWrDelay ", DQSWrDelay, 2); + err= TrainReadDQS(ctrl, channel, pattern, buf_a, dqs_delay_a, sysinfo); + print_debug_dqs("\t\tTrainDQSRdWrPos: 22 err ",err, 2); + if(err == 0) break; + Errors |= err; + } + + 
print_debug_dqs("\tTrainDQSRdWrPos: 3 DQSWrDelay ", DQSWrDelay, 1); + + if(DQSWrDelay < 48) { + Errors = TrainWriteDQS(ctrl, channel, pattern, buf_a, dqs_delay_a, sysinfo); + print_debug_dqs("\tTrainDQSRdWrPos: 4 Errors ", Errors, 1); + + } + channel++; + if(!is_Width128){ + channel++; // skip channel if 64-bit mode + } + } + + //Enable ECC again + dword = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW); + dword &= ~(DCL_DimmEccEn); + dword |= ecc_bit; + pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword); + + //Clear wrap32dis + + clear_wrap32dis(); + + //restore SSE2 setting + disable_sse2(); + + print_debug_dqs("TrainDQSRdWrPos: ", 5, 0); + + return Errors; + +} +static inline uint8_t get_dqs_delay(unsigned channel, unsigned bytelane, unsigned direction, uint8_t *dqs_delay_a) +{ + return dqs_delay_a[channel * 2*9 + direction * 9 + bytelane]; +} + +static unsigned CalcEccDQSPos(unsigned channel,unsigned ByteLane0, unsigned ByteLane1, unsigned InterFactor, unsigned Direction, uint8_t *dqs_delay_a) +/* InterFactor: 0: 100% ByteLane 0 + 0x80: 50% between ByteLane 0 and 1 + 0xff: 99.6% ByteLane 1 and 0.4% like 0 +*/ +{ + unsigned DQSDelay0, DQSDelay1; + unsigned DQSDelay; + + DQSDelay0 = get_dqs_delay(channel, ByteLane0, Direction, dqs_delay_a); + DQSDelay1 = get_dqs_delay(channel, ByteLane1, Direction, dqs_delay_a); + + if(DQSDelay0>DQSDelay1) { + DQSDelay = DQSDelay0 - DQSDelay1; + InterFactor = 0xff - InterFactor; + } + else { + DQSDelay = DQSDelay1 - DQSDelay0; + } + + DQSDelay *= InterFactor; + + DQSDelay >>= 8; // /255 + + if(DQSDelay0>DQSDelay1) { + DQSDelay += DQSDelay1; + } + else { + DQSDelay += DQSDelay0; + } + + return DQSDelay; + +} + +static void SetEccDQSRdWrPos(const struct mem_controller *ctrl, struct sys_info *sysinfo) +{ + unsigned channel; + unsigned ByteLane; + unsigned Direction; + unsigned lane0, lane1, ratio; + unsigned dqs_delay; + + unsigned direction[] = { DQS_READDIR, DQS_WRITEDIR }; + int i; + uint8_t *dqs_delay_a = 
&sysinfo->dqs_delay_a[ctrl->node_id * 2*2*9]; //channel 2, direction 2 , bytelane *9 + + ByteLane = 8; + + for(channel = 0; channel < 2; channel++) { + for(i=0;i<2;i++) { + Direction = direction[i]; + lane0 = 4; lane1 = 5; ratio = 0; + dqs_delay = CalcEccDQSPos(channel, lane0, lane1, ratio, Direction, dqs_delay_a); + print_debug_dqs_pair("\t\tSetEccDQSRdWrPos: channel ", channel, Direction==DQS_READDIR? " R dqs_delay":" W dqs_delay", dqs_delay, 2); + SetDQSDelayCSR(ctrl, channel, ByteLane, Direction, dqs_delay); + save_dqs_delay(channel, ByteLane, Direction, dqs_delay_a, dqs_delay); + } + } +} + +static void train_DqsRcvrEn(const struct mem_controller *ctrl, unsigned Pass, struct sys_info *sysinfo) +{ + print_debug_dqs("\r\ntrain_DqsRcvrEn: begin ctrl ", ctrl->node_id, 0); + TrainRcvrEn(ctrl, Pass, sysinfo); + print_debug_dqs("\r\ntrain_DqsRcvrEn: end ctrl ", ctrl->node_id, 0); + +} +static void train_DqsPos(const struct mem_controller *ctrl, struct sys_info *sysinfo) +{ + print_debug_dqs("\r\ntrain_DqsPos: begin ctrl ", ctrl->node_id, 0); + if(TrainDQSRdWrPos(ctrl, sysinfo) != 0) { + print_err("\r\nDQS Training Rd Wr failed ctrl"); print_err_hex8(ctrl->node_id); print_err("\r\n"); + soft_reset(); + } + else { + SetEccDQSRdWrPos(ctrl, sysinfo); + } + print_debug_dqs("\r\ntrain_DqsPos: end ctrl ", ctrl->node_id, 0); + +} + +#if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1 +static void f0_svm_workaround(int controllers, const struct mem_controller *ctrl, tsc_t *tsc0, struct sys_info *sysinfo) +{ + tsc_t tsc1[8]; + unsigned cpu_f0_f1[8]; + int i; + + print_debug_addr("dqs_timing: tsc1[8] :", tsc1); + + for(i = 0; i < controllers; i++) { + if (!sysinfo->ctrl_present[i]) + continue; + + /* Skip everything if I don't have any memory on this controller */ + if(sysinfo->meminfo[i].dimm_mask==0x00) continue; + + uint32_t dword; + + cpu_f0_f1[i] = is_cpu_pre_f2_in_bsp(i); + + if(!cpu_f0_f1[i]) continue; + + dword = pci_read_config32(ctrl[i].f2, DRAM_CTRL); + dword &= 
~DC_DqsRcvEnTrain; + pci_write_config32(ctrl[i].f2, DRAM_CTRL, dword); + + dword = pci_read_config32(ctrl[i].f2, DRAM_INIT); + dword |= DI_EnDramInit; + pci_write_config32(ctrl[i].f2, DRAM_INIT, dword); + dword &= ~DI_EnDramInit; + pci_write_config32(ctrl[i].f2, DRAM_INIT, dword); + + tsc1[i] = rdtsc(); + print_debug_dqs_tsc("begin: tsc1", i, tsc1[i].hi, tsc1[i].lo, 2); + + dword = tsc1[i].lo + tsc0[i].lo; + if((dword<tsc1[i].lo) || (dword<tsc0[i].lo)) { + tsc1[i].hi++; + } + tsc1[i].lo = dword; + tsc1[i].hi+= tsc0[i].hi; + + print_debug_dqs_tsc("end : tsc1", i, tsc1[i].hi, tsc1[i].lo, 2); + + } + + for(i = 0; i < controllers; i++) { + if (!sysinfo->ctrl_present[i]) + continue; + + /* Skip everything if I don't have any memory on this controller */ + if(sysinfo->meminfo[i].dimm_mask==0x00) continue; + + if(!cpu_f0_f1[i]) continue; + + tsc_t tsc; + + do { + tsc = rdtsc(); + } while ((tsc1[i].hi>tsc.hi) || ((tsc1[i].hi==tsc.hi) && (tsc1[i].lo>tsc.lo))); + + print_debug_dqs_tsc("end : tsc ", i, tsc.hi, tsc.lo, 2); + } + +} + +#endif + + +/* setting variable mtrr, comes from linux kernel source */ +static void set_var_mtrr_dqs( + unsigned int reg, unsigned long basek, unsigned long sizek, + unsigned char type, unsigned address_bits) +{ + msr_t base, mask; + unsigned address_mask_high; + + address_mask_high = ((1u << (address_bits - 32u)) - 1u); + + base.hi = basek >> 22; + base.lo = basek << 10; + + if (sizek < 4*1024*1024) { + mask.hi = address_mask_high; + mask.lo = ~((sizek << 10) -1); + } + else { + mask.hi = address_mask_high & (~((sizek >> 22) -1)); + mask.lo = 0; + } + + if (reg >= 8) + return; + + if (sizek == 0) { + msr_t zero; + zero.lo = zero.hi = 0; + /* The invalid bit is kept in the mask, so we simply clear the + relevant mask register to disable a range. 
*/ + wrmsr (MTRRphysMask_MSR(reg), zero); + } else { + /* Bit 32-35 of MTRRphysMask should be set to 1 */ + base.lo |= type; + mask.lo |= 0x800; + wrmsr (MTRRphysBase_MSR(reg), base); + wrmsr (MTRRphysMask_MSR(reg), mask); + } +} + + +/* fms: find most sigificant bit set, stolen from Linux Kernel Source. */ +static inline unsigned int fms(unsigned int x) +{ + int r; + + __asm__("bsrl %1,%0\n\t" + "jnz 1f\n\t" + "movl $0,%0\n" + "1:" : "=r" (r) : "g" (x)); + return r; +} + +/* fms: find least sigificant bit set */ +static inline unsigned int fls(unsigned int x) +{ + int r; + + __asm__("bsfl %1,%0\n\t" + "jnz 1f\n\t" + "movl $32,%0\n" + "1:" : "=r" (r) : "g" (x)); + return r; +} + +static unsigned int range_to_mtrr(unsigned int reg, + unsigned long range_startk, unsigned long range_sizek, + unsigned long next_range_startk, unsigned char type, unsigned address_bits) +{ + if (!range_sizek || (reg >= 8)) { + return reg; + } + while(range_sizek) { + unsigned long max_align, align; + unsigned long sizek; + /* Compute the maximum size I can make a range */ + max_align = fls(range_startk); + align = fms(range_sizek); + if (align > max_align) { + align = max_align; + } + sizek = 1 << align; +#if MEM_TRAIN_SEQ != 1 + #if CONFIG_USE_INIT == 1 + printk_debug("Setting variable MTRR %d, base: %4dMB, range: %4dMB, type %s\r\n", + reg, range_startk >>10, sizek >> 10, + (type==MTRR_TYPE_UNCACHEABLE)?"UC": + ((type==MTRR_TYPE_WRBACK)?"WB":"Other") + ); + #else + print_debug("Setting variable MTRR "); print_debug_hex8(reg); print_debug(", base: "); print_debug_hex16(range_startk>>10); + print_debug("MB, range: "); print_debug_hex16(sizek >> 10); print_debug("MB, type "); + print_debug( (type==MTRR_TYPE_UNCACHEABLE)?"UC\r\n": + ((type==MTRR_TYPE_WRBACK)?"WB\r\n":"Other\r\n") + ); + #endif +#endif + set_var_mtrr_dqs(reg++, range_startk, sizek, type, address_bits); + range_startk += sizek; + range_sizek -= sizek; + if (reg >= 8) + break; + } + return reg; +} + +static void 
set_top_mem_ap(unsigned tom_k, unsigned tom2_k) +{ + msr_t msr; + + /* Now set top of memory */ + msr.lo = (tom2_k & 0x003fffff) << 10; + msr.hi = (tom2_k & 0xffc00000) >> 22; + wrmsr(TOP_MEM2, msr); + + msr.lo = (tom_k & 0x003fffff) << 10; + msr.hi = (tom_k & 0xffc00000) >> 22; + wrmsr(TOP_MEM, msr); +} + +static void setup_mtrr_dqs(unsigned tom_k, unsigned tom2_k){ + unsigned reg; + msr_t msr; + +#if 0 + //still enable from cache_as_ram.inc + msr = rdmsr(SYSCFG_MSR); + msr.lo |= SYSCFG_MSR_MtrrFixDramModEn; + wrmsr(SYSCFG_MSR,msr); +#endif + + //[0,512k), [512k, 640k) + msr.hi = 0x1e1e1e1e; + msr.lo = msr.hi; + wrmsr(0x250, msr); + wrmsr(0x258, msr); + + //[1M, TOM) + reg = range_to_mtrr(2, 0, tom_k,4*1024*1024, MTRR_TYPE_WRBACK, 40); + + //[4G, TOM2) + if(tom2_k) { + //enable tom2 and type + msr = rdmsr(SYSCFG_MSR); + msr.lo |= (1<<21) | (1<<22); //MtrrTom2En and Tom2ForceMemTypeWB + wrmsr(SYSCFG_MSR, msr); + } + +} + +static void clear_mtrr_dqs(unsigned tom2_k){ + msr_t msr; + unsigned i; + + //still enable from cache_as_ram.inc + msr = rdmsr(SYSCFG_MSR); + msr.lo |= SYSCFG_MSR_MtrrFixDramModEn; + wrmsr(SYSCFG_MSR,msr); + + //[0,512k), [512k, 640k) + msr.hi = 0; + msr.lo = msr.hi; + wrmsr(0x250, msr); + wrmsr(0x258, msr); + + //[1M, TOM) + for(i=0x204;i<0x210;i++) { + wrmsr(i, msr); + } + + //[4G, TOM2) + if(tom2_k) { + //enable tom2 and type + msr = rdmsr(SYSCFG_MSR); + msr.lo &= ~((1<<21) | (1<<22)); //MtrrTom2En and Tom2ForceMemTypeWB + wrmsr(SYSCFG_MSR, msr); + } +} + +static void set_htic_bit(unsigned i, unsigned val, unsigned bit) +{ + uint32_t dword; + dword = pci_read_config32(PCI_DEV(0, 0x18+i, 0), HT_INIT_CONTROL); + dword &= ~(1<<bit); + dword |= ((val & 1) <<bit); + pci_write_config32(PCI_DEV(0, 0x18+i, 0), HT_INIT_CONTROL, dword); +} + + +static unsigned get_htic_bit(unsigned i, unsigned bit) +{ + uint32_t dword; + dword = pci_read_config32(PCI_DEV(0, 0x18+i, 0), HT_INIT_CONTROL); + dword &= (1<<bit); + return dword; +} + +static void 
wait_till_sysinfo_in_ram(void) +{ + while(1) { + if(get_htic_bit(0, 9)) return; + } +} + +static void set_sysinfo_in_ram(unsigned val) +{ + set_htic_bit(0, val, 9); +} + + +#if MEM_TRAIN_SEQ == 0 + + +#if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1 +static void dqs_timing(int controllers, const struct mem_controller *ctrl, tsc_t *tsc0, struct sys_info *sysinfo) +#else +static void dqs_timing(int controllers, const struct mem_controller *ctrl, struct sys_info *sysinfo) +#endif +{ + int i; + + tsc_t tsc[5]; + + //need to enable mtrr, so dqs training could access the test address + setup_mtrr_dqs(sysinfo->tom_k, sysinfo->tom2_k); + + for(i = 0; i < controllers; i++) { + if (!sysinfo->ctrl_present[ i ]) + continue; + + /* Skip everything if I don't have any memory on this controller */ + if(sysinfo->meminfo[i].dimm_mask==0x00) continue; + + fill_mem_cs_sysinfo(i, ctrl+i, sysinfo); + } + + tsc[0] = rdtsc(); + for(i = 0; i < controllers; i++) { + if (!sysinfo->ctrl_present[ i ]) + continue; + + /* Skip everything if I don't have any memory on this controller */ + if(sysinfo->meminfo[i].dimm_mask==0x00) continue; + + print_debug("DQS Training:RcvrEn:Pass1: "); + print_debug_hex8(i); + train_DqsRcvrEn(ctrl+i, 1, sysinfo); + print_debug(" done\r\n"); + } + + tsc[1] = rdtsc(); +#if K8_REV_F_SUPPORT_F0_F1_WORKAROUND == 1 + f0_svm_workaround(controllers, ctrl, tsc0, sysinfo); +#endif + + tsc[2] = rdtsc(); + for(i = 0; i < controllers; i++) { + if (!sysinfo->ctrl_present[i]) + continue; + + /* Skip everything if I don't have any memory on this controller */ + if(sysinfo->meminfo[i].dimm_mask==0x00) continue; + + print_debug("DQS Training:DQSPos: "); + print_debug_hex8(i); + train_DqsPos(ctrl+i, sysinfo); + print_debug(" done\r\n"); + } + + tsc[3] = rdtsc(); + for(i = 0; i < controllers; i++) { + if (!sysinfo->ctrl_present[i]) + continue; + + /* Skip everything if I don't have any memory on this controller */ + if(sysinfo->meminfo[i].dimm_mask==0x00) continue; + + print_debug("DQS 
Training:RcvrEn:Pass2: "); + print_debug_hex8(i); + train_DqsRcvrEn(ctrl+i, 2, sysinfo); + print_debug(" done\r\n"); + sysinfo->mem_trained[i]=1; + } + + tsc[4] = rdtsc(); + clear_mtrr_dqs(sysinfo->tom2_k); + + + for(i=0;i<5;i++) { + print_debug_dqs_tsc_x("DQS Training:tsc", i, tsc[i].hi, tsc[i].lo); + } + + + +} + +#endif + + +#if MEM_TRAIN_SEQ > 0 + +static void dqs_timing(int i, const struct mem_controller *ctrl, struct sys_info *sysinfo, unsigned v) +{ + + int ii; + + tsc_t tsc[4]; + + +#if MEM_TRAIN_SEQ == 1 + if(sysinfo->mem_trained[i]) return; + //need to enable mtrr, so dqs training could access the test address + setup_mtrr_dqs(sysinfo->tom_k, sysinfo->tom2_k); +#endif + + fill_mem_cs_sysinfo(i, ctrl+i, sysinfo); + + if(v) { + tsc[0] = rdtsc(); + + print_debug("set DQS timing:RcvrEn:Pass1: "); + print_debug_hex8(i); + } + train_DqsRcvrEn(ctrl+i, 1, sysinfo); + + if(v) { + print_debug(" done\r\n"); + tsc[1] = rdtsc(); + print_debug("set DQS timing:DQSPos: "); + print_debug_hex8(i); + } + + train_DqsPos(ctrl+i, sysinfo); + + if(v) { + print_debug(" done\r\n"); + tsc[2] = rdtsc(); + + print_debug("set DQS timing:RcvrEn:Pass2: "); + print_debug_hex8(i); + } + train_DqsRcvrEn(ctrl+i, 2, sysinfo); + + if(v) { + print_debug(" done\r\n"); + + tsc[3] = rdtsc(); + } + +#if MEM_TRAIN_SEQ == 1 + clear_mtrr_dqs(sysinfo->tom2_k); +#endif + + if(v) { + for(ii=0;ii<4;ii++) { + print_debug_dqs_tsc_x("Total DQS Training : tsc ", ii, tsc[ii].hi, tsc[ii].lo); + } + } + + sysinfo->mem_trained[i]=1; + +} +#endif + +#if MEM_TRAIN_SEQ == 1 +static void train_ram(unsigned nodeid, struct sys_info *sysinfo, struct sys_info *sysinfox) +{ + dqs_timing(nodeid, sysinfo->ctrl,sysinfo, 0); // keep the output tidy +// memcpy(&sysinfox->dqs_rcvr_dly_a[nodeid * 2 * 8],&sysinfo->dqs_rcvr_dly_a[nodeid * 2 * 8], 2*8); +// memcpy(&sysinfox->dqs_delay_a[nodeid * 2 * 2 * 9], &sysinfo->dqs_delay_a[nodeid * 2 * 2 * 9], 2 * 2 * 9); + sysinfox->mem_trained[nodeid] = sysinfo->mem_trained[nodeid]; + +} 
+static void copy_and_run_ap_code_in_car(unsigned ret_addr); +static inline void train_ram_on_node(unsigned nodeid, unsigned coreid, struct sys_info *sysinfo, unsigned retcall) +{ + if(coreid) return; // only do it on core0 + struct sys_info *sysinfox = ((CONFIG_LB_MEM_TOPK<<10) - DCACHE_RAM_GLOBAL_VAR_SIZE); + wait_till_sysinfo_in_ram(); // use pci to get it + + if(sysinfox->mem_trained[nodeid] == 0) { + if (sysinfox->ctrl_present[ nodeid ] && sysinfox->meminfo[nodeid].dimm_mask) { + sysinfo->tom_k = sysinfox->tom_k; + sysinfo->tom2_k = sysinfox->tom2_k; + sysinfo->meminfo[nodeid].is_Width128 = sysinfox->meminfo[nodeid].is_Width128; + set_top_mem_ap(sysinfo->tom_k, sysinfo->tom2_k); // keep the ap's tom consistent with bsp's + #if CONFIG_AP_CODE_IN_CAR == 0 + print_debug("CODE IN ROM AND RUN ON NODE:"); print_debug_hex8(nodeid); print_debug("\r\n"); + train_ram(nodeid, sysinfo, sysinfox); + #else + /* Can copy dqs_timing to ap cache and run from cache? + * we need linuxbios_ap_car.rom? and treat it as linuxbios_ram.rom for ap ? 
+ */ + copy_and_run_ap_code_in_car(retcall); + // will go back by jump + #endif + } + } +} +#endif diff --git a/src/northbridge/amd/amdk8/setup_resource_map.c b/src/northbridge/amd/amdk8/setup_resource_map.c index 27da719409..6b710334e4 100644 --- a/src/northbridge/amd/amdk8/setup_resource_map.c +++ b/src/northbridge/amd/amdk8/setup_resource_map.c @@ -21,8 +21,8 @@ static void setup_resource_map_offset(const unsigned int *register_values, int m print_debug("\r\n"); #endif #endif - dev = (register_values[i] & ~0xff) + offset_pci_dev; - where = register_values[i] & 0xff; + dev = (register_values[i] & ~0xfff) + offset_pci_dev; + where = register_values[i] & 0xfff; reg = pci_read_config32(dev, where); reg &= register_values[i+1]; reg |= register_values[i+2] + offset_io_base; @@ -58,13 +58,13 @@ static void setup_resource_map_x_offset(const unsigned int *register_values, int #if RES_DEBUG #if CONFIG_USE_INIT printk_debug("%04x: %02x %08x <- & %08x | %08x\r\n", - i/4, register_values[i], + i>>2, register_values[i], register_values[i+1] + ( (register_values[i]==RES_PCI_IO) ? offset_pci_dev : 0), register_values[i+2], register_values[i+3] + ( ( (register_values[i] & RES_PORT_IO_32) == RES_PORT_IO_32) ? 
offset_io_base : 0) ); #else - print_debug_hex16(i/4); + print_debug_hex16(i>>2); print_debug(": "); print_debug_hex8(register_values[i]); print_debug(" "); @@ -84,8 +84,8 @@ static void setup_resource_map_x_offset(const unsigned int *register_values, int device_t dev; unsigned where; unsigned long reg; - dev = (register_values[i+1] & ~0xff) + offset_pci_dev; - where = register_values[i+1] & 0xff; + dev = (register_values[i+1] & ~0xfff) + offset_pci_dev; + where = register_values[i+1] & 0xfff; reg = pci_read_config32(dev, where); reg &= register_values[i+2]; reg |= register_values[i+3]; @@ -173,8 +173,8 @@ static void setup_resource_map_x(const unsigned int *register_values, int max) device_t dev; unsigned where; unsigned long reg; - dev = register_values[i+1] & ~0xff; - where = register_values[i+1] & 0xff; + dev = register_values[i+1] & ~0xfff; + where = register_values[i+1] & 0xfff; reg = pci_read_config32(dev, where); reg &= register_values[i+2]; reg |= register_values[i+3]; diff --git a/src/northbridge/amd/amdk8/spd_ddr2.h b/src/northbridge/amd/amdk8/spd_ddr2.h new file mode 100644 index 0000000000..e70020d52d --- /dev/null +++ b/src/northbridge/amd/amdk8/spd_ddr2.h @@ -0,0 +1,66 @@ +/* SPDs for DDR2 SDRAM */ +#define SPD_MEM_TYPE 2 + #define SPD_MEM_TYPE_SDRAM_DDR 0x07 + #define SPD_MEM_TYPE_SDRAM_DDR2 0x08 + +#define SPD_DIMM_TYPE 20 /* x bit0 or bit4 =1 mean registered*/ + #define SPD_DIMM_TYPE_RDIMM (1<<0) + #define SPD_DIMM_TYPE_UDIMM (1<<1) + #define SPD_DIMM_TYPE_SODIMM (1<<2) + #define SPD_DIMM_TYPE_uDIMM (1<<3) + #define SPD_DIMM_TYPE_mRDIMM (1<<4) + #define SPD_DIMM_TYPE_mUDIMM (1<<5) +#define SPD_MOD_ATTRIB 21 + #define SPD_MOD_ATTRIB_DIFCK 0x20 + #define SPD_MOD_ATTRIB_REGADC 0x11 /* x */ + #define SPD_MOD_ATTRIB_PROBE 0x40 + +#define SPD_DEV_ATTRIB 22 /* Device attributes --- general */ +#define SPD_DIMM_CONF_TYPE 11 + #define SPD_DIMM_CONF_TYPE_ECC 0x02 + #define SPD_DIMM_CONF_TYPE_ADDR_PARITY 0x04 /* ? 
*/ + +#define SPD_ROW_NUM 3 /* Number of Row addresses */ +#define SPD_COL_NUM 4 /* Number of Column addresses */ +#define SPD_BANK_NUM 17 /* SDRAM Device attributes - Number of Banks on SDRAM device, it could be 0x4, 0x8, so address lines for that would be 2, and 3 */ + +#define SPD_MOD_ATTRIB_RANK 5 /* include Number of Ranks bit [2:0], Package (bit4, 1=stack, 0=planr), Height bit[7:5] */ + #define SPD_MOD_ATTRIB_RANK_NUM_SHIFT 0 + #define SPD_MOD_ATTRIB_RANK_NUM_MASK 0x07 + #define SPD_MOD_ATTRIB_RANK_NUM_BASE 1 + #define SPD_MOD_ATTRIB_RANK_NUM_MIN 1 + #define SPD_MOD_ATTRIB_RANK_NUM_MAX 8 + +#define SPD_RANK_SIZE 31 /* Only one bit is set */ + #define SPD_RANK_SIZE_1GB (1<<0) + #define SPD_RANK_SIZE_2GB (1<<1) + #define SPD_RANK_SIZE_4GB (1<<2) + #define SPD_RANK_SIZE_8GB (1<<3) + #define SPD_RANK_SIZE_16GB (1<<4) + #define SPD_RANK_SIZE_128MB (1<<5) + #define SPD_RANK_SIZE_256MB (1<<6) + #define SPD_RANK_SIZE_512MB (1<<7) + +#define SPD_DATA_WIDTH 6 /* valid value 0, 32, 33, 36, 64, 72, 80, 128, 144, 254, 255 */ +#define SPD_PRI_WIDTH 13 /* Primary SDRAM Width, it could be 0x08 or 0x10 */ +#define SPD_ERR_WIDTH 14 /* Error Checking SDRAM Width, it could be 0x08 or 0x10 */ + +#define SPD_CAS_LAT 18 /* SDRAM Device Attributes -- CAS Latency */ + #define SPD_CAS_LAT_2 (1<<2) + #define SPD_CAS_LAT_3 (1<<3) + #define SPD_CAS_LAT_4 (1<<4) + #define SPD_CAS_LAT_5 (1<<5) + #define SPD_CAS_LAT_6 (1<<6) + +#define SPD_TRP 27 /* bit [7:2] = 1-63 ns, bit [1:0] 0.25ns+, final value ((val>>2) + (val & 3) * 0.25)ns */ +#define SPD_TRRD 28 +#define SPD_TRCD 29 +#define SPD_TRAS 30 +#define SPD_TWR 36 /* x */ +#define SPD_TWTR 37 /* x */ +#define SPD_TRTP 38 /* x */ + +#define SPD_TRC 41 /* add byte 0x40 bit [3:1] , so final val41+ table[((val40>>1) & 0x7)] ... 
table[]={0, 0.25, 0.33, 0.5, 0.75, 0, 0}*/ +#define SPD_TRFC 42 /* add byte 0x40 bit [6:4] , so final val42+ table[((val40>>4) & 0x7)] + (val40 & 1)*256*/ + +#define SPD_TREF 12 diff --git a/src/northbridge/amd/amdk8/ssdt.dsl b/src/northbridge/amd/amdk8/ssdt.dsl new file mode 100644 index 0000000000..0fa002679a --- /dev/null +++ b/src/northbridge/amd/amdk8/ssdt.dsl @@ -0,0 +1,78 @@ +/* + * Copyright 2005 AMD + */ +DefinitionBlock ("SSDT.aml", "SSDT", 1, "AMD-K8", "AMD-ACPI", 100925440) +{ + /* + * These objects were referenced but not defined in this table + */ + External (\_SB_.PCI0, DeviceObj) + + Scope (\_SB.PCI0) + { + Name (BUSN, Package (0x04) + { + 0x11111111, + 0x22222222, + 0x33333333, + 0x44444444 + }) + Name (MMIO, Package (0x10) + { + 0x11111111, + 0x22222222, + 0x33333333, + 0x44444444, + 0x55555555, + 0x66666666, + 0x77777777, + 0x88888888, + 0x99999999, + 0xaaaaaaaa, + 0xbbbbbbbb, + 0xcccccccc, + 0xdddddddd, + 0xeeeeeeee, + 0x11111111, + 0x22222222 + }) + Name (PCIO, Package (0x08) + { + 0x77777777, + 0x88888888, + 0x99999999, + 0xaaaaaaaa, + 0xbbbbbbbb, + 0xcccccccc, + 0xdddddddd, + 0xeeeeeeee + }) + Name (SBLK, 0x11) + Name (TOM1, 0xaaaaaaaa) + Name (SBDN, 0xbbbbbbbb) + Name (HCLK, Package (0x08) + { + 0x11111111, + 0x22222222, + 0x33333333, + 0x44444444, + 0x55555555, + 0x66666666, + 0x77777777, + 0x88888888 + }) + Name (HCDN, Package (0x08) + { + 0x11111111, + 0x22222222, + 0x33333333, + 0x44444444, + 0x55555555, + 0x66666666, + 0x77777777, + 0x88888888 + }) + Name (CBST, 0x88) + } +} + |