diff options
author | arch import user (historical) <svn@openbios.org> | 2005-07-06 17:15:30 +0000 |
---|---|---|
committer | arch import user (historical) <svn@openbios.org> | 2005-07-06 17:15:30 +0000 |
commit | ef03afa405b049a172146aab93cfb81fb21f3945 (patch) | |
tree | 3b59033be66edd60c2cc6c66d6875153dc052a72 /src/northbridge/amd/amdk8 | |
parent | 014c3e185fe8e1455e56efeb496715a67ce292bb (diff) | |
download | coreboot-ef03afa405b049a172146aab93cfb81fb21f3945.tar.xz |
Revision: linuxbios@linuxbios.org--devel/freebios--devel--2.0--patch-34
Creator: Yinghai Lu <yhlu@tyan.com>
AMD D0/E0 Opteron new mem mapping support, AMD E Opteron mem hole support, AMD K8 Four Ranks DIMM support
git-svn-id: svn://svn.coreboot.org/coreboot/trunk@1950 2b7e53f0-3cfb-0310-b3e9-8179ed1497e1
Diffstat (limited to 'src/northbridge/amd/amdk8')
-rw-r--r-- | src/northbridge/amd/amdk8/coherent_ht.c | 358 | ||||
-rw-r--r-- | src/northbridge/amd/amdk8/cpu_rev.c | 31 | ||||
-rw-r--r-- | src/northbridge/amd/amdk8/debug.c | 24 | ||||
-rw-r--r-- | src/northbridge/amd/amdk8/incoherent_ht.c | 3 | ||||
-rw-r--r-- | src/northbridge/amd/amdk8/misc_control.c | 4 | ||||
-rw-r--r-- | src/northbridge/amd/amdk8/northbridge.c | 217 | ||||
-rw-r--r-- | src/northbridge/amd/amdk8/raminit.c | 288 |
7 files changed, 722 insertions, 203 deletions
diff --git a/src/northbridge/amd/amdk8/coherent_ht.c b/src/northbridge/amd/amdk8/coherent_ht.c index 8ca1bfbf89..ca7791cf63 100644 --- a/src/northbridge/amd/amdk8/coherent_ht.c +++ b/src/northbridge/amd/amdk8/coherent_ht.c @@ -92,14 +92,22 @@ typedef uint32_t u32; #define K8_HT_FREQ_1G_SUPPORT 0 #endif -#ifndef CONFIG_MAX_CPUS_4_BUT_MORE_INSTALLED -#define CONFIG_MAX_CPUS_4_BUT_MORE_INSTALLED 0 +#ifndef K8_HT_CHECK_PENDING_LINK + #if CONFIG_MAX_PHYSICAL_CPUS >= 4 + #define K8_HT_CHECK_PENDING_LINK 1 + #else + #define K8_HT_CHECK_PENDING_LINK 0 + #endif +#endif + +#ifndef CONFIG_MAX_PHYSICAL_CPUS_4_BUT_MORE_INSTALLED +#define CONFIG_MAX_PHYSICAL_CPUS_4_BUT_MORE_INSTALLED 0 #endif static inline void print_linkn (const char *strval, uint8_t byteval) { -#if 1 +#if 0 print_debug(strval); print_debug_hex8(byteval); print_debug("\r\n"); #endif } @@ -203,7 +211,7 @@ static void fill_row(u8 node, u8 row, u32 value) pci_write_config32(NODE_HT(node), 0x40+(row<<2), value); } -#if CONFIG_MAX_CPUS > 1 +#if CONFIG_MAX_PHYSICAL_CPUS > 1 static u8 link_to_register(int ldt) { /* @@ -247,6 +255,23 @@ static void rename_temp_node(u8 node) print_spew(" done.\r\n"); } +#if K8_HT_CHECK_PENDING_LINK == 1 +static void wait_ht_stable(uint8_t node) +{ + uint8_t linkn; + for(linkn = 0; linkn<3; linkn++) { + uint8_t regpos; + uint16_t i; + uint32_t reg; + regpos = 0x98 + 0x20 * linkn; + for(i = 0; i < 0xff; i++) { //wait to make sure it is done + reg = pci_read_config32(NODE_HT(node), regpos); + if ((reg & 0x10) == 0) break; // init complete + udelay(10); + } + } +} +#endif static int check_connection(u8 dest) { @@ -260,21 +285,32 @@ static int check_connection(u8 dest) val = pci_read_config32(NODE_HT(dest),0); if(val != 0x11001022) return 0; +// needed? 
+#if K8_HT_CHECK_PENDING_LINK == 1 + wait_ht_stable(dest); +#endif return 1; } -static unsigned read_freq_cap(device_t dev, unsigned pos) +static uint16_t read_freq_cap(device_t dev, uint8_t pos) { /* Handle bugs in valid hypertransport frequency reporting */ - unsigned freq_cap; + uint16_t freq_cap; uint32_t id; freq_cap = pci_read_config16(dev, pos); freq_cap &= ~(1 << HT_FREQ_VENDOR); /* Ignore Vendor HT frequencies */ - /* AMD K8 Unsupported 1Ghz? */ + + #if K8_HT_FREQ_1G_SUPPORT == 1 + if (!is_cpu_pre_e0()) + return freq_cap; + #endif + id = pci_read_config32(dev, 0); + + /* AMD K8 Unsupported 1Ghz? */ if (id == (PCI_VENDOR_ID_AMD | (0x1100 << 16))) { freq_cap &= ~(1 << HT_FREQ_1000Mhz); } @@ -338,7 +374,7 @@ static int optimize_connection(device_t node1, uint8_t link1, device_t node2, ui /* Set node1's widths */ pci_write_config8(node1, link1 + PCI_HT_CAP_HOST_WIDTH + 1, width); - /* Calculate node2's width */ + // * Calculate node2's width */ width = ((width & 0x70) >> 4) | ((width & 0x7) << 4); /* See if I am changing node2's width */ @@ -351,13 +387,38 @@ static int optimize_connection(device_t node1, uint8_t link1, device_t node2, ui return needs_reset; } +static uint8_t get_linkn_first(uint8_t byte) +{ + if(byte & 0x02) { byte = 0; } + else if(byte & 0x04) { byte = 1; } + else if(byte & 0x08) { byte = 2; } + return byte; +} + +static uint8_t get_linkn_last(uint8_t byte) +{ + if(byte & 0x02) { byte &= 0x0f; byte |= 0x00; } + if(byte & 0x04) { byte &= 0x0f; byte |= 0x10; } + if(byte & 0x08) { byte &= 0x0f; byte |= 0x20; } + return byte>>4; +} + +static uint8_t get_linkn_last_count(uint8_t byte) +{ + byte &= 0x0f; + if(byte & 0x02) { byte &= 0xcf; byte |= 0x00; byte+=0x40; } + if(byte & 0x04) { byte &= 0xcf; byte |= 0x10; byte+=0x40; } + if(byte & 0x08) { byte &= 0xcf; byte |= 0x20; byte+=0x40; } + return byte>>4; +} + static void setup_row_local(u8 source, u8 row) /* source will be 7 when it is for temp use*/ { - unsigned linkn; + uint8_t linkn; uint32_t 
val; val = 1; for(linkn = 0; linkn<3; linkn++) { - unsigned regpos; + uint8_t regpos; uint32_t reg; regpos = 0x98 + 0x20 * linkn; reg = pci_read_config32(NODE_HT(source), regpos); @@ -378,12 +439,16 @@ static void setup_row_direct_x(u8 temp, u8 source, u8 dest, u8 linkn) if(((source &1)!=(dest &1)) #if CROSS_BAR_47_56 - && (source<4) && (dest<4) + && ( (source<4)||(source>5) ) //(6,7) (7,6) should still be here + //(6,5) (7,4) should be here #endif ){ val |= (1<<16); } else { - /*for CROSS_BAR_47_56 47, 74, 56, 65 should be here too*/ + /*for CROSS_BAR_47_56 47, 56, should be here too + and for 47, 56, 57, 75, 46, 64 we need to substract another link to + 6, 7, 6, 6, 7, 7 + */ val_s = get_row(temp, source); val |= ((val_s>>16) - (1<<(linkn+1)))<<16; } @@ -391,37 +456,43 @@ static void setup_row_direct_x(u8 temp, u8 source, u8 dest, u8 linkn) fill_row(temp,dest, val ); } -static void setup_row_direct(u8 source, u8 dest, u8 linkn){ - setup_row_direct_x(source, source, dest, linkn); +#if CROSS_BAR_47_56 +static void opt_broadcast_rt(u8 source, u8 dest, u8 kickout) { + uint32_t val; + val = get_row(source, dest); + val -= link_connection(source, kickout)<<16; + fill_row(source, dest, val); } -static void setup_remote_row_direct(u8 source, u8 dest, u8 linkn){ - setup_row_direct_x(7, source, dest, linkn); -} +static void opt_broadcast_rt_group(const u8 *conn, int num) { + int i; -static uint8_t get_linkn_first(uint8_t byte) -{ - if(byte & 0x02) { byte = 0; } - else if(byte & 0x04) { byte = 1; } - else if(byte & 0x08) { byte = 2; } - return byte; + for(i=0; i<num; i+=3) { + opt_broadcast_rt(conn[i], conn[i+1],conn[i+2]); + } } +static void opt_broadcast_rt_plus(u8 source, u8 dest, u8 kickout) { + uint32_t val; + val = get_row(source, dest); + val += link_connection(source, kickout)<<16; + fill_row(source, dest, val); +} + +static void opt_broadcast_rt_plus_group(const u8 *conn, int num) { + int i; + + for(i=0; i<num; i+=3) { + opt_broadcast_rt_plus(conn[i], 
conn[i+1],conn[i+2]); + } +} +#endif -static uint8_t get_linkn_last(uint8_t byte) -{ - if(byte & 0x02) { byte &= 0x0f; byte |= 0x00; } - if(byte & 0x04) { byte &= 0x0f; byte |= 0x10; } - if(byte & 0x08) { byte &= 0x0f; byte |= 0x20; } - return byte>>4; +static void setup_row_direct(u8 source, u8 dest, u8 linkn){ + setup_row_direct_x(source, source, dest, linkn); } -static uint8_t get_linkn_last_count(uint8_t byte) -{ - byte &= 0x0f; - if(byte & 0x02) { byte &= 0xcf; byte |= 0x00; byte+=0x40; } - if(byte & 0x04) { byte &= 0xcf; byte |= 0x10; byte+=0x40; } - if(byte & 0x08) { byte &= 0xcf; byte |= 0x20; byte+=0x40; } - return byte>>4; +static void setup_remote_row_direct(u8 source, u8 dest, u8 linkn){ + setup_row_direct_x(7, source, dest, linkn); } static void setup_temp_row(u8 source, u8 dest) @@ -462,10 +533,10 @@ static void setup_remote_node(u8 node) print_spew("done\r\n"); } -#endif /* CONFIG_MAX_CPUS > 1*/ +#endif /* CONFIG_MAX_PHYSICAL_CPUS > 1*/ -#if CONFIG_MAX_CPUS > 2 +#if CONFIG_MAX_PHYSICAL_CPUS > 2 #if !CROSS_BAR_47_56 static void setup_row_indirect_x(u8 temp, u8 source, u8 dest) #else @@ -497,7 +568,7 @@ static void setup_row_indirect_x(u8 temp, u8 source, u8 dest, u8 gateway, u8 dif if(diff && (val_s!=(val&0xff)) ) { /* use another connect as response*/ val_s -= val & 0xff; -#if (CONFIG_MAX_CPUS > 4) || (CONFIG_MAX_CPUS_4_BUT_MORE_INSTALLED == 1) +#if (CONFIG_MAX_PHYSICAL_CPUS > 4) || (CONFIG_MAX_PHYSICAL_CPUS_4_BUT_MORE_INSTALLED == 1) uint8_t byte; /* Some node have two links left * don't worry we only have (2, (3 as source need to handle @@ -508,7 +579,19 @@ static void setup_row_indirect_x(u8 temp, u8 source, u8 dest, u8 gateway, u8 dif if(source<dest) { val_s-=link_connection(temp, source-2); /* -down*/ } else { - val_s-=link_connection(temp, source+2); /* -up*/ +#if CROSS_BAR_47_56 + #if 0 + if(source==7) { + val_s-=link_connection(temp, 6); // for 7,2 via 5 + } else if (source==6){ + val_s-=link_connection(temp, 7); // for 6,3 via 4 + } else + 
#endif + if (source < gateway) { // for 5, 4 via 7 + val_s-=link_connection(temp, source-2); + } else +#endif + val_s-=link_connection(temp, source+2); /* -up*/ } } #endif @@ -581,12 +664,16 @@ static void setup_remote_row_indirect_group(const u8 *conn, int num) } } -#endif /*CONFIG_MAX_CPUS > 2*/ +#endif /*CONFIG_MAX_PHYSICAL_CPUS > 2*/ static void setup_uniprocessor(void) { print_spew("Enabling UP settings\r\n"); +#if CONFIG_LOGICAL_CPUS==1 + unsigned tmp = (pci_read_config32(NODE_MC(0), 0xe8) >> 12) & 3; + if (tmp>0) return; +#endif disable_probes(); } @@ -595,7 +682,7 @@ struct setup_smp_result { int needs_reset; }; -#if CONFIG_MAX_CPUS > 2 +#if CONFIG_MAX_PHYSICAL_CPUS > 2 static int optimize_connection_group(const u8 *opt_conn, int num) { int needs_reset = 0; int i; @@ -608,7 +695,7 @@ static int optimize_connection_group(const u8 *opt_conn, int num) { } #endif -#if CONFIG_MAX_CPUS > 1 +#if CONFIG_MAX_PHYSICAL_CPUS > 1 static struct setup_smp_result setup_smp2(void) { struct setup_smp_result result; @@ -635,7 +722,7 @@ static struct setup_smp_result setup_smp2(void) print_linkn("(0,1) link=", byte); setup_row_direct(0,1, byte); setup_temp_row(0, 1); - + check_connection(7); /* We found 2 nodes so far */ @@ -645,7 +732,7 @@ static struct setup_smp_result setup_smp2(void) setup_row_local(7,1); setup_remote_row_direct(1, 0, byte); -#if (CONFIG_MAX_CPUS > 4) || (CONFIG_MAX_CPUS_4_BUT_MORE_INSTALLED == 1) +#if (CONFIG_MAX_PHYSICAL_CPUS > 4) || (CONFIG_MAX_PHYSICAL_CPUS_4_BUT_MORE_INSTALLED == 1) val = get_row(7,1); byte = (val>>16) & 0xfe; byte = get_linkn_last_count(byte); @@ -660,7 +747,7 @@ static struct setup_smp_result setup_smp2(void) print_linkn("\t-->(0,1) link=", byte); setup_row_direct(0,1, byte); setup_temp_row(0, 1); - + check_connection(7); /* We found 2 nodes so far */ @@ -679,16 +766,16 @@ static struct setup_smp_result setup_smp2(void) /*don't need and it is done by clear_dead_links */ clear_temp_row(0); #endif - - result.needs_reset = 
optimize_connection( + + result.needs_reset |= optimize_connection( NODE_HT(0), 0x80 + link_to_register(link_connection(0,1)), NODE_HT(1), 0x80 + link_to_register(link_connection(1,0)) ); return result; } -#endif /*CONFIG_MAX_CPUS > 1 */ +#endif /*CONFIG_MAX_PHYSICAL_CPUS > 1 */ -#if CONFIG_MAX_CPUS > 2 +#if CONFIG_MAX_PHYSICAL_CPUS > 2 static struct setup_smp_result setup_smp4(int needs_reset) { @@ -776,7 +863,7 @@ static struct setup_smp_result setup_smp4(int needs_reset) setup_temp_row(2,3); check_connection(7); /* to 3*/ -#if (CONFIG_MAX_CPUS > 4) || (CONFIG_MAX_CPUS_4_BUT_MORE_INSTALLED == 1) +#if (CONFIG_MAX_PHYSICAL_CPUS > 4) || (CONFIG_MAX_PHYSICAL_CPUS_4_BUT_MORE_INSTALLED == 1) /* We need to find out which link is to node3 */ if((byte>>2)==2) { /* one to node3, one to node0, one to node4*/ val = get_row(7,3); @@ -797,7 +884,7 @@ static struct setup_smp_result setup_smp4(int needs_reset) print_linkn("(3,2) link=", byte); setup_remote_row_direct(3,2, byte); -#if (CONFIG_MAX_CPUS > 4) || (CONFIG_MAX_CPUS_4_BUT_MORE_INSTALLED == 1) +#if (CONFIG_MAX_PHYSICAL_CPUS > 4) || (CONFIG_MAX_PHYSICAL_CPUS_4_BUT_MORE_INSTALLED == 1) /* set link from 3 to 5 before enable it*/ val = get_row(7,3); byte = ((val>>16) & 0xfe) - link_connection(7,2) - link_connection(7,1); @@ -861,15 +948,15 @@ static struct setup_smp_result setup_smp4(int needs_reset) 2,3, }; - result.needs_reset = optimize_connection_group(opt_conn4, sizeof(opt_conn4)/sizeof(opt_conn4[0])); + result.needs_reset |= optimize_connection_group(opt_conn4, sizeof(opt_conn4)/sizeof(opt_conn4[0])); return result; } -#endif /* CONFIG_MAX_CPUS > 2 */ +#endif /* CONFIG_MAX_PHYSICAL_CPUS > 2 */ -#if CONFIG_MAX_CPUS > 4 +#if CONFIG_MAX_PHYSICAL_CPUS > 4 static struct setup_smp_result setup_smp6(int needs_reset) { @@ -975,7 +1062,7 @@ static struct setup_smp_result setup_smp6(int needs_reset) setup_temp_row(4,5); check_connection(7); /* to 5*/ -#if CONFIG_MAX_CPUS > 6 +#if CONFIG_MAX_PHYSICAL_CPUS > 6 /* We need to find 
out which link is to node5 */ if((byte>>2)==2) { /* one to node5, one to node2, one to node6*/ @@ -1075,15 +1162,15 @@ static struct setup_smp_result setup_smp6(int needs_reset) 4, 5, #endif }; - result.needs_reset = optimize_connection_group(opt_conn6, sizeof(opt_conn6)/sizeof(opt_conn6[0])); + result.needs_reset |= optimize_connection_group(opt_conn6, sizeof(opt_conn6)/sizeof(opt_conn6[0])); return result; } -#endif /* CONFIG_MAX_CPUS > 4 */ +#endif /* CONFIG_MAX_PHYSICAL_CPUS > 4 */ -#if CONFIG_MAX_CPUS > 6 +#if CONFIG_MAX_PHYSICAL_CPUS > 6 static struct setup_smp_result setup_smp8(int needs_reset) { @@ -1114,10 +1201,10 @@ static struct setup_smp_result setup_smp8(int needs_reset) return result; } #if TRY_HIGH_FIRST == 1 - byte &= 3; /* bit [3,2] is count-1 or 2*/ -#else byte = ((val>>16) & 0xfe) - link_connection(4,2); byte = get_linkn_first(byte); /*Min link to 6*/ +#else + byte &= 3; /* bit [3,2] is count-1 or 2*/ #endif print_linkn("(4,6) link=", byte); setup_row_direct(4, 6, byte); @@ -1153,7 +1240,7 @@ static struct setup_smp_result setup_smp8(int needs_reset) /*1, 7, 3, 0,*/ 2, 6, 4, 0, /*2, 7, 4, 0,*/ - 3, 6, 5, 0, + 3, 6, 5, 1, /*3, 7, 5, 0,*/ #endif }; @@ -1190,9 +1277,9 @@ static struct setup_smp_result setup_smp8(int needs_reset) val = get_row(5,5); byte = ((val>>16) & 0xfe) - link_connection(5,3); #if TRY_HIGH_FIRST == 1 - byte = get_linkn_last(byte); + byte = get_linkn_first(byte); #else - byte = get_linkn_first(byte); + byte = get_linkn_last(byte); #endif print_linkn("(5,6) link=", byte); setup_row_direct(5, 6, byte); @@ -1339,30 +1426,93 @@ static struct setup_smp_result setup_smp8(int needs_reset) 7, 3, 7, 4, #else + + + 4, 5, 6, 1, + 5, 4, 7, 1, + + 6, 1, 5, 0, // or 4, 1 + 6, 2, 4, 0, + 6, 3, 5, 0, // or 4, 1 + + 7, 0, 4, 0, // or 5, 1 + 7, 1, 5, 0, + 7, 2, 4, 0, // or 5, 1 + 7, 3, 5, 0, + 0, 7, 2, 0, /* restore it*/ 1, 7, 3, 0, - 2, 7, 4, 0, + 2, 7, 4, 1, 3, 7, 5, 0, - 6, 1, 5, 0, - 6, 2, 4, 0, - 6, 3, 5, 0, - - 7, 0, 4, 0, - 7, 1, 5, 0, - 
7, 2, 4, 0, - 7, 3, 5, 0, + 2, 5, 4, 1, /* reset it */ + 3, 4, 5, 1, + + 4, 1, 2, 1, /* reset it */ + 4, 3, 2, 1, + + 5, 2, 3, 1, /* reset it */ + 5, 0, 3, 1, - 4, 5, 6, 1, - 5, 4, 7, 1, #endif }; setup_row_indirect_group(conn8_3, sizeof(conn8_3)/sizeof(conn8_3[0])); + +#if CROSS_BAR_47_56 + /* for 47, 56, 57, 75, 46, 64 we need to substract another link to + 6, 7, 6, 6, 7, 7 */ + static const u8 conn8_4[] = { +//direct + 4, 7, 6, + 5, 6, 7, + 5, 7, 6, + 7, 5, 6, + 4, 6, 7, + 6, 4, 7, + +//in direct + 0, 6, 1, + 0, 7, 1, + + 1, 6, 0, + 1, 7, 0, + + 2, 6, 3, +// 2, 7, 3, + + +// 3, 6, 1, + + 3, 7, 2, + + 6, 0, 7, + 6, 1, 7, // needed for via 5 + 6, 1, 4, // ??? + 6, 2, 7, + 6, 3, 7, // needed for via 5 + 6, 3, 4, //??? + 7, 0, 6, // needed for via 4 + 7, 0, 5, //??? + 7, 1, 6, + 7, 2, 6, // needed for via 4 + 7, 2, 5, //??? + 7, 3, 6, + }; + + opt_broadcast_rt_group(conn8_4, sizeof(conn8_4)/sizeof(conn8_4[0])); + + static const u8 conn8_5[] = { + 2, 7, 0, + + 3, 6, 1, + }; + + opt_broadcast_rt_plus_group(conn8_5, sizeof(conn8_5)/sizeof(conn8_5[0])); +#endif + + /* ready to enable RT for Node 7 */ enable_routing(7); /* enable routing on node 7 (temp.) 
*/ - static const uint8_t opt_conn8[] ={ 4, 6, @@ -1374,15 +1524,15 @@ static struct setup_smp_result setup_smp8(int needs_reset) 6, 7, }; /* optimize physical connections - by LYH */ - result.needs_reset = optimize_connection_group(opt_conn8, sizeof(opt_conn8)/sizeof(opt_conn8[0])); + result.needs_reset |= optimize_connection_group(opt_conn8, sizeof(opt_conn8)/sizeof(opt_conn8[0])); return result; } -#endif /* CONFIG_MAX_CPUS > 6 */ +#endif /* CONFIG_MAX_PHYSICAL_CPUS > 6 */ -#if CONFIG_MAX_CPUS > 1 +#if CONFIG_MAX_PHYSICAL_CPUS > 1 static struct setup_smp_result setup_smp(void) { @@ -1391,17 +1541,17 @@ static struct setup_smp_result setup_smp(void) print_spew("Enabling SMP settings\r\n"); result = setup_smp2(); -#if CONFIG_MAX_CPUS > 2 +#if CONFIG_MAX_PHYSICAL_CPUS > 2 if(result.nodes == 2) result = setup_smp4(result.needs_reset); #endif -#if CONFIG_MAX_CPUS > 4 +#if CONFIG_MAX_PHYSICAL_CPUS > 4 if(result.nodes == 4) result = setup_smp6(result.needs_reset); #endif -#if CONFIG_MAX_CPUS > 6 +#if CONFIG_MAX_PHYSICAL_CPUS > 6 if(result.nodes == 6) result = setup_smp8(result.needs_reset); #endif @@ -1424,7 +1574,7 @@ static unsigned verify_mp_capabilities(unsigned nodes) } switch(mask) { -#if CONFIG_MAX_CPUS > 2 +#if CONFIG_MAX_PHYSICAL_CPUS > 2 case 0x02: /* MPCap */ if(nodes > 2) { print_err("Going back to DP\r\n"); @@ -1449,7 +1599,7 @@ static void clear_dead_routes(unsigned nodes) { int last_row; int node, row; -#if CONFIG_MAX_CPUS > 6 +#if CONFIG_MAX_PHYSICAL_CPUS > 6 if(nodes==8) return;/* don't touch (7,7)*/ #endif last_row = nodes; @@ -1471,12 +1621,38 @@ static void clear_dead_routes(unsigned nodes) fill_row(node, node, (((val & 0xff) | ((val >> 8) & 0xff)) << 16) | 0x0101); } } -#endif /* CONFIG_MAX_CPUS > 1 */ +#endif /* CONFIG_MAX_PHYSICAL_CPUS > 1 */ + +#if CONFIG_LOGICAL_CPUS==1 +static unsigned verify_dualcore(unsigned nodes) +{ + unsigned node, totalcpus, tmp; + + totalcpus = 0; + for (node=0; node<nodes; node++) { + tmp = 
(pci_read_config32(NODE_MC(node), 0xe8) >> 12) & 3 ; + totalcpus += (tmp + 1); + } + + return totalcpus; + +} +#endif static void coherent_ht_finalize(unsigned nodes) { unsigned node; int rev_a0; +#if CONFIG_LOGICAL_CPUS==1 + unsigned total_cpus; + + if(read_option(CMOS_VSTART_dual_core, CMOS_VLEN_dual_core, 0) == 0) { /* dual_core */ + total_cpus = verify_dualcore(nodes); + } + else { + total_cpus = nodes; + } +#endif /* set up cpu count and node count and enable Limit * Config Space Range for all available CPUs. @@ -1494,7 +1670,11 @@ static void coherent_ht_finalize(unsigned nodes) /* Set the Total CPU and Node count in the system */ val = pci_read_config32(dev, 0x60); val &= (~0x000F0070); +#if CONFIG_LOGICAL_CPUS==1 + val |= ((total_cpus-1)<<16)|((nodes-1)<<4); +#else val |= ((nodes-1)<<16)|((nodes-1)<<4); +#endif pci_write_config32(dev, 0x60, val); /* Only respond to real cpu pci configuration cycles @@ -1557,7 +1737,7 @@ static int apply_cpu_errata_fixes(unsigned nodes, int needs_reset) } } - else { + else if(is_cpu_pre_d0()) { // d0 later don't need it uint32_t cmd_ref; /* Errata 98 * Set Clk Ramp Hystersis to 7 @@ -1607,9 +1787,13 @@ static int setup_coherent_ht_domain(void) { struct setup_smp_result result; +#if K8_HT_CHECK_PENDING_LINK == 1 + //needed? 
+ wait_ht_stable(0); +#endif enable_bsp_routing(); -#if CONFIG_MAX_CPUS > 1 +#if CONFIG_MAX_PHYSICAL_CPUS > 1 result = setup_smp(); result.nodes = verify_mp_capabilities(result.nodes); clear_dead_routes(result.nodes); diff --git a/src/northbridge/amd/amdk8/cpu_rev.c b/src/northbridge/amd/amdk8/cpu_rev.c index 15b4cf6318..1eb47f85f5 100644 --- a/src/northbridge/amd/amdk8/cpu_rev.c +++ b/src/northbridge/amd/amdk8/cpu_rev.c @@ -1,25 +1,46 @@ #include <arch/cpu.h> static int is_cpu_rev_a0(void) { - return (cpuid_eax(1) & 0xffef) == 0x0f00; + return (cpuid_eax(1) & 0xfffef) == 0x0f00; +} +//AMD_D0_SUPPORT +static int is_cpu_pre_d0(void) +{ + return (cpuid_eax(1) & 0xfff0f) < 0x10f00; +} + +static int is_cpu_d0(void) +{ + return (cpuid_eax(1) & 0xfff0f) == 0x10f00; +} + +//AMD_E0_SUPPORT +static int is_cpu_pre_e0(void) +{ + return (cpuid_eax(1) & 0xfff0f) < 0x20f00; +} + +static int is_cpu_e0(void) +{ + return (cpuid_eax(1) & 0xfff00) == 0x20f00; } static int is_cpu_pre_c0(void) { - return (cpuid_eax(1) & 0xffef) < 0x0f48; + return (cpuid_eax(1) & 0xfffef) < 0x0f48; } static int is_cpu_c0(void) { - return (cpuid_eax(1) & 0xffef) == 0x0f48; + return (cpuid_eax(1) & 0xfffef) == 0x0f48; } static int is_cpu_pre_b3(void) { - return (cpuid_eax(1) & 0xffef) < 0x0f41; + return (cpuid_eax(1) & 0xfffef) < 0x0f41; } static int is_cpu_b3(void) { - return (cpuid_eax(1) & 0xffef) == 0x0f41; + return (cpuid_eax(1) & 0xfffef) == 0x0f41; } diff --git a/src/northbridge/amd/amdk8/debug.c b/src/northbridge/amd/amdk8/debug.c index 7686662d04..eeba1e1e2c 100644 --- a/src/northbridge/amd/amdk8/debug.c +++ b/src/northbridge/amd/amdk8/debug.c @@ -27,10 +27,6 @@ static void print_pci_devices(void) continue; } print_debug_pci_dev(dev); - print_debug(" "); - print_debug_hex16(id & 0xffff); - print_debug(" "); - print_debug_hex16((id>>16) & 0xffff); print_debug("\r\n"); } } @@ -113,7 +109,6 @@ static void dump_spd_registers(const struct mem_controller *ctrl) } status = smbus_read_byte(device, j); if 
(status < 0) { - print_debug("bad device\r\n"); break; } byte = status & 0xff; @@ -139,7 +134,6 @@ static void dump_spd_registers(const struct mem_controller *ctrl) } status = smbus_read_byte(device, j); if (status < 0) { - print_debug("bad device\r\n"); break; } byte = status & 0xff; @@ -152,29 +146,25 @@ static void dump_spd_registers(const struct mem_controller *ctrl) } static void dump_smbus_registers(void) { - int i; + unsigned device; print_debug("\r\n"); - for(i = 1; i < 0x80; i++) { - unsigned device; - device = i; + for(device = 1; device < 0x80; device++) { int j; + if( smbus_read_byte(device, 0) < 0 ) continue; print_debug("smbus: "); print_debug_hex8(device); for(j = 0; j < 256; j++) { int status; unsigned char byte; + status = smbus_read_byte(device, j); + if (status < 0) { + break; + } if ((j & 0xf) == 0) { print_debug("\r\n"); print_debug_hex8(j); print_debug(": "); } - status = smbus_read_byte(device, j); - if (status < 0) { - print_debug("bad device status="); - print_debug_hex32(status); - print_debug("\r\n"); - break; - } byte = status & 0xff; print_debug_hex8(byte); print_debug_char(' '); diff --git a/src/northbridge/amd/amdk8/incoherent_ht.c b/src/northbridge/amd/amdk8/incoherent_ht.c index d76a3e8731..ec767456e8 100644 --- a/src/northbridge/amd/amdk8/incoherent_ht.c +++ b/src/northbridge/amd/amdk8/incoherent_ht.c @@ -123,6 +123,9 @@ static uint16_t ht_read_freq_cap(device_t dev, uint8_t pos) /* AMD K8 Unsupported 1Ghz? */ if (id == (PCI_VENDOR_ID_AMD | (0x1100 << 16))) { + #if K8_HT_FREQ_1G_SUPPORT == 1 + if (is_cpu_pre_e0()) // CK804 support 1G? + #endif freq_cap &= ~(1 << HT_FREQ_1000Mhz); } diff --git a/src/northbridge/amd/amdk8/misc_control.c b/src/northbridge/amd/amdk8/misc_control.c index 379d8b1dc4..4ac6ef473d 100644 --- a/src/northbridge/amd/amdk8/misc_control.c +++ b/src/northbridge/amd/amdk8/misc_control.c @@ -158,7 +158,7 @@ static void misc_control_init(struct device *dev) needs_reset = 1; /* Needed? 
*/ } } - else { + else if(is_cpu_pre_d0()) { uint32_t dcl; f2_dev = dev_find_slot(0, dev->path.u.pci.devfn - 3 + 2); /* Errata 98 @@ -187,7 +187,7 @@ static void misc_control_init(struct device *dev) /* This works on an Athlon64 because unimplemented links return 0 */ reg = 0x98 + (link * 0x20); link_type = pci_read_config32(f0_dev, reg); - if ((link_type & 7) == 3) { /* only handle coherent link here please */ + if ((link_type & 7) == 3) { /* Only handle coherent link here */ cmd &= ~(0xff << (link *8)); /* FIXME this assumes the device on the other side is an AMD device */ cmd |= 0x25 << (link *8); diff --git a/src/northbridge/amd/amdk8/northbridge.c b/src/northbridge/amd/amdk8/northbridge.c index 3d30430d3c..89602f37a6 100644 --- a/src/northbridge/amd/amdk8/northbridge.c +++ b/src/northbridge/amd/amdk8/northbridge.c @@ -1,3 +1,9 @@ +/* This should be done by Eric + 2004.12 yhlu add dual core support + 2005.01 yhlu add support move apic before pci_domain in MB Config.lb + 2005.02 yhlu add e0 memory hole support +*/ + #include <console/console.h> #include <arch/io.h> #include <stdint.h> @@ -9,11 +15,22 @@ #include <string.h> #include <bitops.h> #include <cpu/cpu.h> + +#include <cpu/x86/lapic.h> + +#if CONFIG_LOGICAL_CPUS==1 +#include <cpu/amd/dualcore.h> +#include <pc80/mc146818rtc.h> +#endif + #include "chip.h" #include "root_complex/chip.h" #include "northbridge.h" #include "amdk8.h" -#include <cpu/x86/lapic.h> + +#if K8_E0_MEM_HOLE_SIZEK != 0 +#include "./cpu_rev.c" +#endif #define FX_DEVS 8 static device_t __f0_dev[FX_DEVS]; @@ -387,8 +404,8 @@ static void amdk8_set_resource(device_t dev, struct resource *resource, unsigned limit |= (nodeid & 7); if (dev->link[link].bridge_ctrl & PCI_BRIDGE_CTL_VGA) { - printk_spew("%s, enabling legacy VGA IO forwarding for %s link %s\n", - __func__, dev_path(dev), link); + printk_spew("%s, enabling legacy VGA IO forwarding for %s link %s\n", + __func__, dev_path(dev), link); base |= PCI_IO_BASE_VGA_EN; } if 
(dev->link[link].bridge_ctrl & PCI_BRIDGE_CTL_NO_ISA) { @@ -440,7 +457,7 @@ static void amdk8_create_vga_resource(device_t dev, unsigned nodeid) break; } } - + printk_spew("%s: link %d has VGA device\n", __func__, link); /* no VGA card installed */ @@ -468,6 +485,7 @@ static void amdk8_create_vga_resource(device_t dev, unsigned nodeid) /* release the temp resource */ resource->flags = 0; + } static void amdk8_set_resources(device_t dev) @@ -652,6 +670,26 @@ static void pci_domain_set_resources(device_t dev) mmio_basek &= ~((64*1024) - 1); #endif +#if K8_E0_MEM_HOLE_SIZEK != 0 + if (!is_cpu_pre_e0()) + for (i = 0; i < 8; i++) { + uint32_t base; + base = f1_read_config32(0x40 + (i << 3)); + if ((base & ((1<<1)|(1<<0))) != ((1<<1)|(1<<0))) { + continue; + } + + base = pci_read_config32(__f1_dev[i], 0xf0); + if((base & 1)==0) continue; + base &= 0xff<<24; + base >>= 10; + if (mmio_basek > base) { + mmio_basek = base; + } + break; // only one hole + } +#endif + idx = 10; for (i = 0; i < 8; i++) { uint32_t base, limit; @@ -672,6 +710,7 @@ static void pci_domain_set_resources(device_t dev) sizek = limitk - ((8*64)+(16*16)); } + /* See if I need to split the region to accomodate pci memory space */ if ((basek < mmio_basek) && (limitk > mmio_basek)) { @@ -716,24 +755,36 @@ static struct device_operations pci_domain_ops = { }; #define APIC_ID_OFFSET 0x10 - static unsigned int cpu_bus_scan(device_t dev, unsigned int max) { struct bus *cpu_bus; device_t dev_mc; - int i, j; - int enable_apic_ext_id = 0; - int bsp_apic_id = lapicid(); // bsp apicid - int apic_id_offset = bsp_apic_id; - - dev_mc = dev_find_slot(0, PCI_DEVFN(0x18, 0)); - if (pci_read_config32(dev_mc, 0x68) & ( HTTC_APIC_EXT_ID | HTTC_APIC_EXT_BRD_CST)) { - enable_apic_ext_id = 1; - if (apic_id_offset==0) { - //bsp apic id is not changed - apic_id_offset = APIC_ID_OFFSET; - } - } + int i,j; + unsigned nb_cfg_54 = 0; + unsigned siblings = 0; + int enable_apic_ext_id = 0; + int bsp_apic_id = lapicid(); // bsp apicid 
+ int apic_id_offset = bsp_apic_id; + +#if CONFIG_LOGICAL_CPUS==1 + int e0_later_single_core; + int disable_siblings = !CONFIG_LOGICAL_CPUS; + get_option(&disable_siblings, "dual_core"); + + // for pre_e0, nb_cfg_54 can not be set, ( even set, when you read it still be 0) + // How can I get the nb_cfg_54 of every node' nb_cfg_54 in bsp??? and differ d0 and e0 single core + + nb_cfg_54 = read_nb_cfg_54(); +#endif + + dev_mc = dev_find_slot(0, PCI_DEVFN(0x18, 0)); + if(pci_read_config32(dev_mc, 0x68) & ( HTTC_APIC_EXT_ID | HTTC_APIC_EXT_BRD_CST)) { + enable_apic_ext_id = 1; + if(apic_id_offset==0) { //bsp apic id is not changed + apic_id_offset = APIC_ID_OFFSET; + } + } + /* Find which cpus are present */ cpu_bus = &dev->link[0]; @@ -741,60 +792,102 @@ static unsigned int cpu_bus_scan(device_t dev, unsigned int max) device_t dev, cpu; struct device_path cpu_path; - /* Find the cpu's memory controller */ - dev = dev_find_slot(0, PCI_DEVFN(0x18 + i, 3)); - if(!dev) { - // in case in mb Config.lb we move apic cluster before pci_domain and not set that for second CPU + /* Find the cpu's memory controller */ + dev = dev_find_slot(0, PCI_DEVFN(0x18 + i, 3)); + if(!dev) { // in case we move apic cluser before pci_domain and not set that for second CPU for(j=0; j<4; j++) { struct device dummy; - uint32_t id; - dummy.bus = dev_mc->bus; - dummy.path.type = DEVICE_PATH_PCI; + uint32_t id; + dummy.bus = dev_mc->bus; + dummy.path.type = DEVICE_PATH_PCI; dummy.path.u.pci.devfn = PCI_DEVFN(0x18 + i, j); - id = pci_read_config32(&dummy, PCI_VENDOR_ID); - if (id != 0xffffffff && id != 0x00000000 && - id != 0x0000ffff && id != 0xffff0000) { - //create that for it - dev = alloc_dev(dev_mc->bus, &dummy.path); + id = pci_read_config32(&dummy, PCI_VENDOR_ID); + if (id != 0xffffffff && id != 0x00000000 && + id != 0x0000ffff && id != 0xffff0000) { + //create that for it + dev = alloc_dev(dev_mc->bus, &dummy.path); } - } - } - - /* Build the cpu device path */ - cpu_path.type = 
DEVICE_PATH_APIC; - cpu_path.u.apic.apic_id = i; + } + } - /* See if I can find the cpu */ - cpu = find_dev_path(cpu_bus, &cpu_path); +#if CONFIG_LOGICAL_CPUS==1 + e0_later_single_core = 0; + if((!disable_siblings) && dev && dev->enabled) { + j = (pci_read_config32(dev, 0xe8) >> 12) & 3; //dev is func 3 + + printk_debug(" %s siblings=%d\r\n", dev_path(dev), j); + + if(nb_cfg_54) { + // For e0 single core if nb_cfg_54 is set, apicid will be 0, 2, 4.... + // ----> you can mixed single core e0 and dual core e0 at any sequence + // That is the typical case + + if(j == 0 ){ + e0_later_single_core = is_e0_later_in_bsp(i); // single core + } else { + e0_later_single_core = 0; + } + if(e0_later_single_core) { + printk_debug("\tFound e0 single core\r\n"); + j=1; + } + + if(siblings > j ) { + //actually we can't be here, because d0 nb_cfg_54 can not be set + //even worse is_e0_later_in_bsp() can not find out if it is d0 or e0 - /* Enable the cpu if I have the processor */ - if (dev && dev->enabled) { - if (!cpu) { - cpu = alloc_dev(cpu_bus, &cpu_path); - } - if (cpu) { - cpu->enabled = 1; + die("When NB_CFG_54 is set, if you want to mix e0 (single core and dual core) and single core(pre e0) CPUs, you need to put all the single core (pre e0) CPUs before all the (e0 single or dual core) CPUs\r\n"); + } + else { + siblings = j; + } + } else { + siblings = j; } } - - /* Disable the cpu if I don't have the processor */ - if (cpu && (!dev || !dev->enabled)) { - cpu->enabled = 0; - } - - /* Report what I have done */ - if (cpu) { - if(enable_apic_ext_id) { - if(cpu->path.u.apic.apic_id<apic_id_offset) { - //all add offset except bsp cores - if( (cpu->path.u.apic.apic_id > 0) || (bsp_apic_id!=0) ) - cpu->path.u.apic.apic_id += apic_id_offset; +#endif + +#if CONFIG_LOGICAL_CPUS==1 + for (j = 0; j <= (e0_later_single_core?0:siblings); j++ ) { +#else + for (j = 0; j <= siblings; j++ ) { +#endif + /* Build the cpu device path */ + cpu_path.type = DEVICE_PATH_APIC; + 
cpu_path.u.apic.apic_id = i * (nb_cfg_54?(siblings+1):1) + j * (nb_cfg_54?1:8); + + /* See if I can find the cpu */ + cpu = find_dev_path(cpu_bus, &cpu_path); + + /* Enable the cpu if I have the processor */ + if (dev && dev->enabled) { + if (!cpu) { + cpu = alloc_dev(cpu_bus, &cpu_path); } - } - printk_debug("CPU: %s %s\n", dev_path(cpu), - cpu->enabled?"enabled":"disabled"); - } + if (cpu) { + cpu->enabled = 1; + } + } + + /* Disable the cpu if I don't have the processor */ + if (cpu && (!dev || !dev->enabled)) { + cpu->enabled = 0; + } + + /* Report what I have done */ + if (cpu) { + if(enable_apic_ext_id) { + if(cpu->path.u.apic.apic_id<apic_id_offset) { //all add offset except bsp core0 + if( (cpu->path.u.apic.apic_id > siblings) || (bsp_apic_id!=0) ) + cpu->path.u.apic.apic_id += apic_id_offset; + } + } + printk_debug("CPU: %s %s\n", + dev_path(cpu), cpu->enabled?"enabled":"disabled"); + } + } //j } + return max; } diff --git a/src/northbridge/amd/amdk8/raminit.c b/src/northbridge/amd/amdk8/raminit.c index e4043493ab..3bdf8c3b11 100644 --- a/src/northbridge/amd/amdk8/raminit.c +++ b/src/northbridge/amd/amdk8/raminit.c @@ -1,3 +1,9 @@ +/* This should be done by Eric + 2004.11 yhlu add 4 rank DIMM support + 2004.12 yhlu add D0 support + 2005.02 yhlu add E0 memory hole support +*/ + #include <cpu/x86/mem.h> #include <cpu/x86/cache.h> #include <cpu/x86/mtrr.h> @@ -7,10 +13,16 @@ #if (CONFIG_LB_MEM_TOPK & (CONFIG_LB_MEM_TOPK -1)) != 0 # error "CONFIG_LB_MEM_TOPK must be a power of 2" #endif + +#ifndef K8_4RANK_DIMM_SUPPORT +#define K8_4RANK_DIMM_SUPPORT 0 +#endif + +#if 1 static void setup_resource_map(const unsigned int *register_values, int max) { int i; - print_debug("setting up resource map...."); +// print_debug("setting up resource map...."); #if 0 print_debug("\r\n"); #endif @@ -37,7 +49,13 @@ static void setup_resource_map(const unsigned int *register_values, int max) pci_write_config32(register_values[i], reg); #endif } - print_debug("done.\r\n"); +// 
print_debug("done.\r\n"); +} +#endif + +static int controller_present(const struct mem_controller *ctrl) +{ + return pci_read_config32(ctrl->f0, 0) == 0x11001022; } static void sdram_set_registers(const struct mem_controller *ctrl) @@ -504,6 +522,13 @@ static void sdram_set_registers(const struct mem_controller *ctrl) }; int i; int max; + +#if 1 + if (!controller_present(ctrl)) { +// print_debug("No memory controller present\r\n"); + return; + } +#endif print_spew("setting up CPU"); print_spew_hex8(ctrl->node_id); print_spew(" northbridge registers\r\n"); @@ -585,6 +610,11 @@ static int is_registered(const struct mem_controller *ctrl) struct dimm_size { unsigned long side1; unsigned long side2; + unsigned long rows; + unsigned long col; +#if K8_4RANK_DIMM_SUPPORT == 1 + unsigned long rank; +#endif }; static struct dimm_size spd_get_dimm_size(unsigned device) @@ -594,6 +624,11 @@ static struct dimm_size spd_get_dimm_size(unsigned device) int value, low; sz.side1 = 0; sz.side2 = 0; + sz.rows = 0; + sz.col = 0; +#if K8_4RANK_DIMM_SUPPORT == 1 + sz.rank = 0; +#endif /* Note it might be easier to use byte 31 here, it has the DIMM size as * a multiple of 4MB. 
The way we do it now we can size both @@ -603,11 +638,13 @@ static struct dimm_size spd_get_dimm_size(unsigned device) if (value < 0) goto hw_err; if ((value & 0xf) == 0) goto val_err; sz.side1 += value & 0xf; + sz.rows = value & 0xf; value = spd_read_byte(device, 4); /* columns */ if (value < 0) goto hw_err; if ((value & 0xf) == 0) goto val_err; sz.side1 += value & 0xf; + sz.col = value & 0xf; value = spd_read_byte(device, 17); /* banks */ if (value < 0) goto hw_err; @@ -630,7 +667,12 @@ static struct dimm_size spd_get_dimm_size(unsigned device) value = spd_read_byte(device, 5); /* number of physical banks */ if (value < 0) goto hw_err; if (value == 1) goto out; - if (value != 2) goto val_err; + if ((value != 2) && (value != 4 )) { + goto val_err; + } +#if K8_4RANK_DIMM_SUPPORT == 1 + sz.rank = value; +#endif /* Start with the symmetrical case */ sz.side2 = sz.side1; @@ -646,6 +688,7 @@ static struct dimm_size spd_get_dimm_size(unsigned device) if ((value & 0xff) == 0) goto val_err; sz.side2 -= (value & 0x0f); /* Subtract out columns on side 1 */ sz.side2 += ((value >> 4) & 0x0f); /* Add in columsn on side 2 */ + goto out; val_err: @@ -654,10 +697,22 @@ static struct dimm_size spd_get_dimm_size(unsigned device) hw_err: sz.side1 = 0; sz.side2 = 0; + sz.rows = 0; + sz.col = 0; +#if K8_4RANK_DIMM_SUPPORT == 1 + sz.rank = 0; +#endif out: return sz; } +static const unsigned cs_map_aa[15] = { + /* (row=12, col=8)(14, 12) ---> (0, 0) (2, 4) */ + 0, 1, 3, 6, 0, + 0, 2, 4, 7, 9, + 0, 0, 5, 8,10, +}; + static void set_dimm_size(const struct mem_controller *ctrl, struct dimm_size sz, unsigned index) { uint32_t base0, base1, map; @@ -668,6 +723,11 @@ static void set_dimm_size(const struct mem_controller *ctrl, struct dimm_size sz } map = pci_read_config32(ctrl->f2, DRAM_BANK_ADDR_MAP); map &= ~(0xf << (index * 4)); +#if K8_4RANK_DIMM_SUPPORT == 1 + if(sz.rank == 4) { + map &= ~(0xf << ( (index + 2) * 4)); + } +#endif /* For each base register. 
* Place the dimm size in 32 MB quantities in the bits 31 - 21. @@ -679,9 +739,25 @@ static void set_dimm_size(const struct mem_controller *ctrl, struct dimm_size sz /* Make certain side1 of the dimm is at least 32MB */ if (sz.side1 >= (25 +3)) { - map |= (sz.side1 - (25 + 3)) << (index *4); + if(is_cpu_pre_d0()) { + map |= (sz.side1 - (25 + 3)) << (index *4); +#if K8_4RANK_DIMM_SUPPORT == 1 + if(sz.rank == 4) { + map |= (sz.side1 - (25 + 3)) << ( (index + 2) * 4); + } +#endif + } + else { + map |= cs_map_aa[(sz.rows - 12) * 5 + (sz.col - 8) ] << (index*4); +#if K8_4RANK_DIMM_SUPPORT == 1 + if(sz.rank == 4) { + map |= cs_map_aa[(sz.rows - 12) * 5 + (sz.col - 8) ] << ( (index + 2) * 4); + } +#endif + } base0 = (1 << ((sz.side1 - (25 + 3)) + 21)) | 1; } + /* Make certain side2 of the dimm is at least 32MB */ if (sz.side2 >= (25 + 3)) { base1 = (1 << ((sz.side2 - (25 + 3)) + 21)) | 1; @@ -700,12 +776,24 @@ static void set_dimm_size(const struct mem_controller *ctrl, struct dimm_size sz /* Set the appropriate DIMM base address register */ pci_write_config32(ctrl->f2, DRAM_CSBASE + (((index << 1)+0)<<2), base0); pci_write_config32(ctrl->f2, DRAM_CSBASE + (((index << 1)+1)<<2), base1); +#if K8_4RANK_DIMM_SUPPORT == 1 + if(sz.rank == 4) { + pci_write_config32(ctrl->f2, DRAM_CSBASE + (((index << 1)+4)<<2), base0); + pci_write_config32(ctrl->f2, DRAM_CSBASE + (((index << 1)+5)<<2), base1); + } +#endif + pci_write_config32(ctrl->f2, DRAM_BANK_ADDR_MAP, map); /* Enable the memory clocks for this DIMM */ if (base0) { dch = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH); dch |= DCH_MEMCLK_EN0 << index; +#if K8_4RANK_DIMM_SUPPORT == 1 + if(sz.rank == 4) { + dch |= DCH_MEMCLK_EN0 << (index + 2); + } +#endif pci_write_config32(ctrl->f2, DRAM_CONFIG_HIGH, dch); } } @@ -798,10 +886,28 @@ static unsigned long interleave_chip_selects(const struct mem_controller *ctrl) /* 1GB */ (1 << (16 - 4)), /* 2GB */ (1 << (16 - 4)), }; + + static const uint32_t csbase_low_d0[] = { + /* 32MB */ (1 
<< (13 - 4)), + /* 64MB */ (1 << (14 - 4)), + /* 128MB */ (1 << (14 - 4)), + /* 128MB */ (1 << (15 - 4)), + /* 256MB */ (1 << (15 - 4)), + /* 512MB */ (1 << (15 - 4)), + /* 256MB */ (1 << (16 - 4)), + /* 512MB */ (1 << (16 - 4)), + /* 1GB */ (1 << (16 - 4)), + /* 1GB */ (1 << (17 - 4)), + /* 2GB */ (1 << (17 - 4)), + }; + + /* cs_base_high is not changed */ + uint32_t csbase_inc; int chip_selects, index; int bits; unsigned common_size; + unsigned common_cs_mode; uint32_t csbase, csmask; /* See if all of the memory chip selects are the same size @@ -809,8 +915,10 @@ static unsigned long interleave_chip_selects(const struct mem_controller *ctrl) */ chip_selects = 0; common_size = 0; + common_cs_mode = 0; for(index = 0; index < 8; index++) { unsigned size; + unsigned cs_mode; uint32_t value; value = pci_read_config32(ctrl->f2, DRAM_CSBASE + (index << 2)); @@ -828,26 +936,50 @@ static unsigned long interleave_chip_selects(const struct mem_controller *ctrl) if (common_size != size) { return 0; } + + value = pci_read_config32(ctrl->f2, DRAM_BANK_ADDR_MAP); + cs_mode =( value >> ((index>>1)*4)) & 0xf; + if(cs_mode == 0 ) continue; + if(common_cs_mode == 0) { + common_cs_mode = cs_mode; + } + /* The size differed fail */ + if(common_cs_mode != cs_mode) { + return 0; + } } + /* Chip selects can only be interleaved when there is * more than one and their is a power of two of them. 
*/ bits = log2(chip_selects); if (((1 << bits) != chip_selects) || (bits < 1) || (bits > 3)) { return 0; - - } - /* Also we run out of address mask bits if we try and interleave 8 4GB dimms */ - if ((bits == 3) && (common_size == (1 << (32 - 3)))) { - print_debug("8 4GB chip selects cannot be interleaved\r\n"); - return 0; } + /* Find the bits of csbase that we need to interleave on */ - if (is_dual_channel(ctrl)) { - csbase_inc = csbase_low[log2(common_size) - 1] << 1; - } else { - csbase_inc = csbase_low[log2(common_size)]; + if(is_cpu_pre_d0()){ + csbase_inc = csbase_low[common_cs_mode]; + if(is_dual_channel(ctrl)) { + /* Also we run out of address mask bits if we try and interleave 8 4GB dimms */ + if ((bits == 3) && (common_size == (1 << (32 - 3)))) { +// print_debug("8 4GB chip selects cannot be interleaved\r\n"); + return 0; + } + csbase_inc <<=1; + } + } + else { + csbase_inc = csbase_low_d0[common_cs_mode]; + if(is_dual_channel(ctrl)) { + if( (bits==3) && (common_cs_mode > 8)) { +// print_debug("8 cs_mode>8 chip selects cannot be interleaved\r\n"); + return 0; + } + csbase_inc <<=1; + } } + /* Compute the initial values for csbase and csbask. * In csbase just set the enable bit and the base to zero. * In csmask set the mask bits for the size and page level interleave. 
@@ -877,7 +1009,7 @@ static unsigned long interleave_chip_selects(const struct mem_controller *ctrl) static unsigned long order_chip_selects(const struct mem_controller *ctrl) { unsigned long tom; - + /* Remember which registers we have used in the high 8 bits of tom */ tom = 0; for(;;) { @@ -960,6 +1092,25 @@ unsigned long memory_end_k(const struct mem_controller *ctrl, int max_node_id) return end_k; } +#if K8_E0_MEM_HOLE_SIZEK != 0 +#define K8_E0_MEM_HOLE_LIMITK 4*1024*1024 +#define K8_E0_MEM_HOLE_BASEK (K8_E0_MEM_HOLE_LIMITK - K8_E0_MEM_HOLE_SIZEK ) + +static void set_e0_mem_hole(const struct mem_controller *ctrl, unsigned base_k) +{ + /* Route the addresses to the controller node */ + unsigned val; + + val = pci_read_config32(ctrl->f1,0xf0); + + val &= 0x00ff00fe; + val = (K8_E0_MEM_HOLE_BASEK << 10) | ((K8_E0_MEM_HOLE_SIZEK+base_k)>>(16-10)) | 1; + + pci_write_config32(ctrl->f1, 0xf0, val); +} + +#endif + static void order_dimms(const struct mem_controller *ctrl) { unsigned long tom_k, base_k; @@ -976,6 +1127,14 @@ static void order_dimms(const struct mem_controller *ctrl) /* Compute the memory base address */ base_k = memory_end_k(ctrl, ctrl->node_id); tom_k += base_k; +#if K8_E0_MEM_HOLE_SIZEK != 0 + if(!is_cpu_pre_e0()) { + /* See if I need to check the range cover hole */ + if ((base_k <= K8_E0_MEM_HOLE_BASEK) && (tom_k > K8_E0_MEM_HOLE_BASEK)) { + tom_k += K8_E0_MEM_HOLE_SIZEK; + } + } +#endif route_dram_accesses(ctrl, base_k, tom_k); set_top_mem(tom_k); } @@ -1020,9 +1179,12 @@ static long spd_handle_unbuffered_dimms(const struct mem_controller *ctrl, long if (unbuffered && registered) { die("Mixed buffered and registered dimms not supported"); } +#if 1 + //By yhlu for debug Athlon64 939 can do dual channel, but it use unbuffer DIMM if (unbuffered && is_opteron(ctrl)) { die("Unbuffered Dimms not supported on Opteron"); } +#endif dcl = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW); dcl &= ~DCL_UnBufDimm; @@ -1360,12 +1522,46 @@ static struct 
spd_set_memclk_result spd_set_memclk(const struct mem_controller * dimm_err: dimm_mask = disable_dimm(ctrl, i, dimm_mask); } +#if 0 +//down speed for full load 4 rank support +#if K8_4RANK_DIMM_SUPPORT + if(dimm_mask == (3|(3<<DIMM_SOCKETS)) ) { + int ranks = 4; + for(i = 0; (i < 4) && (ctrl->channel0[i]); i++) { + int val; + if (!(dimm_mask & (1 << i))) { + continue; + } + val = spd_read_byte(ctrl->channel0[i], 5); + if(val!=ranks) { + ranks = val; + break; + } + } + if(ranks==4) { + if(min_cycle_time <= 0x50 ) { + min_cycle_time = 0x60; + } + } + + } +#endif +#endif /* Now that I know the minimum cycle time lookup the memory parameters */ result.param = get_mem_param(min_cycle_time); /* Update DRAM Config High with our selected memory speed */ value = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH); value &= ~(DCH_MEMCLK_MASK << DCH_MEMCLK_SHIFT); +#if 1 + /* Improves DQS centering by correcting for case when core speed multiplier and MEMCLK speed result in odd clock divisor, by selecting the next lowest memory speed, required only at DDR400 and higher speeds with certain DIMM loadings ---- cheating???*/ + if(!is_cpu_pre_e0()) { + if(min_cycle_time==0x50) { + value |= 1<<31; + } + } +#endif + value |= result.param->dch_memclk; pci_write_config32(ctrl->f2, DRAM_CONFIG_HIGH, value); @@ -1594,17 +1790,32 @@ static int update_dimm_x4(const struct mem_controller *ctrl, const struct mem_pa { uint32_t dcl; int value; +#if K8_4RANK_DIMM_SUPPORT == 1 + int rank; +#endif int dimm; value = spd_read_byte(ctrl->channel0[i], 13); if (value < 0) { return -1; } - dimm = i; - dimm += DCL_x4DIMM_SHIFT; + +#if K8_4RANK_DIMM_SUPPORT == 1 + rank = spd_read_byte(ctrl->channel0[i], 5); /* number of physical banks */ + if (rank < 0) { + return -1; + } +#endif + + dimm = 1<<(DCL_x4DIMM_SHIFT+i); +#if K8_4RANK_DIMM_SUPPORT == 1 + if(rank==4) { + dimm |= 1<<(DCL_x4DIMM_SHIFT+i+2); + } +#endif dcl = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW); - dcl &= ~(1 << dimm); + dcl &= ~dimm; if (value 
== 4) { - dcl |= (1 << dimm); + dcl |= dimm; } pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dcl); return 1; @@ -1921,10 +2132,6 @@ static long spd_set_dram_timing(const struct mem_controller *ctrl, const struct return dimm_mask; } -static int controller_present(const struct mem_controller *ctrl) -{ - return pci_read_config32(ctrl->f0, 0) == 0x11001022; -} static void sdram_set_spd_registers(const struct mem_controller *ctrl) { struct spd_set_memclk_result result; @@ -1932,7 +2139,7 @@ static void sdram_set_spd_registers(const struct mem_controller *ctrl) long dimm_mask; #if 1 if (!controller_present(ctrl)) { - print_debug("No memory controller present\r\n"); +// print_debug("No memory controller present\r\n"); return; } #endif @@ -2064,16 +2271,37 @@ static void sdram_enable(int controllers, const struct mem_controller *ctrl) dcl = pci_read_config32(ctrl[i].f2, DRAM_CONFIG_LOW); } while(((dcl & DCL_MemClrStatus) == 0) || ((dcl & DCL_DramEnable) == 0) ); } + + // init e0 mem hole here +#if K8_E0_MEM_HOLE_SIZEK != 0 + if (!is_cpu_pre_e0()) { + uint32_t base, limit; + unsigned base_k, limit_k; + base = pci_read_config32(ctrl->f1, 0x40 + (i << 3)); + limit = pci_read_config32(ctrl->f1, 0x44 + (i << 3)); + base_k = (base & 0xffff0000) >> 2; + limit_k = ((limit + 0x00010000) & 0xffff0000) >> 2; + if ((base_k <= K8_E0_MEM_HOLE_BASEK) && (limit_k > K8_E0_MEM_HOLE_BASEK)) { + set_e0_mem_hole(ctrl+i, base_k); + } + } + +#endif + print_debug(" done\r\n"); } + //FIXME add enable node interleaving here --yhlu + /*needed? + 1. check how many nodes we have , if not all has ram installed get out + 2. check cs_base lo is 0, node 0 f2 0x40,,,,, if any one is not using lo is CS_BASE, get out + 3. check if other node is the same as node 0 about f2 0x40,,,,, otherwise get out + 4. if all ready enable node_interleaving in f1 0x40..... of every node + 5. 
for node interleaving we need to set mem hole to every node ( need recalcute hole offset in f0 for every node) + */ + + /* Make certain the first 1M of memory is intialized */ - msr_t msr, msr_201; - uint32_t cnt; - - /* Save the value of msr_201 */ - msr_201 = rdmsr(0x201); - print_debug("Clearing initial memory region: "); /* Use write combine caching while we setup the first 1M */ |