From 530b5193e477d1756598700805b35e8a129a241f Mon Sep 17 00:00:00 2001 From: Eric Biederman Date: Tue, 1 Jul 2003 10:05:30 +0000 Subject: - Massive set of cleanups/fixes for romcc. Lots of corner cases now work properly. And a few long standing bugs have been rooted out and removed. git-svn-id: svn://svn.coreboot.org/coreboot/trunk@931 2b7e53f0-3cfb-0310-b3e9-8179ed1497e1 --- util/romcc/Makefile | 18 +- util/romcc/romcc.c | 1520 ++++++++++++++++++++++++++------------ util/romcc/tests/raminit_test5.c | 1392 ++++++++++++++++++++++++++++++++++ util/romcc/tests/simple_test48.c | 13 + util/romcc/tests/simple_test49.c | 15 + util/romcc/tests/simple_test50.c | 43 ++ util/romcc/tests/simple_test51.c | 11 + util/romcc/tests/simple_test52.c | 5 + util/romcc/tests/simple_test53.c | 10 + util/romcc/tests/simple_test54.c | 771 +++++++++++++++++++ 10 files changed, 3312 insertions(+), 486 deletions(-) create mode 100644 util/romcc/tests/raminit_test5.c create mode 100644 util/romcc/tests/simple_test48.c create mode 100644 util/romcc/tests/simple_test49.c create mode 100644 util/romcc/tests/simple_test50.c create mode 100644 util/romcc/tests/simple_test51.c create mode 100644 util/romcc/tests/simple_test52.c create mode 100644 util/romcc/tests/simple_test53.c create mode 100644 util/romcc/tests/simple_test54.c (limited to 'util/romcc') diff --git a/util/romcc/Makefile b/util/romcc/Makefile index c6b654eb4c..58778218ed 100644 --- a/util/romcc/Makefile +++ b/util/romcc/Makefile @@ -1,5 +1,5 @@ -VERSION:=0.32 -RELEASE_DATE:=28 June 2003 +VERSION:=0.33 +RELEASE_DATE:=1 July 2003 PACKAGE:=romcc @@ -67,10 +67,18 @@ TESTS=\ simple_test45.c \ simple_test46.c \ simple_test47.c \ + simple_test48.c \ + simple_test49.c \ + simple_test50.c \ + simple_test51.c \ + simple_test52.c \ + simple_test53.c \ + simple_test54.c \ raminit_test.c \ raminit_test2.c \ raminit_test3.c \ - raminit_test4.c + raminit_test4.c \ + raminit_test5.c FAIL_TESTS = \ fail_test1.c @@ -85,7 +93,7 @@ FAIL_OUT:=$(patsubst 
%.c, tests/%.out, $(FAIL_TESTS)) $(TEST_ASM): %.S: %.c romcc - export ALLOC_CHECK_=2; ./romcc -mcpu=k8 -O -o $@ $< > $*.debug + export ALLOC_CHECK_=2; ./romcc -O -mcpu=k8 -o $@ $< > $*.debug $(FAIL_OUT): %.out: %.c romcc export ALLOC_CHECK_=2; if ./romcc -O -o $*.S $< > $*.debug 2> $@ ; then exit 1 ; else exit 0 ; fi @@ -96,7 +104,7 @@ $(TEST_OBJ): %.o: %.S $(TEST_ELF): %.elf: %.o tests/ldscript.ld ld -T tests/ldscript.ld $< -o $@ -test: $(TEST_ELF) +test: $(TEST_ELF) $(FAIL_OUT) echo: echo "TEST_SRCS=$(TEST_SRCS)" diff --git a/util/romcc/romcc.c b/util/romcc/romcc.c index 6f3335fe49..7d380d681a 100644 --- a/util/romcc/romcc.c +++ b/util/romcc/romcc.c @@ -17,9 +17,13 @@ #define DEBUG_CONSISTENCY 2 #define DEBUG_RANGE_CONFLICTS 0 #define DEBUG_COALESCING 0 +#define DEBUG_SDP_BLOCKS 0 +#define DEBUG_TRIPLE_COLOR 0 #warning "FIXME boundary cases with small types in larger registers" #warning "FIXME give clear error messages about unused variables" +#warning "FIXME properly handle multi dimensional arrays" +#warning "FIXME fix scc_transform" /* Control flow graph of a loop without goto. * @@ -261,23 +265,25 @@ struct token { /* Operations on general purpose registers. 
*/ -#define OP_SMUL 0 -#define OP_UMUL 1 -#define OP_SDIV 2 -#define OP_UDIV 3 -#define OP_SMOD 4 -#define OP_UMOD 5 -#define OP_ADD 6 -#define OP_SUB 7 -#define OP_SL 8 -#define OP_USR 9 -#define OP_SSR 10 -#define OP_AND 11 -#define OP_XOR 12 -#define OP_OR 13 -#define OP_POS 14 /* Dummy positive operator don't use it */ -#define OP_NEG 15 -#define OP_INVERT 16 +#define OP_SDIVT 0 +#define OP_UDIVT 1 +#define OP_SMUL 2 +#define OP_UMUL 3 +#define OP_SDIV 4 +#define OP_UDIV 5 +#define OP_SMOD 6 +#define OP_UMOD 7 +#define OP_ADD 8 +#define OP_SUB 9 +#define OP_SL 10 +#define OP_USR 11 +#define OP_SSR 12 +#define OP_AND 13 +#define OP_XOR 14 +#define OP_OR 15 +#define OP_POS 16 /* Dummy positive operator don't use it */ +#define OP_NEG 17 +#define OP_INVERT 18 #define OP_EQ 20 #define OP_NOTEQ 21 @@ -295,6 +301,10 @@ struct token { #define OP_LOAD 32 #define OP_STORE 33 +/* For OP_STORE ->type holds the type + * RHS(0) holds the destination address + * RHS(1) holds the value to store. + */ #define OP_NOOP 34 @@ -318,8 +328,8 @@ struct token { #define OP_WRITE 60 /* OP_WRITE moves one pseudo register to another. - * LHS(0) holds the destination pseudo register, which must be an OP_DECL. - * RHS(0) holds the psuedo to move. + * RHS(0) holds the destination pseudo register, which must be an OP_DECL. + * RHS(1) holds the psuedo to move. 
*/ #define OP_READ 61 @@ -509,6 +519,8 @@ struct op_info { .targ = (TARG), \ } static const struct op_info table_ops[] = { +[OP_SDIVT ] = OP( 2, 2, 0, 0, PURE | BLOCK , "sdivt"), +[OP_UDIVT ] = OP( 2, 2, 0, 0, PURE | BLOCK , "udivt"), [OP_SMUL ] = OP( 0, 2, 0, 0, PURE | DEF | BLOCK , "smul"), [OP_UMUL ] = OP( 0, 2, 0, 0, PURE | DEF | BLOCK , "umul"), [OP_SDIV ] = OP( 0, 2, 0, 0, PURE | DEF | BLOCK , "sdiv"), @@ -541,7 +553,7 @@ static const struct op_info table_ops[] = { [OP_LTRUE ] = OP( 0, 1, 0, 0, PURE | DEF | BLOCK , "ltrue"), [OP_LOAD ] = OP( 0, 1, 0, 0, IMPURE | DEF | BLOCK, "load"), -[OP_STORE ] = OP( 1, 1, 0, 0, IMPURE | BLOCK , "store"), +[OP_STORE ] = OP( 0, 2, 0, 0, IMPURE | BLOCK , "store"), [OP_NOOP ] = OP( 0, 0, 0, 0, PURE | BLOCK, "noop"), @@ -549,7 +561,7 @@ static const struct op_info table_ops[] = { [OP_BLOBCONST ] = OP( 0, 0, 0, 0, PURE, "blobconst"), [OP_ADDRCONST ] = OP( 0, 0, 1, 0, PURE | DEF, "addrconst"), -[OP_WRITE ] = OP( 1, 1, 0, 0, PURE | BLOCK, "write"), +[OP_WRITE ] = OP( 0, 2, 0, 0, PURE | BLOCK, "write"), [OP_READ ] = OP( 0, 1, 0, 0, PURE | DEF | BLOCK, "read"), [OP_COPY ] = OP( 0, 1, 0, 0, PURE | DEF | BLOCK, "copy"), [OP_PIECE ] = OP( 0, 0, 1, 0, PURE | DEF, "piece"), @@ -864,9 +876,9 @@ struct type { #define MAX_REG_EQUIVS 16 #define REGISTER_BITS 16 #define MAX_VIRT_REGISTERS (1<occurance) { struct occurance *spot; spot = triple->occurance; while(spot->parent) { @@ -1557,7 +1569,7 @@ static struct triple *post_triple(struct compile_state *state, } /* If I have a left hand side skip over it */ zlhs = TRIPLE_LHS(base->sizes); - if (zlhs && (base->op != OP_WRITE) && (base->op != OP_STORE)) { + if (zlhs) { base = LHS(base, zlhs - 1); } @@ -1626,6 +1638,14 @@ static void display_triple(FILE *fp, struct triple *ins) ptr->col); } fprintf(fp, "\n"); +#if 0 + { + struct triple_set *user; + for(user = ptr->use; user; user = user->next) { + fprintf(fp, "use: %p\n", user->member); + } + } +#endif fflush(fp); } @@ -1656,6 +1676,23 @@ static 
int triple_is_branch(struct compile_state *state, struct triple *ins) return is_branch; } +static int triple_is_cond_branch(struct compile_state *state, struct triple *ins) +{ + /* A conditional branch has the condition argument as a single + * RHS parameter. + */ + return triple_is_branch(state, ins) && + (TRIPLE_RHS(ins->sizes) == 1); +} + +static int triple_is_uncond_branch(struct compile_state *state, struct triple *ins) +{ + /* A unconditional branch has no RHS parameters. + */ + return triple_is_branch(state, ins) && + (TRIPLE_RHS(ins->sizes) == 0); +} + static int triple_is_def(struct compile_state *state, struct triple *ins) { /* This function is used to determine which triples need @@ -4191,6 +4228,10 @@ static int equiv_types(struct type *left, struct type *right) return 0; } type = left->type & TYPE_MASK; + /* If the basic types match and it is a void type we are done */ + if (type == TYPE_VOID) { + return 1; + } /* if the basic types match and it is an arithmetic type we are done */ if (TYPE_ARITHMETIC(type)) { return 1; @@ -5132,13 +5173,6 @@ static struct triple *flatten( return ptr; } switch(ptr->op) { - case OP_WRITE: - case OP_STORE: - RHS(ptr, 0) = flatten(state, first, RHS(ptr, 0)); - LHS(ptr, 0) = flatten(state, first, LHS(ptr, 0)); - use_triple(LHS(ptr, 0), ptr); - use_triple(RHS(ptr, 0), ptr); - break; case OP_COMMA: RHS(ptr, 0) = flatten(state, first, RHS(ptr, 0)); ptr = RHS(ptr, 1); @@ -5474,6 +5508,22 @@ static int is_one(struct triple *ins) return is_const(ins) && (ins->u.cval == 1); } +static long_t bit_count(ulong_t value) +{ + int count; + int i; + count = 0; + for(i = (sizeof(ulong_t)*8) -1; i >= 0; i--) { + ulong_t mask; + mask = 1; + mask <<= i; + if (value & mask) { + count++; + } + } + return count; + +} static long_t bsr(ulong_t value) { int i; @@ -5700,8 +5750,8 @@ static void flatten_structures(struct compile_state *state) ulong_t i; op = ins->op; - src = RHS(ins, 0); - dst = LHS(ins, 0); + src = RHS(ins, 1); + dst = RHS(ins, 
0); get_occurance(ins->occurance); next = alloc_triple(state, OP_VAL_VEC, ins->type, -1, -1, ins->occurance); @@ -5949,13 +5999,13 @@ static void simplify_add(struct compile_state *state, struct triple *ins) RHS(ins, 1) = tmp; } if (is_const(RHS(ins, 0)) && is_const(RHS(ins, 1))) { - if (!is_pointer(RHS(ins, 0))) { + if (RHS(ins, 0)->op == OP_INTCONST) { ulong_t left, right; left = read_const(state, ins, &RHS(ins, 0)); right = read_const(state, ins, &RHS(ins, 1)); mkconst(state, ins, left + right); } - else /* op == OP_ADDRCONST */ { + else if (RHS(ins, 0)->op == OP_ADDRCONST) { struct triple *sdecl; ulong_t left, right; sdecl = MISC(RHS(ins, 0), 0); @@ -5963,6 +6013,9 @@ static void simplify_add(struct compile_state *state, struct triple *ins) right = RHS(ins, 1)->u.cval; mkaddr_const(state, ins, sdecl, left + right); } + else { + internal_warning(state, ins, "Optimize me!"); + } } else if (is_const(RHS(ins, 0)) && !is_const(RHS(ins, 1))) { struct triple *tmp; @@ -5975,13 +6028,13 @@ static void simplify_add(struct compile_state *state, struct triple *ins) static void simplify_sub(struct compile_state *state, struct triple *ins) { if (is_const(RHS(ins, 0)) && is_const(RHS(ins, 1))) { - if (!is_pointer(RHS(ins, 0))) { + if (RHS(ins, 0)->op == OP_INTCONST) { ulong_t left, right; left = read_const(state, ins, &RHS(ins, 0)); right = read_const(state, ins, &RHS(ins, 1)); mkconst(state, ins, left - right); } - else /* op == OP_ADDRCONST */ { + else if (RHS(ins, 0)->op == OP_ADDRCONST) { struct triple *sdecl; ulong_t left, right; sdecl = MISC(RHS(ins, 0), 0); @@ -5989,6 +6042,9 @@ static void simplify_sub(struct compile_state *state, struct triple *ins) right = RHS(ins, 1)->u.cval; mkaddr_const(state, ins, sdecl, left - right); } + else { + internal_warning(state, ins, "Optimize me!"); + } } } @@ -6372,6 +6428,65 @@ static void simplify_branch(struct compile_state *state, struct triple *ins) } } +int phi_present(struct block *block) +{ + struct triple *ptr; + if (!block) 
{ + return 0; + } + ptr = block->first; + do { + if (ptr->op == OP_PHI) { + return 1; + } + ptr = ptr->next; + } while(ptr != block->last); + return 0; +} + +static void simplify_label(struct compile_state *state, struct triple *ins) +{ +#warning "FIXME enable simplify_label" + struct triple *first, *last; + first = RHS(state->main_function, 0); + last = first->prev; + /* Ignore the first and last instructions */ + if ((ins == first) || (ins == last)) { + return; + } + if (ins->use == 0) { + ins->op = OP_NOOP; + } + else if (ins->prev->op == OP_LABEL) { + struct block *block; + block = ins->prev->u.block; + /* In general it is not safe to merge one label that + * imediately follows another. The problem is that the empty + * looking block may have phi functions that depend on it. + */ + if (!block || + (!phi_present(block->left) && + !phi_present(block->right))) + { + struct triple_set *user, *next; + ins->op = OP_NOOP; + for(user = ins->use; user; user = next) { + struct triple *use; + next = user->next; + use = user->member; + if (TARG(use, 0) == ins) { + TARG(use, 0) = ins->prev; + unuse_triple(ins, use); + use_triple(ins->prev, use); + } + } + if (ins->use) { + internal_error(state, ins, "noop use != 0"); + } + } + } +} + static void simplify_phi(struct compile_state *state, struct triple *ins) { struct triple **expr; @@ -6414,6 +6529,10 @@ static void simplify_bsr(struct compile_state *state, struct triple *ins) typedef void (*simplify_t)(struct compile_state *state, struct triple *ins); static const simplify_t table_simplify[] = { +#if 1 +#define simplify_sdivt simplify_noop +#define simplify_udivt simplify_noop +#endif #if 0 #define simplify_smul simplify_noop #define simplify_umul simplify_noop @@ -6472,6 +6591,9 @@ static const simplify_t table_simplify[] = { #if 0 #define simplify_branch simplify_noop #endif +#if 1 +#define simplify_label simplify_noop +#endif #if 0 #define simplify_phi simplify_noop @@ -6482,6 +6604,8 @@ static const simplify_t 
table_simplify[] = { #define simplify_bsr simplify_noop #endif +[OP_SDIVT ] = simplify_sdivt, +[OP_UDIVT ] = simplify_udivt, [OP_SMUL ] = simplify_smul, [OP_UMUL ] = simplify_umul, [OP_SDIV ] = simplify_sdiv, @@ -6533,7 +6657,7 @@ static const simplify_t table_simplify[] = { [OP_LIST ] = simplify_noop, [OP_BRANCH ] = simplify_branch, -[OP_LABEL ] = simplify_noop, +[OP_LABEL ] = simplify_label, [OP_ADECL ] = simplify_noop, [OP_SDECL ] = simplify_noop, [OP_PHI ] = simplify_phi, @@ -6581,7 +6705,7 @@ static void simplify_all(struct compile_state *state) do { simplify(state, ins); ins = ins->next; - } while(ins != first); + }while(ins != first); } /* @@ -6758,8 +6882,32 @@ static struct type *register_builtin_type(struct compile_state *state, static void register_builtins(struct compile_state *state) { + struct type *div_type, *ldiv_type; + struct type *udiv_type, *uldiv_type; struct type *msr_type; + div_type = register_builtin_type(state, "__builtin_div_t", + partial_struct(state, "quot", &int_type, + partial_struct(state, "rem", &int_type, 0))); + ldiv_type = register_builtin_type(state, "__builtin_ldiv_t", + partial_struct(state, "quot", &long_type, + partial_struct(state, "rem", &long_type, 0))); + udiv_type = register_builtin_type(state, "__builtin_udiv_t", + partial_struct(state, "quot", &uint_type, + partial_struct(state, "rem", &uint_type, 0))); + uldiv_type = register_builtin_type(state, "__builtin_uldiv_t", + partial_struct(state, "quot", &ulong_type, + partial_struct(state, "rem", &ulong_type, 0))); + + register_builtin_function(state, "__builtin_div", OP_SDIVT, div_type, + &int_type, &int_type); + register_builtin_function(state, "__builtin_ldiv", OP_SDIVT, ldiv_type, + &long_type, &long_type); + register_builtin_function(state, "__builtin_udiv", OP_UDIVT, udiv_type, + &uint_type, &uint_type); + register_builtin_function(state, "__builtin_uldiv", OP_UDIVT, uldiv_type, + &ulong_type, &ulong_type); + register_builtin_function(state, "__builtin_inb", OP_INB, 
&uchar_type, &ushort_type); register_builtin_function(state, "__builtin_inw", OP_INW, &ushort_type, @@ -8404,24 +8552,23 @@ static struct type *enum_specifier( return type; } -#if 0 static struct type *struct_declarator( struct compile_state *state, struct type *type, struct hash_entry **ident) { int tok; -#warning "struct_declarator is complicated because of bitfields, kill them?" tok = peek(state); if (tok != TOK_COLON) { type = declarator(state, type, ident, 1); } if ((tok == TOK_COLON) || (peek(state) == TOK_COLON)) { + struct triple *value; eat(state, TOK_COLON); - constant_expr(state); + value = constant_expr(state); +#warning "FIXME implement bitfields to reduce register usage" + error(state, 0, "bitfields not yet implemented"); } - FINISHME(); return type; } -#endif static struct type *struct_or_union_specifier( struct compile_state *state, unsigned int spec) @@ -8466,7 +8613,7 @@ static struct type *struct_or_union_specifier( struct type *type; struct hash_entry *fident; done = 1; - type = declarator(state, base_type, &fident, 1); + type = struct_declarator(state, base_type, &fident); elements++; if (peek(state) == TOK_COMMA) { done = 0; @@ -9363,14 +9510,6 @@ static int do_print_triple(struct compile_state *state, struct triple *ins, int if ((ins->op == OP_BRANCH) && ins->use) { internal_error(state, ins, "branch used?"); } -#if 0 - { - struct triple_set *user; - for(user = ins->use; user; user = user->next) { - printf("use: %p\n", user->member); - } - } -#endif if (triple_is_branch(state, ins)) { printf("\n"); } @@ -9497,13 +9636,16 @@ static void walk_blocks(struct compile_state *state, ptr = first; do { struct block *block; - if (ptr->op == OP_LABEL) { + if (triple_stores_block(state, ptr)) { block = ptr->u.block; if (block && (block != last_block)) { cb(state, block, arg); } last_block = block; } + if (block && (block->last == ptr)) { + block = 0; + } ptr = ptr->next; } while(ptr != first); } @@ -9511,10 +9653,11 @@ static void walk_blocks(struct 
compile_state *state, static void print_block( struct compile_state *state, struct block *block, void *arg) { + struct block_set *user; struct triple *ptr; FILE *fp = arg; - fprintf(fp, "\nblock: %p (%d), %p<-%p %p<-%p\n", + fprintf(fp, "\nblock: %p (%d) %p<-%p %p<-%p\n", block, block->vertex, block->left, @@ -9525,51 +9668,17 @@ static void print_block( fprintf(fp, "%p:\n", block->first); } for(ptr = block->first; ; ptr = ptr->next) { - struct triple_set *user; - int op = ptr->op; - - if (triple_stores_block(state, ptr)) { - if (ptr->u.block != block) { - internal_error(state, ptr, - "Wrong block pointer: %p\n", - ptr->u.block); - } - } - if (op == OP_ADECL) { - for(user = ptr->use; user; user = user->next) { - if (!user->member->u.block) { - internal_error(state, user->member, - "Use %p not in a block?\n", - user->member); - } - } - } display_triple(fp, ptr); - -#if 0 - for(user = ptr->use; user; user = user->next) { - fprintf(fp, "use: %p\n", user->member); - } -#endif - - /* Sanity checks... 
*/ - valid_ins(state, ptr); - for(user = ptr->use; user; user = user->next) { - struct triple *use; - use = user->member; - valid_ins(state, use); - if (triple_stores_block(state, user->member) && - !user->member->u.block) { - internal_error(state, user->member, - "Use %p not in a block?", - user->member); - } - } - if (ptr == block->last) break; } - fprintf(fp,"\n"); + fprintf(fp, "users %d: ", block->users); + for(user = block->use; user; user = user->next) { + fprintf(fp, "%p (%d) ", + user->member, + user->member->vertex); + } + fprintf(fp,"\n\n"); } @@ -9595,6 +9704,9 @@ static void prune_nonblock_triples(struct compile_state *state) if (!block) { release_triple(state, ins); } + if (block && block->last == ins) { + block = 0; + } ins = next; } while(ins != first); } @@ -9615,10 +9727,6 @@ static void setup_basic_blocks(struct compile_state *state) if (!state->last_block) { internal_error(state, 0, "end not used?"); } - /* Insert an extra unused edge from start to the end - * This helps with reverse control flow calculations. 
- */ - use_block(state->first_block, state->last_block); /* If we are debugging print what I have just done */ if (state->debug & DEBUG_BASIC_BLOCKS) { print_blocks(state, stdout); @@ -9770,7 +9878,8 @@ static int initialize_sdblock(struct sdom_block *sd, return vertex; } -static int initialize_sdpblock(struct sdom_block *sd, +static int initialize_sdpblock( + struct compile_state *state, struct sdom_block *sd, struct block *parent, struct block *block, int vertex) { struct block_set *user; @@ -9787,7 +9896,38 @@ static int initialize_sdpblock(struct sdom_block *sd, sd[vertex].ancestor = 0; sd[vertex].vertex = vertex; for(user = block->use; user; user = user->next) { - vertex = initialize_sdpblock(sd, block, user->member, vertex); + vertex = initialize_sdpblock(state, sd, block, user->member, vertex); + } + return vertex; +} + +static int setup_sdpblocks(struct compile_state *state, struct sdom_block *sd) +{ + struct block *block; + int vertex; + /* Setup as many sdpblocks as possible without using fake edges */ + vertex = initialize_sdpblock(state, sd, 0, state->last_block, 0); + + /* Walk through the graph and find unconnected blocks. If + * we can, add a fake edge from the unconnected blocks to the + * end of the graph. 
+ */ + block = state->first_block->last->next->u.block; + for(; block && block != state->first_block; block = block->last->next->u.block) { + if (sd[block->vertex].block == block) { + continue; + } + if (block->left != 0) { + continue; + } + +#if DEBUG_SDP_BLOCKS + fprintf(stderr, "Adding %d\n", vertex +1); +#endif + + block->left = state->last_block; + use_block(block->left, block); + vertex = initialize_sdpblock(state, sd, state->last_block, block, vertex); } return vertex; } @@ -10027,10 +10167,15 @@ static void find_immediate_dominators(struct compile_state *state) static void find_post_dominators(struct compile_state *state) { struct sdom_block *sd; + int vertex; /* Step 1 initialize the basic block information */ sd = xcmalloc(sizeof(*sd) * (state->last_vertex + 1), "sdom_state"); - initialize_sdpblock(sd, 0, state->last_block, 0); + vertex = setup_sdpblocks(state, sd); + if (vertex != state->last_vertex) { + internal_error(state, 0, "missing %d blocks\n", + state->last_vertex - vertex); + } /* Step 2 compute the semidominators */ /* Step 3 implicitly define the immediate dominator of each vertex */ @@ -10440,8 +10585,8 @@ static void rename_block_variables( /* LHS(A) */ if (ptr->op == OP_WRITE) { struct triple *var, *val, *tval; - var = LHS(ptr, 0); - tval = val = RHS(ptr, 0); + var = RHS(ptr, 0); + tval = val = RHS(ptr, 1); if ((val->op == OP_WRITE) || (val->op == OP_READ)) { internal_error(state, val, "bad value in write"); } @@ -10456,7 +10601,7 @@ static void rename_block_variables( use_triple(val, tval); } unuse_triple(val, ptr); - RHS(ptr, 0) = tval; + RHS(ptr, 1) = tval; use_triple(tval, ptr); } propogate_use(state, ptr, tval); @@ -10491,9 +10636,9 @@ static void rename_block_variables( } if (ptr->op == OP_WRITE) { struct triple *var; - var = LHS(ptr, 0); + var = RHS(ptr, 0); /* Pop OP_WRITE ptr->right from the stack of variable uses */ - pop_triple(var, RHS(ptr, 0)); + pop_triple(var, RHS(ptr, 1)); release_triple(state, ptr); continue; } @@ -10645,6 
+10790,7 @@ static void transform_from_ssa_form(struct compile_state *state) unuse_triple(phi, use->member); } +#warning "CHECK_ME does the OP_ADECL need to be placed somewhere that dominates all of the incoming phi edges?" /* A variable to replace the phi function */ var = post_triple(state, phi, OP_ADECL, phi->type, 0,0); /* A read of the single value that is set into the variable */ @@ -10659,7 +10805,7 @@ static void transform_from_ssa_form(struct compile_state *state) for(edge = 0, set = block->use; set; set = set->next, edge++) { struct block *eblock; struct triple *move; - struct triple *val; + struct triple *val, *base; eblock = set->member; val = slot[edge]; slot[edge] = 0; @@ -10671,20 +10817,21 @@ static void transform_from_ssa_form(struct compile_state *state) continue; } - move = post_triple(state, - val, OP_WRITE, phi->type, var, val); + /* Make certain the write is placed in the edge block... */ + base = eblock->first; + if (block_of_triple(state, val) == eblock) { + base = val; + } + move = post_triple(state, base, OP_WRITE, phi->type, var, val); use_triple(val, move); use_triple(var, move); } /* See if there are any writers of var */ used = 0; for(use = var->use; use; use = use->next) { - struct triple **expr; - expr = triple_lhs(state, use->member, 0); - for(; expr; expr = triple_lhs(state, use->member, expr)) { - if (*expr == var) { - used = 1; - } + if ((use->member->op == OP_WRITE) && + (RHS(use->member, 0) == var)) { + used = 1; } } /* If var is not used free it */ @@ -10792,7 +10939,7 @@ static struct reg_info find_lhs_post_color( struct triple_set *set; struct reg_info info; struct triple *lhs; -#if 0 +#if DEBUG_TRIPLE_COLOR fprintf(stderr, "find_lhs_post_color(%p, %d)\n", ins, index); #endif @@ -10836,7 +10983,7 @@ static struct reg_info find_lhs_post_color( info.regcm &= rinfo.regcm; } } -#if 0 +#if DEBUG_TRIPLE_COLOR fprintf(stderr, "find_lhs_post_color(%p, %d) -> ( %d, %x)\n", ins, index, info.reg, info.regcm); #endif @@ -10848,7 
+10995,7 @@ static struct reg_info find_rhs_post_color( { struct reg_info info, rinfo; int zlhs, i; -#if 0 +#if DEBUG_TRIPLE_COLOR fprintf(stderr, "find_rhs_post_color(%p, %d)\n", ins, index); #endif @@ -10871,7 +11018,7 @@ static struct reg_info find_rhs_post_color( if (tinfo.reg >= MAX_REGISTERS) { tinfo.reg = REG_UNSET; } - info.regcm &= linfo.reg; + info.regcm &= linfo.regcm; info.regcm &= tinfo.regcm; if (info.reg != REG_UNSET) { internal_error(state, ins, "register conflict"); @@ -10882,7 +11029,7 @@ static struct reg_info find_rhs_post_color( info.reg = tinfo.reg; } } -#if 0 +#if DEBUG_TRIPLE_COLOR fprintf(stderr, "find_rhs_post_color(%p, %d) -> ( %d, %x)\n", ins, index, info.reg, info.regcm); #endif @@ -10893,7 +11040,7 @@ static struct reg_info find_lhs_color( struct compile_state *state, struct triple *ins, int index) { struct reg_info pre, post, info; -#if 0 +#if DEBUG_TRIPLE_COLOR fprintf(stderr, "find_lhs_color(%p, %d)\n", ins, index); #endif @@ -10909,9 +11056,10 @@ static struct reg_info find_lhs_color( if (info.reg == REG_UNSET) { info.reg = post.reg; } -#if 0 - fprintf(stderr, "find_lhs_color(%p, %d) -> ( %d, %x)\n", - ins, index, info.reg, info.regcm); +#if DEBUG_TRIPLE_COLOR + fprintf(stderr, "find_lhs_color(%p, %d) -> ( %d, %x) ... 
(%d, %x) (%d, %x)\n", + ins, index, info.reg, info.regcm, + pre.reg, pre.regcm, post.reg, post.regcm); #endif return info; } @@ -11503,6 +11651,12 @@ static void eliminate_inefectual_code(struct compile_state *state) if (!triple_is_pure(state, ins) || triple_is_branch(state, ins)) { awaken(state, dtriple, &ins, &work_list_tail); } +#if 1 + /* Unconditionally keep the very last instruction */ + else if (ins->next == first) { + awaken(state, dtriple, &ins, &work_list_tail); + } +#endif i++; ins = ins->next; } while(ins != first); @@ -11829,7 +11983,6 @@ static void print_interference_block( fprintf(fp, "%p:\n", block->first); } for(done = 0, ptr = block->first; !done; ptr = ptr->next) { - struct triple_set *user; struct live_range *lr; unsigned id; int op; @@ -11837,23 +11990,6 @@ static void print_interference_block( done = (ptr == block->last); lr = rstate->lrd[ptr->id].lr; - if (triple_stores_block(state, ptr)) { - if (ptr->u.block != block) { - internal_error(state, ptr, - "Wrong block pointer: %p", - ptr->u.block); - } - } - if (op == OP_ADECL) { - for(user = ptr->use; user; user = user->next) { - if (!user->member->u.block) { - internal_error(state, user->member, - "Use %p not in a block?", - user->member); - } - - } - } id = ptr->id; ptr->id = rstate->lrd[id].orig_id; SET_REG(ptr->id, lr->color); @@ -11895,23 +12031,6 @@ static void print_interference_block( internal_error(state, ptr, "Invalid triple id: %d", ptr->id); } - for(user = ptr->use; user; user = user->next) { - struct triple *use; - struct live_range *ulr; - use = user->member; - valid_ins(state, use); - if ((use->id < 0) || (use->id > rstate->defs)) { - internal_error(state, use, "Invalid triple id: %d", - use->id); - } - ulr = rstate->lrd[user->member->id].lr; - if (triple_stores_block(state, user->member) && - !user->member->u.block) { - internal_error(state, user->member, - "Use %p not in a block?", - user->member); - } - } } if (rb->out) { struct triple_reg_set *out_set; @@ -12451,7 +12570,6 @@ 
static void initialize_live_ranges( #if DEBUG_COALESCING > 1 fprintf(stderr, "mandatory coalesce: %p %d %d\n", ins, zlhs, zrhs); - #endif for(i = 0; i < zlhs; i++) { struct reg_info linfo; @@ -13028,6 +13146,10 @@ struct triple *find_constrained_def( * least dominated one first. */ if (is_constrained) { +#if DEBUG_RANGE_CONFLICTS + fprintf(stderr, "canidate: %p %-8s regcm: %x %x\n", + lrd->def, tops(lrd->def->op), regcm, info.regcm); +#endif if (!constrained || tdominates(state, lrd->def, constrained)) { @@ -13060,8 +13182,8 @@ static int split_constrained_ranges( constrained = find_constrained_def(state, range, constrained); } #if DEBUG_RANGE_CONFLICTS - fprintf(stderr, "constrained: %s %p\n", - tops(constrained->op), constrained); + fprintf(stderr, "constrained: %p %-8s\n", + constrained, tops(constrained->op)); #endif if (constrained) { ids_from_rstate(state, rstate); @@ -13107,7 +13229,6 @@ static int split_ranges( return split; } - #if DEBUG_COLOR_GRAPH > 1 #define cgdebug_printf(...) 
fprintf(stdout, __VA_ARGS__) #define cgdebug_flush() fflush(stdout) @@ -13165,8 +13286,6 @@ static int select_free_color(struct compile_state *state, } #endif -#warning "FIXME detect conflicts caused by the source and destination being the same register" - /* If a color is already assigned see if it will work */ if (range->color != REG_UNSET) { struct live_range_def *lrd; @@ -13207,6 +13326,7 @@ static int select_free_color(struct compile_state *state, entry = lrd->def->use; for(;(range->color == REG_UNSET) && entry; entry = entry->next) { struct live_range_def *insd; + unsigned regcm; insd = &rstate->lrd[entry->member->id]; if (insd->lr->defs == 0) { continue; @@ -13215,8 +13335,11 @@ static int select_free_color(struct compile_state *state, !interfere(rstate, range, insd->lr)) { phi = insd; } - if ((insd->lr->color == REG_UNSET) || - ((insd->lr->classes & range->classes) == 0) || + if (insd->lr->color == REG_UNSET) { + continue; + } + regcm = insd->lr->classes; + if (((regcm & range->classes) == 0) || (used[insd->lr->color])) { continue; } @@ -13239,12 +13362,16 @@ static int select_free_color(struct compile_state *state, expr = triple_rhs(state, phi->def, 0); for(; expr; expr = triple_rhs(state, phi->def, expr)) { struct live_range *lr; + unsigned regcm; if (!*expr) { continue; } lr = rstate->lrd[(*expr)->id].lr; - if ((lr->color == REG_UNSET) || - ((lr->classes & range->classes) == 0) || + if (lr->color == REG_UNSET) { + continue; + } + regcm = lr->classes; + if (((regcm & range->classes) == 0) || (used[lr->color])) { continue; } @@ -13261,12 +13388,16 @@ static int select_free_color(struct compile_state *state, expr = triple_rhs(state, lrd->def, 0); for(; expr; expr = triple_rhs(state, lrd->def, expr)) { struct live_range *lr; + unsigned regcm; if (!*expr) { continue; } lr = rstate->lrd[(*expr)->id].lr; - if ((lr->color == -1) || - ((lr->classes & range->classes) == 0) || + if (lr->color == REG_UNSET) { + continue; + } + regcm = lr->classes; + if (((regcm & 
range->classes) == 0) || (used[lr->color])) { continue; } @@ -13322,8 +13453,8 @@ static int select_free_color(struct compile_state *state, internal_error(state, range->defs->def, "too few registers"); #endif } - range->classes = arch_reg_regcm(state, range->color); - if (range->color == -1) { + range->classes &= arch_reg_regcm(state, range->color); + if ((range->color == REG_UNSET) || (range->classes == 0)) { internal_error(state, range->defs->def, "select_free_color did not?"); } return 1; @@ -14503,6 +14634,7 @@ static void verify_blocks_present(struct compile_state *state) first = RHS(state->main_function, 0); ins = first; do { + valid_ins(state, ins); if (triple_stores_block(state, ins)) { if (!ins->u.block) { internal_error(state, ins, @@ -14518,19 +14650,73 @@ static void verify_blocks(struct compile_state *state) { struct triple *ins; struct block *block; + int blocks; block = state->first_block; if (!block) { return; } + blocks = 0; do { + int users; + struct block_set *user; + blocks++; for(ins = block->first; ins != block->last->next; ins = ins->next) { - if (!triple_stores_block(state, ins)) { + if (triple_stores_block(state, ins) && (ins->u.block != block)) { + internal_error(state, ins, "inconsitent block specified"); + } + valid_ins(state, ins); + } + users = 0; + for(user = block->use; user; user = user->next) { + users++; + if ((block == state->last_block) && + (user->member == state->first_block)) { continue; } - if (ins->u.block != block) { - internal_error(state, ins, "inconsitent block specified"); + if ((user->member->left != block) && + (user->member->right != block)) { + internal_error(state, user->member->first, + "user does not use block"); + } + } + if (triple_is_branch(state, block->last) && + (block->right != block_of_triple(state, TARG(block->last, 0)))) + { + internal_error(state, block->last, "block->right != TARG(0)"); + } + if (!triple_is_uncond_branch(state, block->last) && + (block != state->last_block) && + (block->left != 
block_of_triple(state, block->last->next))) + { + internal_error(state, block->last, "block->left != block->last->next"); + } + if (block->left) { + for(user = block->left->use; user; user = user->next) { + if (user->member == block) { + break; + } + } + if (!user || user->member != block) { + internal_error(state, block->first, + "block does not use left"); + } + } + if (block->right) { + for(user = block->right->use; user; user = user->next) { + if (user->member == block) { + break; + } + } + if (!user || user->member != block) { + internal_error(state, block->first, + "block does not use right"); } } + if (block->users != users) { + internal_error(state, block->first, + "computed users %d != stored users %d\n", + users, block->users); + } if (!triple_stores_block(state, block->last->next)) { internal_error(state, block->last->next, "cannot find next block"); @@ -14541,6 +14727,10 @@ static void verify_blocks(struct compile_state *state) "bad next block"); } } while(block != state->first_block); + if (blocks != state->last_vertex) { + internal_error(state, 0, "computed blocks != stored blocks %d\n", + blocks, state->last_vertex); + } } static void verify_domination(struct compile_state *state) @@ -14585,9 +14775,6 @@ static void verify_piece(struct compile_state *state) struct triple *ptr; int lhs, i; lhs = TRIPLE_LHS(ins->sizes); - if ((ins->op == OP_WRITE) || (ins->op == OP_STORE)) { - lhs = 0; - } for(ptr = ins->next, i = 0; i < lhs; i++, ptr = ptr->next) { if (ptr != LHS(ins, i)) { internal_error(state, ins, "malformed lhs on %s", @@ -14644,8 +14831,18 @@ static void optimize(struct compile_state *state) analyze_idominators(state); analyze_ipdominators(state); - /* Transform the code to ssa form */ + /* Transform the code to ssa form. */ + /* + * The transformation to ssa form puts a phi function + * on each of edge of a dominance frontier where that + * phi function might be needed. 
At -O2 if we don't + * eleminate the excess phi functions we can get an + * exponential code size growth. So I kill the extra + * phi functions early and I kill them often. + */ transform_to_ssa_form(state); + eliminate_inefectual_code(state); + verify_consistency(state); if (state->debug & DEBUG_CODE_ELIMINATION) { fprintf(stdout, "After transform_to_ssa_form\n"); @@ -14654,11 +14851,21 @@ static void optimize(struct compile_state *state) /* Do strength reduction and simple constant optimizations */ if (state->optimize >= 1) { simplify_all(state); + transform_from_ssa_form(state); + free_basic_blocks(state); + setup_basic_blocks(state); + analyze_idominators(state); + analyze_ipdominators(state); + transform_to_ssa_form(state); + eliminate_inefectual_code(state); + } + if (state->debug & DEBUG_CODE_ELIMINATION) { + fprintf(stdout, "After simplify_all\n"); + print_blocks(state, stdout); } verify_consistency(state); /* Propogate constants throughout the code */ if (state->optimize >= 2) { -#warning "FIXME fix scc_transform" scc_transform(state); transform_from_ssa_form(state); free_basic_blocks(state); @@ -14666,6 +14873,7 @@ static void optimize(struct compile_state *state) analyze_idominators(state); analyze_ipdominators(state); transform_to_ssa_form(state); + eliminate_inefectual_code(state); } verify_consistency(state); #warning "WISHLIST implement single use constants (least possible register pressure)" @@ -14781,37 +14989,41 @@ static void print_op_asm(struct compile_state *state, #define CPU_DEFAULT CPU_I386 /* The x86 register classes */ -#define REGC_FLAGS 0 -#define REGC_GPR8 1 -#define REGC_GPR16 2 -#define REGC_GPR32 3 -#define REGC_GPR64 4 -#define REGC_MMX 5 -#define REGC_XMM 6 -#define REGC_GPR32_8 7 -#define REGC_GPR16_8 8 -#define REGC_IMM32 9 -#define REGC_IMM16 10 -#define REGC_IMM8 11 +#define REGC_FLAGS 0 +#define REGC_GPR8 1 +#define REGC_GPR16 2 +#define REGC_GPR32 3 +#define REGC_DIVIDEND64 4 +#define REGC_DIVIDEND32 5 +#define REGC_MMX 6 
+#define REGC_XMM 7 +#define REGC_GPR32_8 8 +#define REGC_GPR16_8 9 +#define REGC_GPR8_LO 10 +#define REGC_IMM32 11 +#define REGC_IMM16 12 +#define REGC_IMM8 13 #define LAST_REGC REGC_IMM8 #if LAST_REGC >= MAX_REGC #error "MAX_REGC is to low" #endif /* Register class masks */ -#define REGCM_FLAGS (1 << REGC_FLAGS) -#define REGCM_GPR8 (1 << REGC_GPR8) -#define REGCM_GPR16 (1 << REGC_GPR16) -#define REGCM_GPR32 (1 << REGC_GPR32) -#define REGCM_GPR64 (1 << REGC_GPR64) -#define REGCM_MMX (1 << REGC_MMX) -#define REGCM_XMM (1 << REGC_XMM) -#define REGCM_GPR32_8 (1 << REGC_GPR32_8) -#define REGCM_GPR16_8 (1 << REGC_GPR16_8) -#define REGCM_IMM32 (1 << REGC_IMM32) -#define REGCM_IMM16 (1 << REGC_IMM16) -#define REGCM_IMM8 (1 << REGC_IMM8) -#define REGCM_ALL ((1 << (LAST_REGC + 1)) - 1) +#define REGCM_FLAGS (1 << REGC_FLAGS) +#define REGCM_GPR8 (1 << REGC_GPR8) +#define REGCM_GPR16 (1 << REGC_GPR16) +#define REGCM_GPR32 (1 << REGC_GPR32) +#define REGCM_DIVIDEND64 (1 << REGC_DIVIDEND64) +#define REGCM_DIVIDEND32 (1 << REGC_DIVIDEND32) +#define REGCM_MMX (1 << REGC_MMX) +#define REGCM_XMM (1 << REGC_XMM) +#define REGCM_GPR32_8 (1 << REGC_GPR32_8) +#define REGCM_GPR16_8 (1 << REGC_GPR16_8) +#define REGCM_GPR8_LO (1 << REGC_GPR8_LO) +#define REGCM_IMM32 (1 << REGC_IMM32) +#define REGCM_IMM16 (1 << REGC_IMM16) +#define REGCM_IMM8 (1 << REGC_IMM8) +#define REGCM_ALL ((1 << (LAST_REGC + 1)) - 1) /* The x86 registers */ #define REG_EFLAGS 2 @@ -14825,12 +15037,10 @@ static void print_op_asm(struct compile_state *state, #define REG_BH 8 #define REG_CH 9 #define REG_DH 10 +#define REGC_GPR8_LO_FIRST REG_AL +#define REGC_GPR8_LO_LAST REG_DL #define REGC_GPR8_FIRST REG_AL -#if X86_4_8BIT_GPRS -#define REGC_GPR8_LAST REG_DL -#else #define REGC_GPR8_LAST REG_DH -#endif #define REG_AX 11 #define REG_BX 12 #define REG_CX 13 @@ -14852,26 +15062,29 @@ static void print_op_asm(struct compile_state *state, #define REGC_GPR32_FIRST REG_EAX #define REGC_GPR32_LAST REG_ESP #define REG_EDXEAX 27 
-#define REGC_GPR64_FIRST REG_EDXEAX -#define REGC_GPR64_LAST REG_EDXEAX -#define REG_MMX0 28 -#define REG_MMX1 29 -#define REG_MMX2 30 -#define REG_MMX3 31 -#define REG_MMX4 32 -#define REG_MMX5 33 -#define REG_MMX6 34 -#define REG_MMX7 35 +#define REGC_DIVIDEND64_FIRST REG_EDXEAX +#define REGC_DIVIDEND64_LAST REG_EDXEAX +#define REG_DXAX 28 +#define REGC_DIVIDEND32_FIRST REG_DXAX +#define REGC_DIVIDEND32_LAST REG_DXAX +#define REG_MMX0 29 +#define REG_MMX1 30 +#define REG_MMX2 31 +#define REG_MMX3 32 +#define REG_MMX4 33 +#define REG_MMX5 34 +#define REG_MMX6 35 +#define REG_MMX7 36 #define REGC_MMX_FIRST REG_MMX0 #define REGC_MMX_LAST REG_MMX7 -#define REG_XMM0 36 -#define REG_XMM1 37 -#define REG_XMM2 38 -#define REG_XMM3 39 -#define REG_XMM4 40 -#define REG_XMM5 41 -#define REG_XMM6 42 -#define REG_XMM7 43 +#define REG_XMM0 37 +#define REG_XMM1 38 +#define REG_XMM2 39 +#define REG_XMM3 40 +#define REG_XMM4 41 +#define REG_XMM5 42 +#define REG_XMM6 43 +#define REG_XMM7 44 #define REGC_XMM_FIRST REG_XMM0 #define REGC_XMM_LAST REG_XMM7 #warning "WISHLIST figure out how to use pinsrw and pextrw to better use extended regs" @@ -14895,35 +15108,39 @@ static void print_op_asm(struct compile_state *state, static unsigned regc_size[LAST_REGC +1] = { - [REGC_FLAGS] = REGC_FLAGS_LAST - REGC_FLAGS_FIRST + 1, - [REGC_GPR8] = REGC_GPR8_LAST - REGC_GPR8_FIRST + 1, - [REGC_GPR16] = REGC_GPR16_LAST - REGC_GPR16_FIRST + 1, - [REGC_GPR32] = REGC_GPR32_LAST - REGC_GPR32_FIRST + 1, - [REGC_GPR64] = REGC_GPR64_LAST - REGC_GPR64_FIRST + 1, - [REGC_MMX] = REGC_MMX_LAST - REGC_MMX_FIRST + 1, - [REGC_XMM] = REGC_XMM_LAST - REGC_XMM_FIRST + 1, - [REGC_GPR32_8] = REGC_GPR32_8_LAST - REGC_GPR32_8_FIRST + 1, - [REGC_GPR16_8] = REGC_GPR16_8_LAST - REGC_GPR16_8_FIRST + 1, - [REGC_IMM32] = 0, - [REGC_IMM16] = 0, - [REGC_IMM8] = 0, + [REGC_FLAGS] = REGC_FLAGS_LAST - REGC_FLAGS_FIRST + 1, + [REGC_GPR8] = REGC_GPR8_LAST - REGC_GPR8_FIRST + 1, + [REGC_GPR16] = REGC_GPR16_LAST - REGC_GPR16_FIRST + 
1, + [REGC_GPR32] = REGC_GPR32_LAST - REGC_GPR32_FIRST + 1, + [REGC_DIVIDEND64] = REGC_DIVIDEND64_LAST - REGC_DIVIDEND64_FIRST + 1, + [REGC_DIVIDEND32] = REGC_DIVIDEND32_LAST - REGC_DIVIDEND32_FIRST + 1, + [REGC_MMX] = REGC_MMX_LAST - REGC_MMX_FIRST + 1, + [REGC_XMM] = REGC_XMM_LAST - REGC_XMM_FIRST + 1, + [REGC_GPR32_8] = REGC_GPR32_8_LAST - REGC_GPR32_8_FIRST + 1, + [REGC_GPR16_8] = REGC_GPR16_8_LAST - REGC_GPR16_8_FIRST + 1, + [REGC_GPR8_LO] = REGC_GPR8_LO_LAST - REGC_GPR8_LO_FIRST + 1, + [REGC_IMM32] = 0, + [REGC_IMM16] = 0, + [REGC_IMM8] = 0, }; static const struct { int first, last; } regcm_bound[LAST_REGC + 1] = { - [REGC_FLAGS] = { REGC_FLAGS_FIRST, REGC_FLAGS_LAST }, - [REGC_GPR8] = { REGC_GPR8_FIRST, REGC_GPR8_LAST }, - [REGC_GPR16] = { REGC_GPR16_FIRST, REGC_GPR16_LAST }, - [REGC_GPR32] = { REGC_GPR32_FIRST, REGC_GPR32_LAST }, - [REGC_GPR64] = { REGC_GPR64_FIRST, REGC_GPR64_LAST }, - [REGC_MMX] = { REGC_MMX_FIRST, REGC_MMX_LAST }, - [REGC_XMM] = { REGC_XMM_FIRST, REGC_XMM_LAST }, - [REGC_GPR32_8] = { REGC_GPR32_8_FIRST, REGC_GPR32_8_LAST }, - [REGC_GPR16_8] = { REGC_GPR16_8_FIRST, REGC_GPR16_8_LAST }, - [REGC_IMM32] = { REGC_IMM32_FIRST, REGC_IMM32_LAST }, - [REGC_IMM16] = { REGC_IMM16_FIRST, REGC_IMM16_LAST }, - [REGC_IMM8] = { REGC_IMM8_FIRST, REGC_IMM8_LAST }, + [REGC_FLAGS] = { REGC_FLAGS_FIRST, REGC_FLAGS_LAST }, + [REGC_GPR8] = { REGC_GPR8_FIRST, REGC_GPR8_LAST }, + [REGC_GPR16] = { REGC_GPR16_FIRST, REGC_GPR16_LAST }, + [REGC_GPR32] = { REGC_GPR32_FIRST, REGC_GPR32_LAST }, + [REGC_DIVIDEND64] = { REGC_DIVIDEND64_FIRST, REGC_DIVIDEND64_LAST }, + [REGC_DIVIDEND32] = { REGC_DIVIDEND32_FIRST, REGC_DIVIDEND32_LAST }, + [REGC_MMX] = { REGC_MMX_FIRST, REGC_MMX_LAST }, + [REGC_XMM] = { REGC_XMM_FIRST, REGC_XMM_LAST }, + [REGC_GPR32_8] = { REGC_GPR32_8_FIRST, REGC_GPR32_8_LAST }, + [REGC_GPR16_8] = { REGC_GPR16_8_FIRST, REGC_GPR16_8_LAST }, + [REGC_GPR8_LO] = { REGC_GPR8_LO_FIRST, REGC_GPR8_LO_LAST }, + [REGC_IMM32] = { REGC_IMM32_FIRST, REGC_IMM32_LAST }, 
+ [REGC_IMM16] = { REGC_IMM16_FIRST, REGC_IMM16_LAST }, + [REGC_IMM8] = { REGC_IMM8_FIRST, REGC_IMM8_LAST }, }; static int arch_encode_cpu(const char *cpu) @@ -14959,8 +15176,9 @@ static unsigned arch_regc_size(struct compile_state *state, int class) static int arch_regcm_intersect(unsigned regcm1, unsigned regcm2) { /* See if two register classes may have overlapping registers */ - unsigned gpr_mask = REGCM_GPR8 | REGCM_GPR16_8 | REGCM_GPR16 | - REGCM_GPR32_8 | REGCM_GPR32 | REGCM_GPR64; + unsigned gpr_mask = REGCM_GPR8 | REGCM_GPR8_LO | REGCM_GPR16_8 | REGCM_GPR16 | + REGCM_GPR32_8 | REGCM_GPR32 | + REGCM_DIVIDEND32 | REGCM_DIVIDEND64; /* Special case for the immediates */ if ((regcm1 & (REGCM_IMM32 | REGCM_IMM16 | REGCM_IMM8)) && @@ -14987,6 +15205,7 @@ static void arch_reg_equivs( #endif *equiv++ = REG_AX; *equiv++ = REG_EAX; + *equiv++ = REG_DXAX; *equiv++ = REG_EDXEAX; break; case REG_AH: @@ -14995,6 +15214,7 @@ static void arch_reg_equivs( #endif *equiv++ = REG_AX; *equiv++ = REG_EAX; + *equiv++ = REG_DXAX; *equiv++ = REG_EDXEAX; break; case REG_BL: @@ -15033,6 +15253,7 @@ static void arch_reg_equivs( #endif *equiv++ = REG_DX; *equiv++ = REG_EDX; + *equiv++ = REG_DXAX; *equiv++ = REG_EDXEAX; break; case REG_DH: @@ -15041,12 +15262,14 @@ static void arch_reg_equivs( #endif *equiv++ = REG_DX; *equiv++ = REG_EDX; + *equiv++ = REG_DXAX; *equiv++ = REG_EDXEAX; break; case REG_AX: *equiv++ = REG_AL; *equiv++ = REG_AH; *equiv++ = REG_EAX; + *equiv++ = REG_DXAX; *equiv++ = REG_EDXEAX; break; case REG_BX: @@ -15063,6 +15286,7 @@ static void arch_reg_equivs( *equiv++ = REG_DL; *equiv++ = REG_DH; *equiv++ = REG_EDX; + *equiv++ = REG_DXAX; *equiv++ = REG_EDXEAX; break; case REG_SI: @@ -15081,6 +15305,7 @@ static void arch_reg_equivs( *equiv++ = REG_AL; *equiv++ = REG_AH; *equiv++ = REG_AX; + *equiv++ = REG_DXAX; *equiv++ = REG_EDXEAX; break; case REG_EBX: @@ -15097,6 +15322,7 @@ static void arch_reg_equivs( *equiv++ = REG_DL; *equiv++ = REG_DH; *equiv++ = REG_DX; + 
*equiv++ = REG_DXAX; *equiv++ = REG_EDXEAX; break; case REG_ESI: @@ -15111,6 +15337,17 @@ static void arch_reg_equivs( case REG_ESP: *equiv++ = REG_SP; break; + case REG_DXAX: + *equiv++ = REG_AL; + *equiv++ = REG_AH; + *equiv++ = REG_DL; + *equiv++ = REG_DH; + *equiv++ = REG_AX; + *equiv++ = REG_DX; + *equiv++ = REG_EAX; + *equiv++ = REG_EDX; + *equiv++ = REG_EDXEAX; + break; case REG_EDXEAX: *equiv++ = REG_AL; *equiv++ = REG_AH; @@ -15120,6 +15357,7 @@ static void arch_reg_equivs( *equiv++ = REG_DX; *equiv++ = REG_EAX; *equiv++ = REG_EDX; + *equiv++ = REG_DXAX; break; } *equiv++ = REG_UNSET; @@ -15128,8 +15366,10 @@ static void arch_reg_equivs( static unsigned arch_avail_mask(struct compile_state *state) { unsigned avail_mask; - avail_mask = REGCM_GPR8 | REGCM_GPR16_8 | REGCM_GPR16 | - REGCM_GPR32 | REGCM_GPR32_8 | REGCM_GPR64 | + /* REGCM_GPR8 is not available */ + avail_mask = REGCM_GPR8_LO | REGCM_GPR16_8 | REGCM_GPR16 | + REGCM_GPR32 | REGCM_GPR32_8 | + REGCM_DIVIDEND32 | REGCM_DIVIDEND64 | REGCM_IMM32 | REGCM_IMM16 | REGCM_IMM8 | REGCM_FLAGS; switch(state->cpu) { case CPU_P3: @@ -15141,12 +15381,6 @@ static unsigned arch_avail_mask(struct compile_state *state) avail_mask |= REGCM_MMX | REGCM_XMM; break; } -#if 0 - /* Don't enable 8 bit values until I can force both operands - * to be 8bits simultaneously. 
- */ - avail_mask &= ~(REGCM_GPR8 | REGCM_GPR16_8 | REGCM_GPR16); -#endif return avail_mask; } @@ -15155,7 +15389,6 @@ static unsigned arch_regcm_normalize(struct compile_state *state, unsigned regcm unsigned mask, result; int class, class2; result = regcm; - result &= arch_avail_mask(state); for(class = 0, mask = 1; mask; mask <<= 1, class++) { if ((result & mask) == 0) { @@ -15171,6 +15404,7 @@ static unsigned arch_regcm_normalize(struct compile_state *state, unsigned regcm } } } + result &= arch_avail_mask(state); return result; } @@ -15209,19 +15443,19 @@ static struct reg_info arch_reg_constraint( unsigned int mask; unsigned int reg; } constraints[] = { - { 'r', REGCM_GPR32, REG_UNSET }, - { 'g', REGCM_GPR32, REG_UNSET }, - { 'p', REGCM_GPR32, REG_UNSET }, - { 'q', REGCM_GPR8, REG_UNSET }, + { 'r', REGCM_GPR32, REG_UNSET }, + { 'g', REGCM_GPR32, REG_UNSET }, + { 'p', REGCM_GPR32, REG_UNSET }, + { 'q', REGCM_GPR8_LO, REG_UNSET }, { 'Q', REGCM_GPR32_8, REG_UNSET }, - { 'x', REGCM_XMM, REG_UNSET }, - { 'y', REGCM_MMX, REG_UNSET }, - { 'a', REGCM_GPR32, REG_EAX }, - { 'b', REGCM_GPR32, REG_EBX }, - { 'c', REGCM_GPR32, REG_ECX }, - { 'd', REGCM_GPR32, REG_EDX }, - { 'D', REGCM_GPR32, REG_EDI }, - { 'S', REGCM_GPR32, REG_ESI }, + { 'x', REGCM_XMM, REG_UNSET }, + { 'y', REGCM_MMX, REG_UNSET }, + { 'a', REGCM_GPR32, REG_EAX }, + { 'b', REGCM_GPR32, REG_EBX }, + { 'c', REGCM_GPR32, REG_ECX }, + { 'd', REGCM_GPR32, REG_EDX }, + { 'D', REGCM_GPR32, REG_EDI }, + { 'S', REGCM_GPR32, REG_ESI }, { '\0', 0, REG_UNSET }, }; unsigned int regcm; @@ -15368,7 +15602,13 @@ static int arch_select_free_register( for(i = REGC_GPR8_FIRST; (reg == REG_UNSET) && (i <= REGC_GPR8_LAST); i++) { reg = do_select_reg(state, used, i, classes); } - for(i = REGC_GPR64_FIRST; (reg == REG_UNSET) && (i <= REGC_GPR64_LAST); i++) { + for(i = REGC_GPR8_LO_FIRST; (reg == REG_UNSET) && (i <= REGC_GPR8_LO_LAST); i++) { + reg = do_select_reg(state, used, i, classes); + } + for(i = REGC_DIVIDEND32_FIRST; 
(reg == REG_UNSET) && (i <= REGC_DIVIDEND32_LAST); i++) { + reg = do_select_reg(state, used, i, classes); + } + for(i = REGC_DIVIDEND64_FIRST; (reg == REG_UNSET) && (i <= REGC_DIVIDEND64_LAST); i++) { reg = do_select_reg(state, used, i, classes); } for(i = REGC_FLAGS_FIRST; (reg == REG_UNSET) && (i <= REGC_FLAGS_LAST); i++) { @@ -15390,10 +15630,10 @@ static unsigned arch_type_to_regcm(struct compile_state *state, struct type *typ break; case TYPE_CHAR: case TYPE_UCHAR: - mask = REGCM_GPR8 | + mask = REGCM_GPR8 | REGCM_GPR8_LO | REGCM_GPR16 | REGCM_GPR16_8 | REGCM_GPR32 | REGCM_GPR32_8 | - REGCM_GPR64 | + REGCM_DIVIDEND32 | REGCM_DIVIDEND64 | REGCM_MMX | REGCM_XMM | REGCM_IMM32 | REGCM_IMM16 | REGCM_IMM8; break; @@ -15401,7 +15641,7 @@ static unsigned arch_type_to_regcm(struct compile_state *state, struct type *typ case TYPE_USHORT: mask = REGCM_GPR16 | REGCM_GPR16_8 | REGCM_GPR32 | REGCM_GPR32_8 | - REGCM_GPR64 | + REGCM_DIVIDEND32 | REGCM_DIVIDEND64 | REGCM_MMX | REGCM_XMM | REGCM_IMM32 | REGCM_IMM16; break; @@ -15411,7 +15651,8 @@ static unsigned arch_type_to_regcm(struct compile_state *state, struct type *typ case TYPE_ULONG: case TYPE_POINTER: mask = REGCM_GPR32 | REGCM_GPR32_8 | - REGCM_GPR64 | REGCM_MMX | REGCM_XMM | + REGCM_DIVIDEND32 | REGCM_DIVIDEND64 | + REGCM_MMX | REGCM_XMM | REGCM_IMM32; break; default: @@ -15469,63 +15710,79 @@ static int get_imm8(struct triple *ins, struct triple **expr) return 1; } -#define TEMPLATE_NOP 0 -#define TEMPLATE_INTCONST8 1 -#define TEMPLATE_INTCONST32 2 -#define TEMPLATE_COPY8_REG 3 -#define TEMPLATE_COPY16_REG 4 -#define TEMPLATE_COPY32_REG 5 -#define TEMPLATE_COPY_IMM8 6 -#define TEMPLATE_COPY_IMM16 7 -#define TEMPLATE_COPY_IMM32 8 -#define TEMPLATE_PHI8 9 -#define TEMPLATE_PHI16 10 -#define TEMPLATE_PHI32 11 -#define TEMPLATE_STORE8 12 -#define TEMPLATE_STORE16 13 -#define TEMPLATE_STORE32 14 -#define TEMPLATE_LOAD8 15 -#define TEMPLATE_LOAD16 16 -#define TEMPLATE_LOAD32 17 -#define TEMPLATE_BINARY_REG 18 -#define 
TEMPLATE_BINARY_IMM 19 -#define TEMPLATE_SL_CL 20 -#define TEMPLATE_SL_IMM 21 -#define TEMPLATE_UNARY 22 -#define TEMPLATE_CMP_REG 23 -#define TEMPLATE_CMP_IMM 24 -#define TEMPLATE_TEST 25 -#define TEMPLATE_SET 26 -#define TEMPLATE_JMP 27 -#define TEMPLATE_INB_DX 28 -#define TEMPLATE_INB_IMM 29 -#define TEMPLATE_INW_DX 30 -#define TEMPLATE_INW_IMM 31 -#define TEMPLATE_INL_DX 32 -#define TEMPLATE_INL_IMM 33 -#define TEMPLATE_OUTB_DX 34 -#define TEMPLATE_OUTB_IMM 35 -#define TEMPLATE_OUTW_DX 36 -#define TEMPLATE_OUTW_IMM 37 -#define TEMPLATE_OUTL_DX 38 -#define TEMPLATE_OUTL_IMM 39 -#define TEMPLATE_BSF 40 -#define TEMPLATE_RDMSR 41 -#define TEMPLATE_WRMSR 42 -#define TEMPLATE_UMUL 43 -#define TEMPLATE_DIV 44 -#define TEMPLATE_MOD 45 -#define LAST_TEMPLATE TEMPLATE_MOD +#define TEMPLATE_NOP 0 +#define TEMPLATE_INTCONST8 1 +#define TEMPLATE_INTCONST32 2 +#define TEMPLATE_COPY8_REG 3 +#define TEMPLATE_COPY16_REG 4 +#define TEMPLATE_COPY32_REG 5 +#define TEMPLATE_COPY_IMM8 6 +#define TEMPLATE_COPY_IMM16 7 +#define TEMPLATE_COPY_IMM32 8 +#define TEMPLATE_PHI8 9 +#define TEMPLATE_PHI16 10 +#define TEMPLATE_PHI32 11 +#define TEMPLATE_STORE8 12 +#define TEMPLATE_STORE16 13 +#define TEMPLATE_STORE32 14 +#define TEMPLATE_LOAD8 15 +#define TEMPLATE_LOAD16 16 +#define TEMPLATE_LOAD32 17 +#define TEMPLATE_BINARY8_REG 18 +#define TEMPLATE_BINARY16_REG 19 +#define TEMPLATE_BINARY32_REG 20 +#define TEMPLATE_BINARY8_IMM 21 +#define TEMPLATE_BINARY16_IMM 22 +#define TEMPLATE_BINARY32_IMM 23 +#define TEMPLATE_SL8_CL 24 +#define TEMPLATE_SL16_CL 25 +#define TEMPLATE_SL32_CL 26 +#define TEMPLATE_SL8_IMM 27 +#define TEMPLATE_SL16_IMM 28 +#define TEMPLATE_SL32_IMM 29 +#define TEMPLATE_UNARY8 30 +#define TEMPLATE_UNARY16 31 +#define TEMPLATE_UNARY32 32 +#define TEMPLATE_CMP8_REG 33 +#define TEMPLATE_CMP16_REG 34 +#define TEMPLATE_CMP32_REG 35 +#define TEMPLATE_CMP8_IMM 36 +#define TEMPLATE_CMP16_IMM 37 +#define TEMPLATE_CMP32_IMM 38 +#define TEMPLATE_TEST8 39 +#define TEMPLATE_TEST16 40 
+#define TEMPLATE_TEST32 41 +#define TEMPLATE_SET 42 +#define TEMPLATE_JMP 43 +#define TEMPLATE_INB_DX 44 +#define TEMPLATE_INB_IMM 45 +#define TEMPLATE_INW_DX 46 +#define TEMPLATE_INW_IMM 47 +#define TEMPLATE_INL_DX 48 +#define TEMPLATE_INL_IMM 49 +#define TEMPLATE_OUTB_DX 50 +#define TEMPLATE_OUTB_IMM 51 +#define TEMPLATE_OUTW_DX 52 +#define TEMPLATE_OUTW_IMM 53 +#define TEMPLATE_OUTL_DX 54 +#define TEMPLATE_OUTL_IMM 55 +#define TEMPLATE_BSF 56 +#define TEMPLATE_RDMSR 57 +#define TEMPLATE_WRMSR 58 +#define TEMPLATE_UMUL8 59 +#define TEMPLATE_UMUL16 60 +#define TEMPLATE_UMUL32 61 +#define TEMPLATE_DIV8 62 +#define TEMPLATE_DIV16 63 +#define TEMPLATE_DIV32 64 +#define LAST_TEMPLATE TEMPLATE_DIV32 #if LAST_TEMPLATE >= MAX_TEMPLATES #error "MAX_TEMPLATES to low" #endif -#define COPY8_REGCM (REGCM_GPR64 | REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8 | REGCM_MMX | REGCM_XMM) -#define COPY16_REGCM (REGCM_GPR64 | REGCM_GPR32 | REGCM_GPR16 | REGCM_MMX | REGCM_XMM) -#define COPY32_REGCM (REGCM_GPR64 | REGCM_GPR32 | REGCM_MMX | REGCM_XMM) -#define COPYIMM8_REGCM (REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8) -#define COPYIMM16_REGCM (REGCM_GPR32 | REGCM_GPR16) -#define COPYIMM32_REGCM (REGCM_GPR32) +#define COPY8_REGCM (REGCM_DIVIDEND64 | REGCM_DIVIDEND32 | REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8_LO | REGCM_MMX | REGCM_XMM) +#define COPY16_REGCM (REGCM_DIVIDEND64 | REGCM_DIVIDEND32 | REGCM_GPR32 | REGCM_GPR16 | REGCM_MMX | REGCM_XMM) +#define COPY32_REGCM (REGCM_DIVIDEND64 | REGCM_DIVIDEND32 | REGCM_GPR32 | REGCM_MMX | REGCM_XMM) static struct ins_template templates[] = { @@ -15549,15 +15806,15 @@ static struct ins_template templates[] = { .rhs = { [0] = { REG_UNSET, COPY32_REGCM } }, }, [TEMPLATE_COPY_IMM8] = { - .lhs = { [0] = { REG_UNSET, COPYIMM8_REGCM } }, + .lhs = { [0] = { REG_UNSET, COPY8_REGCM } }, .rhs = { [0] = { REG_UNNEEDED, REGCM_IMM8 } }, }, [TEMPLATE_COPY_IMM16] = { - .lhs = { [0] = { REG_UNSET, COPYIMM16_REGCM } }, + .lhs = { [0] = { REG_UNSET, COPY16_REGCM } }, .rhs = 
{ [0] = { REG_UNNEEDED, REGCM_IMM16 | REGCM_IMM8 } }, }, [TEMPLATE_COPY_IMM32] = { - .lhs = { [0] = { REG_UNSET, COPYIMM32_REGCM } }, + .lhs = { [0] = { REG_UNSET, COPY32_REGCM } }, .rhs = { [0] = { REG_UNNEEDED, REGCM_IMM32 | REGCM_IMM16 | REGCM_IMM8 } }, }, [TEMPLATE_PHI8] = { @@ -15621,19 +15878,25 @@ static struct ins_template templates[] = { [15] = { REG_VIRT0, COPY32_REGCM }, }, }, [TEMPLATE_STORE8] = { - .lhs = { [0] = { REG_UNSET, REGCM_GPR32 } }, - .rhs = { [0] = { REG_UNSET, REGCM_GPR8 } }, + .rhs = { + [0] = { REG_UNSET, REGCM_GPR32 }, + [1] = { REG_UNSET, REGCM_GPR8_LO }, + }, }, [TEMPLATE_STORE16] = { - .lhs = { [0] = { REG_UNSET, REGCM_GPR32 } }, - .rhs = { [0] = { REG_UNSET, REGCM_GPR16 } }, + .rhs = { + [0] = { REG_UNSET, REGCM_GPR32 }, + [1] = { REG_UNSET, REGCM_GPR16 }, + }, }, [TEMPLATE_STORE32] = { - .lhs = { [0] = { REG_UNSET, REGCM_GPR32 } }, - .rhs = { [0] = { REG_UNSET, REGCM_GPR32 } }, + .rhs = { + [0] = { REG_UNSET, REGCM_GPR32 }, + [1] = { REG_UNSET, REGCM_GPR32 }, + }, }, [TEMPLATE_LOAD8] = { - .lhs = { [0] = { REG_UNSET, REGCM_GPR8 } }, + .lhs = { [0] = { REG_UNSET, REGCM_GPR8_LO } }, .rhs = { [0] = { REG_UNSET, REGCM_GPR32 } }, }, [TEMPLATE_LOAD16] = { @@ -15644,69 +15907,169 @@ static struct ins_template templates[] = { .lhs = { [0] = { REG_UNSET, REGCM_GPR32 } }, .rhs = { [0] = { REG_UNSET, REGCM_GPR32 } }, }, - [TEMPLATE_BINARY_REG] = { + [TEMPLATE_BINARY8_REG] = { + .lhs = { [0] = { REG_VIRT0, REGCM_GPR8_LO } }, + .rhs = { + [0] = { REG_VIRT0, REGCM_GPR8_LO }, + [1] = { REG_UNSET, REGCM_GPR8_LO }, + }, + }, + [TEMPLATE_BINARY16_REG] = { + .lhs = { [0] = { REG_VIRT0, REGCM_GPR16 } }, + .rhs = { + [0] = { REG_VIRT0, REGCM_GPR16 }, + [1] = { REG_UNSET, REGCM_GPR16 }, + }, + }, + [TEMPLATE_BINARY32_REG] = { .lhs = { [0] = { REG_VIRT0, REGCM_GPR32 } }, .rhs = { [0] = { REG_VIRT0, REGCM_GPR32 }, [1] = { REG_UNSET, REGCM_GPR32 }, }, }, - [TEMPLATE_BINARY_IMM] = { + [TEMPLATE_BINARY8_IMM] = { + .lhs = { [0] = { REG_VIRT0, REGCM_GPR8_LO } 
}, + .rhs = { + [0] = { REG_VIRT0, REGCM_GPR8_LO }, + [1] = { REG_UNNEEDED, REGCM_IMM8 }, + }, + }, + [TEMPLATE_BINARY16_IMM] = { + .lhs = { [0] = { REG_VIRT0, REGCM_GPR16 } }, + .rhs = { + [0] = { REG_VIRT0, REGCM_GPR16 }, + [1] = { REG_UNNEEDED, REGCM_IMM16 }, + }, + }, + [TEMPLATE_BINARY32_IMM] = { .lhs = { [0] = { REG_VIRT0, REGCM_GPR32 } }, .rhs = { [0] = { REG_VIRT0, REGCM_GPR32 }, [1] = { REG_UNNEEDED, REGCM_IMM32 }, }, }, - [TEMPLATE_SL_CL] = { + [TEMPLATE_SL8_CL] = { + .lhs = { [0] = { REG_VIRT0, REGCM_GPR8_LO } }, + .rhs = { + [0] = { REG_VIRT0, REGCM_GPR8_LO }, + [1] = { REG_CL, REGCM_GPR8_LO }, + }, + }, + [TEMPLATE_SL16_CL] = { + .lhs = { [0] = { REG_VIRT0, REGCM_GPR16 } }, + .rhs = { + [0] = { REG_VIRT0, REGCM_GPR16 }, + [1] = { REG_CL, REGCM_GPR8_LO }, + }, + }, + [TEMPLATE_SL32_CL] = { .lhs = { [0] = { REG_VIRT0, REGCM_GPR32 } }, .rhs = { [0] = { REG_VIRT0, REGCM_GPR32 }, - [1] = { REG_CL, REGCM_GPR8 }, + [1] = { REG_CL, REGCM_GPR8_LO }, + }, + }, + [TEMPLATE_SL8_IMM] = { + .lhs = { [0] = { REG_VIRT0, REGCM_GPR8_LO } }, + .rhs = { + [0] = { REG_VIRT0, REGCM_GPR8_LO }, + [1] = { REG_UNNEEDED, REGCM_IMM8 }, }, }, - [TEMPLATE_SL_IMM] = { + [TEMPLATE_SL16_IMM] = { + .lhs = { [0] = { REG_VIRT0, REGCM_GPR16 } }, + .rhs = { + [0] = { REG_VIRT0, REGCM_GPR16 }, + [1] = { REG_UNNEEDED, REGCM_IMM8 }, + }, + }, + [TEMPLATE_SL32_IMM] = { .lhs = { [0] = { REG_VIRT0, REGCM_GPR32 } }, .rhs = { [0] = { REG_VIRT0, REGCM_GPR32 }, [1] = { REG_UNNEEDED, REGCM_IMM8 }, }, }, - [TEMPLATE_UNARY] = { + [TEMPLATE_UNARY8] = { + .lhs = { [0] = { REG_VIRT0, REGCM_GPR8_LO } }, + .rhs = { [0] = { REG_VIRT0, REGCM_GPR8_LO } }, + }, + [TEMPLATE_UNARY16] = { + .lhs = { [0] = { REG_VIRT0, REGCM_GPR16 } }, + .rhs = { [0] = { REG_VIRT0, REGCM_GPR16 } }, + }, + [TEMPLATE_UNARY32] = { .lhs = { [0] = { REG_VIRT0, REGCM_GPR32 } }, .rhs = { [0] = { REG_VIRT0, REGCM_GPR32 } }, }, - [TEMPLATE_CMP_REG] = { + [TEMPLATE_CMP8_REG] = { + .lhs = { [0] = { REG_EFLAGS, REGCM_FLAGS } }, + .rhs = { + 
[0] = { REG_UNSET, REGCM_GPR8_LO }, + [1] = { REG_UNSET, REGCM_GPR8_LO }, + }, + }, + [TEMPLATE_CMP16_REG] = { + .lhs = { [0] = { REG_EFLAGS, REGCM_FLAGS } }, + .rhs = { + [0] = { REG_UNSET, REGCM_GPR16 }, + [1] = { REG_UNSET, REGCM_GPR16 }, + }, + }, + [TEMPLATE_CMP32_REG] = { .lhs = { [0] = { REG_EFLAGS, REGCM_FLAGS } }, .rhs = { [0] = { REG_UNSET, REGCM_GPR32 }, [1] = { REG_UNSET, REGCM_GPR32 }, }, }, - [TEMPLATE_CMP_IMM] = { + [TEMPLATE_CMP8_IMM] = { + .lhs = { [0] = { REG_EFLAGS, REGCM_FLAGS } }, + .rhs = { + [0] = { REG_UNSET, REGCM_GPR8_LO }, + [1] = { REG_UNNEEDED, REGCM_IMM8 }, + }, + }, + [TEMPLATE_CMP16_IMM] = { + .lhs = { [0] = { REG_EFLAGS, REGCM_FLAGS } }, + .rhs = { + [0] = { REG_UNSET, REGCM_GPR16 }, + [1] = { REG_UNNEEDED, REGCM_IMM16 }, + }, + }, + [TEMPLATE_CMP32_IMM] = { .lhs = { [0] = { REG_EFLAGS, REGCM_FLAGS } }, .rhs = { [0] = { REG_UNSET, REGCM_GPR32 }, [1] = { REG_UNNEEDED, REGCM_IMM32 }, }, }, - [TEMPLATE_TEST] = { + [TEMPLATE_TEST8] = { + .lhs = { [0] = { REG_EFLAGS, REGCM_FLAGS } }, + .rhs = { [0] = { REG_UNSET, REGCM_GPR8_LO } }, + }, + [TEMPLATE_TEST16] = { + .lhs = { [0] = { REG_EFLAGS, REGCM_FLAGS } }, + .rhs = { [0] = { REG_UNSET, REGCM_GPR16 } }, + }, + [TEMPLATE_TEST32] = { .lhs = { [0] = { REG_EFLAGS, REGCM_FLAGS } }, .rhs = { [0] = { REG_UNSET, REGCM_GPR32 } }, }, [TEMPLATE_SET] = { - .lhs = { [0] = { REG_UNSET, REGCM_GPR8 } }, + .lhs = { [0] = { REG_UNSET, REGCM_GPR8_LO } }, .rhs = { [0] = { REG_EFLAGS, REGCM_FLAGS } }, }, [TEMPLATE_JMP] = { .rhs = { [0] = { REG_EFLAGS, REGCM_FLAGS } }, }, [TEMPLATE_INB_DX] = { - .lhs = { [0] = { REG_AL, REGCM_GPR8 } }, + .lhs = { [0] = { REG_AL, REGCM_GPR8_LO } }, .rhs = { [0] = { REG_DX, REGCM_GPR16 } }, }, [TEMPLATE_INB_IMM] = { - .lhs = { [0] = { REG_AL, REGCM_GPR8 } }, + .lhs = { [0] = { REG_AL, REGCM_GPR8_LO } }, .rhs = { [0] = { REG_UNNEEDED, REGCM_IMM8 } }, }, [TEMPLATE_INW_DX] = { @@ -15727,13 +16090,13 @@ static struct ins_template templates[] = { }, [TEMPLATE_OUTB_DX] = { .rhs = { - 
[0] = { REG_AL, REGCM_GPR8 }, + [0] = { REG_AL, REGCM_GPR8_LO }, [1] = { REG_DX, REGCM_GPR16 }, }, }, [TEMPLATE_OUTB_IMM] = { .rhs = { - [0] = { REG_AL, REGCM_GPR8 }, + [0] = { REG_AL, REGCM_GPR8_LO }, [1] = { REG_UNNEEDED, REGCM_IMM8 }, }, }, @@ -15779,30 +16142,54 @@ static struct ins_template templates[] = { [2] = { REG_EDX, REGCM_GPR32 }, }, }, - [TEMPLATE_UMUL] = { - .lhs = { [0] = { REG_EDXEAX, REGCM_GPR64 } }, + [TEMPLATE_UMUL8] = { + .lhs = { [0] = { REG_AX, REGCM_GPR16 } }, + .rhs = { + [0] = { REG_AL, REGCM_GPR8_LO }, + [1] = { REG_UNSET, REGCM_GPR8_LO }, + }, + }, + [TEMPLATE_UMUL16] = { + .lhs = { [0] = { REG_DXAX, REGCM_DIVIDEND32 } }, + .rhs = { + [0] = { REG_AX, REGCM_GPR16 }, + [1] = { REG_UNSET, REGCM_GPR16 }, + }, + }, + [TEMPLATE_UMUL32] = { + .lhs = { [0] = { REG_EDXEAX, REGCM_DIVIDEND64 } }, .rhs = { [0] = { REG_EAX, REGCM_GPR32 }, [1] = { REG_UNSET, REGCM_GPR32 }, }, }, - [TEMPLATE_DIV] = { + [TEMPLATE_DIV8] = { .lhs = { - [0] = { REG_EAX, REGCM_GPR32 }, - [1] = { REG_EDX, REGCM_GPR32 }, + [0] = { REG_AL, REGCM_GPR8_LO }, + [1] = { REG_AH, REGCM_GPR8 }, }, .rhs = { - [0] = { REG_EDXEAX, REGCM_GPR64 }, - [1] = { REG_UNSET, REGCM_GPR32 }, + [0] = { REG_AX, REGCM_GPR16 }, + [1] = { REG_UNSET, REGCM_GPR8_LO }, }, }, - [TEMPLATE_MOD] = { + [TEMPLATE_DIV16] = { .lhs = { - [0] = { REG_EDX, REGCM_GPR32 }, - [1] = { REG_EAX, REGCM_GPR32 }, + [0] = { REG_AX, REGCM_GPR16 }, + [1] = { REG_DX, REGCM_GPR16 }, }, .rhs = { - [0] = { REG_EDXEAX, REGCM_GPR64 }, + [0] = { REG_DXAX, REGCM_DIVIDEND32 }, + [1] = { REG_UNSET, REGCM_GPR16 }, + }, + }, + [TEMPLATE_DIV32] = { + .lhs = { + [0] = { REG_EAX, REGCM_GPR32 }, + [1] = { REG_EDX, REGCM_GPR32 }, + }, + .rhs = { + [0] = { REG_EDXEAX, REGCM_DIVIDEND64 }, [1] = { REG_UNSET, REGCM_GPR32 }, }, }, @@ -15828,11 +16215,11 @@ static void fixup_branches(struct compile_state *state, branch = entry->member; test = pre_triple(state, branch, cmp->op, cmp->type, left, right); - test->template_id = TEMPLATE_TEST; + 
test->template_id = TEMPLATE_TEST32; if (cmp->op == OP_CMP) { - test->template_id = TEMPLATE_CMP_REG; + test->template_id = TEMPLATE_CMP32_REG; if (get_imm32(test, &RHS(test, 1))) { - test->template_id = TEMPLATE_CMP_IMM; + test->template_id = TEMPLATE_CMP32_IMM; } } use_triple(RHS(test, 0), test); @@ -15859,11 +16246,11 @@ static void bool_cmp(struct compile_state *state, /* Modify the comparison operator */ ins->op = cmp_op; - ins->template_id = TEMPLATE_TEST; + ins->template_id = TEMPLATE_TEST32; if (cmp_op == OP_CMP) { - ins->template_id = TEMPLATE_CMP_REG; + ins->template_id = TEMPLATE_CMP32_REG; if (get_imm32(ins, &RHS(ins, 1))) { - ins->template_id = TEMPLATE_CMP_IMM; + ins->template_id = TEMPLATE_CMP32_IMM; } } /* Generate the instruction sequence that will transform the @@ -15974,6 +16361,47 @@ struct reg_info arch_reg_rhs(struct compile_state *state, struct triple *ins, in return result; } +static struct triple *mod_div(struct compile_state *state, + struct triple *ins, int div_op, int index) +{ + struct triple *div, *piece0, *piece1; + + /* Generate a piece to hold the remainder */ + piece1 = post_triple(state, ins, OP_PIECE, ins->type, 0, 0); + piece1->u.cval = 1; + + /* Generate a piece to hold the quotient */ + piece0 = post_triple(state, ins, OP_PIECE, ins->type, 0, 0); + piece0->u.cval = 0; + + /* Generate the appropriate division instruction */ + div = post_triple(state, ins, div_op, ins->type, 0, 0); + RHS(div, 0) = RHS(ins, 0); + RHS(div, 1) = RHS(ins, 1); + LHS(div, 0) = piece0; + LHS(div, 1) = piece1; + div->template_id = TEMPLATE_DIV32; + use_triple(RHS(div, 0), div); + use_triple(RHS(div, 1), div); + use_triple(LHS(div, 0), div); + use_triple(LHS(div, 1), div); + + /* Hook on piece0 */ + MISC(piece0, 0) = div; + use_triple(div, piece0); + + /* Hook on piece1 */ + MISC(piece1, 0) = div; + use_triple(div, piece1); + + /* Replate uses of ins with the appropriate piece of the div */ + propogate_use(state, ins, LHS(div, index)); + 
release_triple(state, ins); + + /* Return the address of the next instruction */ + return piece1->next; +} + static struct triple *transform_to_arch_instruction( struct compile_state *state, struct triple *ins) { @@ -16089,38 +16517,45 @@ static struct triple *transform_to_arch_instruction( case OP_XOR: case OP_OR: case OP_SMUL: - ins->template_id = TEMPLATE_BINARY_REG; + ins->template_id = TEMPLATE_BINARY32_REG; if (get_imm32(ins, &RHS(ins, 1))) { - ins->template_id = TEMPLATE_BINARY_IMM; + ins->template_id = TEMPLATE_BINARY32_IMM; } break; -#if 0 - /* This code does not work yet */ + case OP_SDIVT: + case OP_UDIVT: + ins->template_id = TEMPLATE_DIV32; + next = after_lhs(state, ins); + break; + /* FIXME UMUL does not work yet.. */ case OP_UMUL: - ins->template_id = TEMPLATE_UMUL; + ins->template_id = TEMPLATE_UMUL32; break; case OP_UDIV: + next = mod_div(state, ins, OP_UDIVT, 0); + break; case OP_SDIV: - ins->template_id = TEMPLATE_DIV; + next = mod_div(state, ins, OP_SDIVT, 0); break; case OP_UMOD: + next = mod_div(state, ins, OP_UDIVT, 1); + break; case OP_SMOD: - ins->template_id = TEMPLATE_MOD; + next = mod_div(state, ins, OP_SDIVT, 1); break; -#endif case OP_SL: case OP_SSR: case OP_USR: - ins->template_id = TEMPLATE_SL_CL; + ins->template_id = TEMPLATE_SL32_CL; if (get_imm8(ins, &RHS(ins, 1))) { - ins->template_id = TEMPLATE_SL_IMM; + ins->template_id = TEMPLATE_SL32_IMM; } else if (size_of(state, RHS(ins, 1)->type) > 1) { typed_pre_copy(state, &char_type, ins, 1); } break; case OP_INVERT: case OP_NEG: - ins->template_id = TEMPLATE_UNARY; + ins->template_id = TEMPLATE_UNARY32; break; case OP_EQ: bool_cmp(state, ins, OP_CMP, OP_JMP_EQ, OP_SET_EQ); @@ -16209,12 +16644,12 @@ static struct triple *transform_to_arch_instruction( break; /* Already transformed instructions */ case OP_TEST: - ins->template_id = TEMPLATE_TEST; + ins->template_id = TEMPLATE_TEST32; break; case OP_CMP: - ins->template_id = TEMPLATE_CMP_REG; + ins->template_id = TEMPLATE_CMP32_REG; if 
(get_imm32(ins, &RHS(ins, 1))) { - ins->template_id = TEMPLATE_CMP_IMM; + ins->template_id = TEMPLATE_CMP32_IMM; } break; case OP_JMP_EQ: case OP_JMP_NOTEQ: @@ -16241,18 +16676,21 @@ static struct triple *transform_to_arch_instruction( return next; } +static long next_label(struct compile_state *state) +{ + static long label_counter = 0; + return ++label_counter; +} static void generate_local_labels(struct compile_state *state) { struct triple *first, *label; - int label_counter; - label_counter = 0; first = RHS(state->main_function, 0); label = first; do { if ((label->op == OP_LABEL) || (label->op == OP_SDECL)) { if (label->use) { - label->u.cval = ++label_counter; + label->u.cval = next_label(state); } else { label->u.cval = 0; } @@ -16281,6 +16719,9 @@ static int check_reg(struct compile_state *state, static const char *arch_reg_str(int reg) { +#if REG_XMM7 != 44 +#error "Registers have renumberd fix arch_reg_str" +#endif static const char *regs[] = { "%unset", "%unneeded", @@ -16289,6 +16730,7 @@ static const char *arch_reg_str(int reg) "%ax", "%bx", "%cx", "%dx", "%si", "%di", "%bp", "%sp", "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi", "%ebp", "%esp", "%edx:%eax", + "%dx:%ax", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", @@ -16343,11 +16785,75 @@ static void print_const_val( } } +static void print_const(struct compile_state *state, + struct triple *ins, FILE *fp) +{ + switch(ins->op) { + case OP_INTCONST: + switch(ins->type->type & TYPE_MASK) { + case TYPE_CHAR: + case TYPE_UCHAR: + fprintf(fp, ".byte 0x%02lx\n", ins->u.cval); + break; + case TYPE_SHORT: + case TYPE_USHORT: + fprintf(fp, ".short 0x%04lx\n", ins->u.cval); + break; + case TYPE_INT: + case TYPE_UINT: + case TYPE_LONG: + case TYPE_ULONG: + fprintf(fp, ".int %lu\n", ins->u.cval); + break; + default: + internal_error(state, ins, "Unknown constant type"); + } + break; + case OP_ADDRCONST: + fprintf(fp, " .int 
L%s%lu+%lu ", + state->label_prefix, + MISC(ins, 0)->u.cval, + ins->u.cval); + break; + case OP_BLOBCONST: + { + unsigned char *blob; + size_t size, i; + size = size_of(state, ins->type); + blob = ins->u.blob; + for(i = 0; i < size; i++) { + fprintf(fp, ".byte 0x%02x\n", + blob[i]); + } + break; + } + default: + internal_error(state, ins, "Unknown constant type"); + break; + } +} + +#define TEXT_SECTION ".rom.text" +#define DATA_SECTION ".rom.data" + +static long get_const_pool_ref( + struct compile_state *state, struct triple *ins, FILE *fp) +{ + long ref; + ref = next_label(state); + fprintf(fp, ".section \"" DATA_SECTION "\"\n"); + fprintf(fp, ".balign %d\n", align_of(state, ins->type)); + fprintf(fp, "L%s%lu:\n", state->label_prefix, ref); + print_const(state, ins, fp); + fprintf(fp, ".section \"" TEXT_SECTION "\"\n"); + return ref; +} + static void print_binary_op(struct compile_state *state, const char *op, struct triple *ins, FILE *fp) { unsigned mask; - mask = REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8; + mask = REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8_LO; if (RHS(ins, 0)->id != ins->id) { internal_error(state, ins, "invalid register assignment"); } @@ -16375,7 +16881,7 @@ static void print_unary_op(struct compile_state *state, const char *op, struct triple *ins, FILE *fp) { unsigned mask; - mask = REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8; + mask = REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8_LO; fprintf(fp, "\t%s %s\n", op, reg(state, RHS(ins, 0), mask)); @@ -16385,7 +16891,7 @@ static void print_op_shift(struct compile_state *state, const char *op, struct triple *ins, FILE *fp) { unsigned mask; - mask = REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8; + mask = REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8_LO; if (RHS(ins, 0)->id != ins->id) { internal_error(state, ins, "invalid register assignment"); } @@ -16398,7 +16904,7 @@ static void print_op_shift(struct compile_state *state, else { fprintf(fp, "\t%s %s, %s\n", op, - reg(state, RHS(ins, 1), REGCM_GPR8), + reg(state, RHS(ins, 1), 
REGCM_GPR8_LO), reg(state, RHS(ins, 0), mask)); } } @@ -16410,7 +16916,7 @@ static void print_op_in(struct compile_state *state, struct triple *ins, FILE *f int dreg; mask = 0; switch(ins->op) { - case OP_INB: op = "inb", mask = REGCM_GPR8; break; + case OP_INB: op = "inb", mask = REGCM_GPR8_LO; break; case OP_INW: op = "inw", mask = REGCM_GPR16; break; case OP_INL: op = "inl", mask = REGCM_GPR32; break; default: @@ -16448,7 +16954,7 @@ static void print_op_out(struct compile_state *state, struct triple *ins, FILE * int lreg; mask = 0; switch(ins->op) { - case OP_OUTB: op = "outb", mask = REGCM_GPR8; break; + case OP_OUTB: op = "outb", mask = REGCM_GPR8_LO; break; case OP_OUTW: op = "outw", mask = REGCM_GPR16; break; case OP_OUTL: op = "outl", mask = REGCM_GPR32; break; default: @@ -16493,10 +16999,6 @@ static void print_op_move(struct compile_state *state, src = RHS(ins, 0); dst = ins; } - else if (ins->op == OP_WRITE) { - dst = LHS(ins, 0); - src = RHS(ins, 0); - } else { internal_error(state, ins, "unknown move operation"); src = dst = 0; @@ -16504,13 +17006,13 @@ static void print_op_move(struct compile_state *state, if (!is_const(src)) { int src_reg, dst_reg; int src_regcm, dst_regcm; - src_reg = ID_REG(src->id); + src_reg = ID_REG(src->id); dst_reg = ID_REG(dst->id); src_regcm = arch_reg_regcm(state, src_reg); - dst_regcm = arch_reg_regcm(state, dst_reg); + dst_regcm = arch_reg_regcm(state, dst_reg); /* If the class is the same just move the register */ if (src_regcm & dst_regcm & - (REGCM_GPR8 | REGCM_GPR16 | REGCM_GPR32)) { + (REGCM_GPR8_LO | REGCM_GPR16 | REGCM_GPR32)) { if ((src_reg != dst_reg) || !omit_copy) { fprintf(fp, "\tmov %s, %s\n", reg(state, src, src_regcm), @@ -16539,7 +17041,7 @@ static void print_op_move(struct compile_state *state, } /* Move 32bit to 8bit */ else if ((src_regcm & REGCM_GPR32_8) && - (dst_regcm & REGCM_GPR8)) + (dst_regcm & REGCM_GPR8_LO)) { src_reg = (src_reg - REGC_GPR32_8_FIRST) + REGC_GPR8_FIRST; if ((src_reg != dst_reg) 
|| !omit_copy) { @@ -16550,7 +17052,7 @@ static void print_op_move(struct compile_state *state, } /* Move 16bit to 8bit */ else if ((src_regcm & REGCM_GPR16_8) && - (dst_regcm & REGCM_GPR8)) + (dst_regcm & REGCM_GPR8_LO)) { src_reg = (src_reg - REGC_GPR16_8_FIRST) + REGC_GPR8_FIRST; if ((src_reg != dst_reg) || !omit_copy) { @@ -16560,7 +17062,7 @@ static void print_op_move(struct compile_state *state, } } /* Move 8/16bit to 16/32bit */ - else if ((src_regcm & (REGCM_GPR8 | REGCM_GPR16)) && + else if ((src_regcm & (REGCM_GPR8_LO | REGCM_GPR16)) && (dst_regcm & (REGCM_GPR16 | REGCM_GPR32))) { const char *op; op = is_signed(src->type)? "movsx": "movzx"; @@ -16577,15 +17079,26 @@ static void print_op_move(struct compile_state *state, reg(state, dst, dst_regcm)); } } - /* Move between mmx registers or mmx & sse registers */ - else if ((src_regcm & (REGCM_MMX | REGCM_XMM)) && - (dst_regcm & (REGCM_MMX | REGCM_XMM))) { + /* Move between mmx registers */ + else if ((src_regcm & dst_regcm & REGCM_MMX)) { if ((src_reg != dst_reg) || !omit_copy) { fprintf(fp, "\tmovq %s, %s\n", reg(state, src, src_regcm), reg(state, dst, dst_regcm)); } } + /* Move from sse to mmx registers */ + else if ((src_regcm & REGCM_XMM) && (dst_regcm & REGCM_MMX)) { + fprintf(fp, "\tmovdq2q %s, %s\n", + reg(state, src, src_regcm), + reg(state, dst, dst_regcm)); + } + /* Move from mmx to sse registers */ + else if ((src_regcm & REGCM_MMX) && (dst_regcm & REGCM_XMM)) { + fprintf(fp, "\tmovq2dq %s, %s\n", + reg(state, src, src_regcm), + reg(state, dst, dst_regcm)); + } /* Move between 32bit gprs & mmx/sse registers */ else if ((src_regcm & (REGCM_GPR32 | REGCM_MMX | REGCM_XMM)) && (dst_regcm & (REGCM_GPR32 | REGCM_MMX | REGCM_XMM))) { @@ -16607,7 +17120,6 @@ static void print_op_move(struct compile_state *state, arch_reg_str(mid_reg), arch_reg_str(dst_reg)); } - /* Move from mmx/sse registers to 16bit gprs */ else if ((src_regcm & (REGCM_MMX | REGCM_XMM)) && (dst_regcm & REGCM_GPR16)) { @@ -16616,10 
+17128,49 @@ static void print_op_move(struct compile_state *state, arch_reg_str(src_reg), arch_reg_str(dst_reg)); } - + /* Move from gpr to 64bit dividend */ + else if ((src_regcm & (REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8_LO)) && + (dst_regcm & REGCM_DIVIDEND64)) { + const char *extend; + extend = is_signed(src->type)? "cltd":"movl $0, %edx"; + fprintf(fp, "\tmov %s, %%eax\n\t%s\n", + arch_reg_str(src_reg), + extend); + } + /* Move from 64bit gpr to gpr */ + else if ((src_regcm & REGCM_DIVIDEND64) && + (dst_regcm & (REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8_LO))) { + if (dst_regcm & REGCM_GPR32) { + src_reg = REG_EAX; + } + else if (dst_regcm & REGCM_GPR16) { + src_reg = REG_AX; + } + else if (dst_regcm & REGCM_GPR8_LO) { + src_reg = REG_AL; + } + fprintf(fp, "\tmov %s, %s\n", + arch_reg_str(src_reg), + arch_reg_str(dst_reg)); + } + /* Move from mmx/sse registers to 64bit gpr */ + else if ((src_regcm & (REGCM_MMX | REGCM_XMM)) && + (dst_regcm & REGCM_DIVIDEND64)) { + const char *extend; + extend = is_signed(src->type)? 
"cltd": "movl $0, %edx"; + fprintf(fp, "\tmovd %s, %%eax\n\t%s\n", + arch_reg_str(src_reg), + extend); + } + /* Move from 64bit gpr to mmx/sse register */ + else if ((src_regcm & REGCM_DIVIDEND64) && + (dst_regcm & (REGCM_XMM | REGCM_MMX))) { + fprintf(fp, "\tmovd %%eax, %s\n", + arch_reg_str(dst_reg)); + } #if X86_4_8BIT_GPRS /* Move from 8bit gprs to mmx/sse registers */ - else if ((src_regcm & REGCM_GPR8) && (src_reg <= REG_DL) && + else if ((src_regcm & REGCM_GPR8_LO) && (src_reg <= REG_DL) && (dst_regcm & (REGCM_MMX | REGCM_XMM))) { const char *op; int mid_reg; @@ -16634,7 +17185,7 @@ static void print_op_move(struct compile_state *state, } /* Move from mmx/sse registers and 8bit gprs */ else if ((src_regcm & (REGCM_MMX | REGCM_XMM)) && - (dst_regcm & REGCM_GPR8) && (dst_reg <= REG_DL)) { + (dst_regcm & REGCM_GPR8_LO) && (dst_reg <= REG_DL)) { int mid_reg; mid_reg = (dst_reg - REGC_GPR8_FIRST) + REGC_GPR32_FIRST; fprintf(fp, "\tmovd %s, %s\n", @@ -16643,7 +17194,7 @@ static void print_op_move(struct compile_state *state, } /* Move from 32bit gprs to 8bit gprs */ else if ((src_regcm & REGCM_GPR32) && - (dst_regcm & REGCM_GPR8)) { + (dst_regcm & REGCM_GPR8_LO)) { dst_reg = (dst_reg - REGC_GPR8_FIRST) + REGC_GPR32_FIRST; if ((src_reg != dst_reg) || !omit_copy) { fprintf(fp, "\tmov %s, %s\n", @@ -16653,7 +17204,7 @@ static void print_op_move(struct compile_state *state, } /* Move from 16bit gprs to 8bit gprs */ else if ((src_regcm & REGCM_GPR16) && - (dst_regcm & REGCM_GPR8)) { + (dst_regcm & REGCM_GPR8_LO)) { dst_reg = (dst_reg - REGC_GPR8_FIRST) + REGC_GPR16_FIRST; if ((src_reg != dst_reg) || !omit_copy) { fprintf(fp, "\tmov %s, %s\n", @@ -16667,10 +17218,44 @@ static void print_op_move(struct compile_state *state, } } else { - fprintf(fp, "\tmov "); - print_const_val(state, src, fp); - fprintf(fp, ", %s\n", - reg(state, dst, REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8)); + int dst_reg; + int dst_regcm; + dst_reg = ID_REG(dst->id); + dst_regcm = arch_reg_regcm(state, 
dst_reg); + if (dst_regcm & (REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8_LO)) { + fprintf(fp, "\tmov "); + print_const_val(state, src, fp); + fprintf(fp, ", %s\n", + reg(state, dst, REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8_LO)); + } + else if (dst_regcm & REGCM_DIVIDEND64) { + if (size_of(state, dst->type) > 4) { + internal_error(state, ins, "64bit constant..."); + } + fprintf(fp, "\tmov $0, %%edx\n"); + fprintf(fp, "\tmov "); + print_const_val(state, src, fp); + fprintf(fp, ", %%eax\n"); + } + else if (dst_regcm & REGCM_DIVIDEND32) { + if (size_of(state, dst->type) > 2) { + internal_error(state, ins, "32bit constant..."); + } + fprintf(fp, "\tmov $0, %%dx\n"); + fprintf(fp, "\tmov "); + print_const_val(state, src, fp); + fprintf(fp, ", %%ax"); + } + else if (dst_regcm & (REGCM_XMM | REGCM_MMX)) { + long ref; + ref = get_const_pool_ref(state, src, fp); + fprintf(fp, "\tmovq L%s%lu, %s\n", + state->label_prefix, ref, + reg(state, dst, (REGCM_XMM | REGCM_MMX))); + } + else { + internal_error(state, ins, "unknown copy immediate type"); + } } } @@ -16685,7 +17270,7 @@ static void print_op_load(struct compile_state *state, } fprintf(fp, "\tmov (%s), %s\n", reg(state, src, REGCM_GPR32), - reg(state, dst, REGCM_GPR8 | REGCM_GPR16 | REGCM_GPR32)); + reg(state, dst, REGCM_GPR8_LO | REGCM_GPR16 | REGCM_GPR32)); } @@ -16693,8 +17278,8 @@ static void print_op_store(struct compile_state *state, struct triple *ins, FILE *fp) { struct triple *dst, *src; - dst = LHS(ins, 0); - src = RHS(ins, 0); + dst = RHS(ins, 0); + src = RHS(ins, 1); if (is_const(src) && (src->op == OP_INTCONST)) { long_t value; value = (long_t)(src->u.cval); @@ -16706,7 +17291,7 @@ static void print_op_store(struct compile_state *state, else if (is_const(dst) && (dst->op == OP_INTCONST)) { fprintf(fp, "\tmov%s %s, 0x%08lx\n", type_suffix(state, src->type), - reg(state, src, REGCM_GPR8 | REGCM_GPR16 | REGCM_GPR32), + reg(state, src, REGCM_GPR8_LO | REGCM_GPR16 | REGCM_GPR32), dst->u.cval); } else { @@ -16715,7 
+17300,7 @@ static void print_op_store(struct compile_state *state, } fprintf(fp, "\tmov%s %s, (%s)\n", type_suffix(state, src->type), - reg(state, src, REGCM_GPR8 | REGCM_GPR16 | REGCM_GPR32), + reg(state, src, REGCM_GPR8_LO | REGCM_GPR16 | REGCM_GPR32), reg(state, dst, REGCM_GPR32)); } @@ -16742,7 +17327,7 @@ static void print_op_cmp(struct compile_state *state, { unsigned mask; int dreg; - mask = REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8; + mask = REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8_LO; dreg = check_reg(state, ins, REGCM_FLAGS); if (!reg_is_reg(state, dreg, REG_EFLAGS)) { internal_error(state, ins, "bad dest register for cmp"); @@ -16770,7 +17355,7 @@ static void print_op_test(struct compile_state *state, struct triple *ins, FILE *fp) { unsigned mask; - mask = REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8; + mask = REGCM_GPR32 | REGCM_GPR16 | REGCM_GPR8_LO; fprintf(fp, "\ttest %s, %s\n", reg(state, RHS(ins, 0), mask), reg(state, RHS(ins, 0), mask)); @@ -16857,7 +17442,7 @@ static void print_op_set(struct compile_state *state, break; } fprintf(fp, "\t%s %s\n", - sop, reg(state, set, REGCM_GPR8)); + sop, reg(state, set, REGCM_GPR8_LO)); } static void print_op_bit_scan(struct compile_state *state, @@ -16883,50 +17468,6 @@ static void print_op_bit_scan(struct compile_state *state, reg(state, ins, REGCM_GPR32)); } -static void print_const(struct compile_state *state, - struct triple *ins, FILE *fp) -{ - switch(ins->op) { - case OP_INTCONST: - switch(ins->type->type & TYPE_MASK) { - case TYPE_CHAR: - case TYPE_UCHAR: - fprintf(fp, ".byte 0x%02lx\n", ins->u.cval); - break; - case TYPE_SHORT: - case TYPE_USHORT: - fprintf(fp, ".short 0x%04lx\n", ins->u.cval); - break; - case TYPE_INT: - case TYPE_UINT: - case TYPE_LONG: - case TYPE_ULONG: - fprintf(fp, ".int %lu\n", ins->u.cval); - break; - default: - internal_error(state, ins, "Unknown constant type"); - } - break; - case OP_BLOBCONST: - { - unsigned char *blob; - size_t size, i; - size = size_of(state, ins->type); - blob = 
ins->u.blob; - for(i = 0; i < size; i++) { - fprintf(fp, ".byte 0x%02x\n", - blob[i]); - } - break; - } - default: - internal_error(state, ins, "Unknown constant type"); - break; - } -} - -#define TEXT_SECTION ".rom.text" -#define DATA_SECTION ".rom.data" static void print_sdecl(struct compile_state *state, struct triple *ins, FILE *fp) @@ -16970,7 +17511,6 @@ static void print_instruction(struct compile_state *state, case OP_SDECL: print_sdecl(state, ins, fp); break; - case OP_WRITE: case OP_COPY: print_op_move(state, ins, fp); break; @@ -17020,6 +17560,15 @@ static void print_instruction(struct compile_state *state, case OP_HLT: fprintf(fp, "\thlt\n"); break; + case OP_SDIVT: + fprintf(fp, "\tidiv %s\n", reg(state, RHS(ins, 1), REGCM_GPR32)); + break; + case OP_UDIVT: + fprintf(fp, "\tdiv %s\n", reg(state, RHS(ins, 1), REGCM_GPR32)); + break; + case OP_UMUL: + fprintf(fp, "\tmul %s\n", reg(state, RHS(ins, 1), REGCM_GPR32)); + break; case OP_LABEL: if (!ins->use) { return; @@ -17029,11 +17578,9 @@ static void print_instruction(struct compile_state *state, /* Ignore OP_PIECE */ case OP_PIECE: break; - /* Operations I am not yet certain how to handle */ - case OP_UMUL: + /* Operations that should never get here */ case OP_SDIV: case OP_UDIV: case OP_SMOD: case OP_UMOD: - /* Operations that should never get here */ case OP_LTRUE: case OP_LFALSE: case OP_EQ: case OP_NOTEQ: case OP_SLESS: case OP_ULESS: case OP_SMORE: case OP_UMORE: case OP_SLESSEQ: case OP_ULESSEQ: case OP_SMOREEQ: case OP_UMOREEQ: @@ -17050,6 +17597,8 @@ static void print_instructions(struct compile_state *state) int print_location; struct occurance *last_occurance; FILE *fp; + int max_inline_depth; + max_inline_depth = 0; print_location = 1; last_occurance = 0; fp = state->output; @@ -17068,8 +17617,11 @@ static void print_instructions(struct compile_state *state) } else { struct occurance *ptr; + int inline_depth; fprintf(fp, "\t/*\n"); + inline_depth = 0; for(ptr = ins->occurance; ptr; ptr = 
ptr->parent) { + inline_depth++; fprintf(fp, "\t * %s,%s:%d.%d\n", ptr->function, ptr->filename, @@ -17077,7 +17629,9 @@ static void print_instructions(struct compile_state *state) ptr->col); } fprintf(fp, "\t */\n"); - + if (inline_depth > max_inline_depth) { + max_inline_depth = inline_depth; + } } if (last_occurance) { put_occurance(last_occurance); @@ -17089,8 +17643,12 @@ static void print_instructions(struct compile_state *state) print_instruction(state, ins, fp); ins = ins->next; } while(ins != first); - + if (print_location) { + fprintf(fp, "/* max inline depth %d */\n", + max_inline_depth); + } } + static void generate_code(struct compile_state *state) { generate_local_labels(state); diff --git a/util/romcc/tests/raminit_test5.c b/util/romcc/tests/raminit_test5.c new file mode 100644 index 0000000000..f386a75ebf --- /dev/null +++ b/util/romcc/tests/raminit_test5.c @@ -0,0 +1,1392 @@ + + + + + + + +typedef unsigned char uint8_t; +typedef signed char int8_t; + +typedef unsigned short uint16_t; +typedef signed short int16_t; + +typedef unsigned int uint32_t; +typedef signed int int32_t; + + + + + + + +typedef unsigned char uint_least8_t; +typedef signed char int_least8_t; + +typedef unsigned short uint_least16_t; +typedef signed short int_least16_t; + +typedef unsigned int uint_least32_t; +typedef signed int int_least32_t; + + + + + + + +typedef unsigned char uint_fast8_t; +typedef signed char int_fast8_t; + +typedef unsigned int uint_fast16_t; +typedef signed int int_fast16_t; + +typedef unsigned int uint_fast32_t; +typedef signed int int_fast32_t; + + + + + + + +typedef int intptr_t; +typedef unsigned int uintptr_t; + + + + + + +typedef long int intmax_t; +typedef unsigned long int uintmax_t; + + + + +static void outb(unsigned char value, unsigned short port) +{ + __builtin_outb(value, port); +} + +static void outw(unsigned short value, unsigned short port) +{ + __builtin_outw(value, port); +} + +static void outl(unsigned int value, unsigned short port) +{ 
+ __builtin_outl(value, port); +} + + +static unsigned char inb(unsigned short port) +{ + return __builtin_inb(port); +} + + +static unsigned char inw(unsigned short port) +{ + return __builtin_inw(port); +} + +static unsigned char inl(unsigned short port) +{ + return __builtin_inl(port); +} + +static void hlt(void) +{ + __builtin_hlt(); +} + +int log2(int value) +{ + + + + + + + return __builtin_bsr(value); +} + + +typedef __builtin_msr_t msr_t; + +static msr_t rdmsr(unsigned long index) +{ + return __builtin_rdmsr(index); +} + +static void wrmsr(unsigned long index, msr_t msr) +{ + __builtin_wrmsr(index, msr.lo, msr.hi); +} + +typedef unsigned device_t; + +static unsigned char pci_read_config8(device_t dev, unsigned where) +{ + unsigned addr; + addr = dev | where; + outl(0x80000000 | (addr & ~3), 0xCF8); + return inb(0xCFC + (addr & 3)); +} + +static unsigned short pci_read_config16(device_t dev, unsigned where) +{ + unsigned addr; + addr = dev | where; + outl(0x80000000 | (addr & ~3), 0xCF8); + return inw(0xCFC + (addr & 2)); +} + +static unsigned int pci_read_config32(device_t dev, unsigned where) +{ + unsigned addr; + addr = dev | where; + outl(0x80000000 | (addr & ~3), 0xCF8); + return inl(0xCFC); +} + +static void pci_write_config8(device_t dev, unsigned where, unsigned char value) +{ + unsigned addr; + addr = dev | where; + outl(0x80000000 | (addr & ~3), 0xCF8); + outb(value, 0xCFC + (addr & 3)); +} + +static void pci_write_config16(device_t dev, unsigned where, unsigned short value) +{ + unsigned addr; + addr = dev | where; + outl(0x80000000 | (addr & ~3), 0xCF8); + outw(value, 0xCFC + (addr & 2)); +} + +static void pci_write_config32(device_t dev, unsigned where, unsigned int value) +{ + unsigned addr; + addr = dev | where; + outl(0x80000000 | (addr & ~3), 0xCF8); + outl(value, 0xCFC); +} + + +static device_t pci_locate_device(unsigned pci_id, device_t dev) +{ + for(; dev <= ( (((255) & 0xFF) << 16) | (((31) & 0x1f) << 11) | (((7) & 0x7) << 8)); dev += ( 
(((0) & 0xFF) << 16) | (((0) & 0x1f) << 11) | (((1) & 0x7) << 8))) { + unsigned int id; + id = pci_read_config32(dev, 0); + if (id == pci_id) { + return dev; + } + } + return (0xffffffffU); +} + + + + + +static int uart_can_tx_byte(void) +{ + return inb(0x3f8 + 0x05) & 0x20; +} + +static void uart_wait_to_tx_byte(void) +{ + while(!uart_can_tx_byte()) + ; +} + +static void uart_wait_until_sent(void) +{ + while(!(inb(0x3f8 + 0x05) & 0x40)) + ; +} + +static void uart_tx_byte(unsigned char data) +{ + uart_wait_to_tx_byte(); + outb(data, 0x3f8 + 0x00); + + uart_wait_until_sent(); +} + +static void uart_init(void) +{ + + outb(0x0, 0x3f8 + 0x01); + + outb(0x01, 0x3f8 + 0x02); + + outb(0x80 | 0x3, 0x3f8 + 0x03); + + outb((115200/115200) & 0xFF, 0x3f8 + 0x00); + outb(((115200/115200) >> 8) & 0xFF, 0x3f8 + 0x01); + + outb(0x3, 0x3f8 + 0x03); +} + + + + + +static void __console_tx_byte(unsigned char byte) +{ + uart_tx_byte(byte); +} + +static void __console_tx_nibble(unsigned nibble) +{ + unsigned char digit; + digit = nibble + '0'; + if (digit > '9') { + digit += 39; + } + __console_tx_byte(digit); +} + +static void __console_tx_char(int loglevel, unsigned char byte) +{ + if (8 > loglevel) { + uart_tx_byte(byte); + } +} + +static void __console_tx_hex8(int loglevel, unsigned char value) +{ + if (8 > loglevel) { + __console_tx_nibble((value >> 4U) & 0x0fU); + __console_tx_nibble(value & 0x0fU); + } +} + +static void __console_tx_hex16(int loglevel, unsigned short value) +{ + if (8 > loglevel) { + __console_tx_nibble((value >> 12U) & 0x0fU); + __console_tx_nibble((value >> 8U) & 0x0fU); + __console_tx_nibble((value >> 4U) & 0x0fU); + __console_tx_nibble(value & 0x0fU); + } +} + +static void __console_tx_hex32(int loglevel, unsigned int value) +{ + if (8 > loglevel) { + __console_tx_nibble((value >> 28U) & 0x0fU); + __console_tx_nibble((value >> 24U) & 0x0fU); + __console_tx_nibble((value >> 20U) & 0x0fU); + __console_tx_nibble((value >> 16U) & 0x0fU); + 
__console_tx_nibble((value >> 12U) & 0x0fU); + __console_tx_nibble((value >> 8U) & 0x0fU); + __console_tx_nibble((value >> 4U) & 0x0fU); + __console_tx_nibble(value & 0x0fU); + } +} + +static void __console_tx_string(int loglevel, const char *str) +{ + if (8 > loglevel) { + unsigned char ch; + while((ch = *str++) != '\0') { + __console_tx_byte(ch); + } + } +} + +static void print_emerg_char(unsigned char byte) { __console_tx_char(0, byte); } +static void print_emerg_hex8(unsigned char value){ __console_tx_hex8(0, value); } +static void print_emerg_hex16(unsigned short value){ __console_tx_hex16(0, value); } +static void print_emerg_hex32(unsigned int value) { __console_tx_hex32(0, value); } +static void print_emerg(const char *str) { __console_tx_string(0, str); } + +static void print_alert_char(unsigned char byte) { __console_tx_char(1, byte); } +static void print_alert_hex8(unsigned char value) { __console_tx_hex8(1, value); } +static void print_alert_hex16(unsigned short value){ __console_tx_hex16(1, value); } +static void print_alert_hex32(unsigned int value) { __console_tx_hex32(1, value); } +static void print_alert(const char *str) { __console_tx_string(1, str); } + +static void print_crit_char(unsigned char byte) { __console_tx_char(2, byte); } +static void print_crit_hex8(unsigned char value) { __console_tx_hex8(2, value); } +static void print_crit_hex16(unsigned short value){ __console_tx_hex16(2, value); } +static void print_crit_hex32(unsigned int value) { __console_tx_hex32(2, value); } +static void print_crit(const char *str) { __console_tx_string(2, str); } + +static void print_err_char(unsigned char byte) { __console_tx_char(3, byte); } +static void print_err_hex8(unsigned char value) { __console_tx_hex8(3, value); } +static void print_err_hex16(unsigned short value){ __console_tx_hex16(3, value); } +static void print_err_hex32(unsigned int value) { __console_tx_hex32(3, value); } +static void print_err(const char *str) { __console_tx_string(3, str); 
} + +static void print_warning_char(unsigned char byte) { __console_tx_char(4, byte); } +static void print_warning_hex8(unsigned char value) { __console_tx_hex8(4, value); } +static void print_warning_hex16(unsigned short value){ __console_tx_hex16(4, value); } +static void print_warning_hex32(unsigned int value) { __console_tx_hex32(4, value); } +static void print_warning(const char *str) { __console_tx_string(4, str); } + +static void print_notice_char(unsigned char byte) { __console_tx_char(5, byte); } +static void print_notice_hex8(unsigned char value) { __console_tx_hex8(5, value); } +static void print_notice_hex16(unsigned short value){ __console_tx_hex16(5, value); } +static void print_notice_hex32(unsigned int value) { __console_tx_hex32(5, value); } +static void print_notice(const char *str) { __console_tx_string(5, str); } + +static void print_info_char(unsigned char byte) { __console_tx_char(6, byte); } +static void print_info_hex8(unsigned char value) { __console_tx_hex8(6, value); } +static void print_info_hex16(unsigned short value){ __console_tx_hex16(6, value); } +static void print_info_hex32(unsigned int value) { __console_tx_hex32(6, value); } +static void print_info(const char *str) { __console_tx_string(6, str); } + +static void print_debug_char(unsigned char byte) { __console_tx_char(7, byte); } +static void print_debug_hex8(unsigned char value) { __console_tx_hex8(7, value); } +static void print_debug_hex16(unsigned short value){ __console_tx_hex16(7, value); } +static void print_debug_hex32(unsigned int value) { __console_tx_hex32(7, value); } +static void print_debug(const char *str) { __console_tx_string(7, str); } + +static void print_spew_char(unsigned char byte) { __console_tx_char(8, byte); } +static void print_spew_hex8(unsigned char value) { __console_tx_hex8(8, value); } +static void print_spew_hex16(unsigned short value){ __console_tx_hex16(8, value); } +static void print_spew_hex32(unsigned int value) { __console_tx_hex32(8, 
value); } +static void print_spew(const char *str) { __console_tx_string(8, str); } /* loglevel 8 is not below 8, so __console_tx_string sends nothing for this wrapper */ + /* Print the fixed start-up banner string through print_info (loglevel 6). */ +static void console_init(void) +{ + static const char console_test[] = + "\r\n\r\nLinuxBIOS-" + "1.1.0" + ".0Fallback" + " " + "Thu Jun 19 05:42:16 MDT 2003" + " starting...\r\n"; + print_info(console_test); +} + + /* Print str through print_emerg, then call hlt() in an endless loop; never returns. */ +static void die(const char *str) +{ + print_emerg(str); + do { + hlt(); + } while(1); +} + + + + + + + + + /* Store value at address addr with the x86 movnti instruction (store with non-temporal hint); both operands are passed in registers. */ +static void write_phys(unsigned long addr, unsigned long value) +{ + + asm volatile( + "movnti %1, (%0)" + : + : "r" (addr), "r" (value) + : + ); + + + + + +} + /* Return the unsigned long stored at address addr, read through a volatile pointer. */ +static unsigned long read_phys(unsigned long addr) +{ + volatile unsigned long *ptr; + ptr = (void *)addr; + return *ptr; +} + /* Write every address in [start, stop), stepping by 4, with its own address as the value. Prints the range first, then the current address each time its low 16 bits are zero, and finally the end address. */ +static void ram_fill(unsigned long start, unsigned long stop) +{ + unsigned long addr; + + + + print_debug("DRAM fill: "); + print_debug_hex32(start); + print_debug("-"); + print_debug_hex32(stop); + print_debug("\r\n"); + for(addr = start; addr < stop ; addr += 4) { + + if (!(addr & 0xffff)) { + print_debug_hex32(addr); + print_debug("\r"); + } + write_phys(addr, addr); + }; + + print_debug_hex32(addr); + print_debug("\r\nDRAM filled\r\n"); +} + /* Read back every address in [start, stop), stepping by 4, and expect each word to equal its own address (the pattern ram_fill writes). Each mismatch is printed at err level as address:value; the scan continues after a mismatch and nothing is returned. */ +static void ram_verify(unsigned long start, unsigned long stop) +{ + unsigned long addr; + + + + print_debug("DRAM verify: "); + print_debug_hex32(start); + print_debug_char('-'); + print_debug_hex32(stop); + print_debug("\r\n"); + for(addr = start; addr < stop ; addr += 4) { + unsigned long value; + + if (!(addr & 0xffff)) { + print_debug_hex32(addr); + print_debug("\r"); + } + value = read_phys(addr); + if (value != addr) { + + print_err_hex32(addr); + print_err_char(':'); + print_err_hex32(value); + print_err("\r\n"); + } + } + + print_debug_hex32(addr); + print_debug("\r\nDRAM verified\r\n"); +} + + /* Memory test for [start, stop): print the range, then run ram_fill followed by ram_verify. The local result is declared but never used. */ +void ram_check(unsigned long start, unsigned long stop) +{ + int result; + + + + + + print_debug("Testing DRAM : "); + print_debug_hex32(start); + print_debug("-"); + print_debug_hex32(stop); + print_debug("\r\n"); + ram_fill(start,
stop); + ram_verify(start, stop); + print_debug("Done.\n"); +} + + /* Assign unit ids to the devices that answer at PCI 0:00.0. Every config access below uses bus 0, device 0, function 0. Each pass of the outer loop walks that device's capability list; on finding capability id 0x08 (the HyperTransport capability id) whose flags word has its top three bits clear, it writes next_unitid into the low five bits of the flags and advances next_unitid by the count held in bits 5..9. The loop ends when a pass assigns nothing, when next_unitid exceeds 0x1f, or when no valid device responds. */ +static void enumerate_ht_chain(void) +{ + + + + + + unsigned next_unitid, last_unitid;; + next_unitid = 1; + do { + uint32_t id; + uint8_t hdr_type, pos; + last_unitid = next_unitid; + + id = pci_read_config32(( (((0) & 0xFF) << 16) | (((0) & 0x1f) << 11) | (((0) & 0x7) << 8)), 0x00); + /* stop when either 16-bit half of the id register reads 0x0000 or 0xffff */ + if (((id & 0xffff) == 0x0000) || ((id & 0xffff) == 0xffff) || + (((id >> 16) & 0xffff) == 0xffff) || + (((id >> 16) & 0xffff) == 0x0000)) { + break; + } + hdr_type = pci_read_config8(( (((0) & 0xFF) << 16) | (((0) & 0x1f) << 11) | (((0) & 0x7) << 8)), 0x0e); + pos = 0; + hdr_type &= 0x7f; + /* only header types 0 and 1 have their capability pointer (offset 0x34) read; otherwise pos stays 0 and the walk is skipped */ + if ((hdr_type == 0) || + (hdr_type == 1)) { + pos = pci_read_config8(( (((0) & 0xFF) << 16) | (((0) & 0x1f) << 11) | (((0) & 0x7) << 8)), 0x34); + } + while(pos != 0) { + uint8_t cap; + cap = pci_read_config8(( (((0) & 0xFF) << 16) | (((0) & 0x1f) << 11) | (((0) & 0x7) << 8)), pos + 0); + if (cap == 0x08) { + uint16_t flags; + flags = pci_read_config16(( (((0) & 0xFF) << 16) | (((0) & 0x1f) << 11) | (((0) & 0x7) << 8)), pos + 2); + if ((flags >> 13) == 0) { + unsigned count; + flags &= ~0x1f; + flags |= next_unitid & 0x1f; + count = (flags >> 5) & 0x1f; + pci_write_config16(( (((0) & 0xFF) << 16) | (((0) & 0x1f) << 11) | (((0) & 0x7) << 8)), pos + 2, flags); + next_unitid += count; + break; + } + } + pos = pci_read_config8(( (((0) & 0xFF) << 16) | (((0) & 0x1f) << 11) | (((0) & 0x7) << 8)), pos + 1); + } + } while((last_unitid != next_unitid) && (next_unitid <= 0x1f)); +} + + + /* Locate the PCI device with vendor id 0x1022 and device id 0x746b (die() if it is not found), write 0x1000 | 1 to its config register 0x58 and set bit 7 of its config register 0x41. The "enabled" message is printed before the register writes. */ +static void enable_smbus(void) +{ + device_t dev; + dev = pci_locate_device(((((0x746b) & 0xFFFF) << 16) | ((0x1022) & 0xFFFF)), 0); + if (dev == (0xffffffffU)) { + die("SMBUS controller not found\r\n"); + } + uint8_t enable; + print_debug("SMBus controller enabled\r\n"); + pci_write_config32(dev, 0x58, 0x1000 | 1); + enable = pci_read_config8(dev, 0x41); + pci_write_config8(dev, 0x41, enable | (1 << 7)); +} + + /* Short delay used between SMBus status polls: a single byte write of 0x80 to I/O port 0x80. */ +static inline void smbus_delay(void)
+{ + outb(0x80, 0x80); +} + /* Poll the 16-bit status register at I/O port 0x1000 + 0xe0 until bit 0x800 is clear, calling smbus_delay() before each read, for at most 100*1000*10 polls. Returns 0 if the bit cleared, -1 if the poll budget ran out. */ +static int smbus_wait_until_ready(void) +{ + unsigned long loops; + loops = (100*1000*10); + do { + unsigned short val; + smbus_delay(); + val = inw(0x1000 + 0xe0); + if ((val & 0x800) == 0) { + break; + } + } while(--loops); + return loops?0:-1; +} + /* Poll the same status register until bit 0x8 is clear or any bit of 0x437 is set, with the same poll budget. Returns 0 when that condition was seen, -1 on timeout. The two tests are combined with a bitwise | on 0/1 values. */ +static int smbus_wait_until_done(void) +{ + unsigned long loops; + loops = (100*1000*10); + do { + unsigned short val; + smbus_delay(); + + val = inw(0x1000 + 0xe0); + if (((val & 0x8) == 0) | ((val & 0x437) != 0)) { + break; + } + } while(--loops); + return loops?0:-1; +} + /* Read one byte at offset address from SMBus device number device through the controller registers at I/O base 0x1000. Returns the byte (0..255) on success, or -1 if the controller never became ready, the transfer timed out, or the final status is not exactly 1 << 4. global_control_register is declared but unused. */ +static int smbus_read_byte(unsigned device, unsigned address) +{ + unsigned char global_control_register; + unsigned char global_status_register; + unsigned char byte; + + if (smbus_wait_until_ready() < 0) { + return -1; + } + + /* clear bits 10, 9, 8 and 4 of register 0xe2 */ + + outw(inw(0x1000 + 0xe2) & ~((1<<10)|(1<<9)|(1<<8)|(1<<4)), 0x1000 + 0xe2); + /* register 0xe4: 7-bit device number shifted left by one, with bit 0 set */ + outw(((device & 0x7f) << 1) | 1, 0x1000 + 0xe4); + /* register 0xe8: low 8 bits of the byte offset */ + outb(address & 0xFF, 0x1000 + 0xe8); + /* set the low three bits of register 0xe2 to the value 2 */ + outw((inw(0x1000 + 0xe2) & ~7) | (0x2), 0x1000 + 0xe2); + + + /* write the status register back to itself, then zero register 0xe6 */ + outw(inw(0x1000 + 0xe0), 0x1000 + 0xe0); + + + outw(0, 0x1000 + 0xe6); + + /* set bit 3 of register 0xe2, then wait for completion */ + outw((inw(0x1000 + 0xe2) | (1 << 3)), 0x1000 + 0xe2); + + + + if (smbus_wait_until_done() < 0) { + return -1; + } + /* the 16-bit status read is truncated to 8 bits by the unsigned char variable */ + global_status_register = inw(0x1000 + 0xe0); + + + byte = inw(0x1000 + 0xe6) & 0xff; + + if (global_status_register != (1 << 4)) { + return -1; + } + return byte; +} + + + + + + + + + + + + + + + /* Apply a table of read-modify-write operations to PCI config space. register_values holds max unsigned ints laid out as triples { device-and-register, AND mask, OR value }: the low 8 bits of the first word are the register offset and the remaining bits the device. For each triple the register is read, ANDed with the mask, ORed with the value and written back. */ +static void setup_resource_map(const unsigned int *register_values, int max) +{ + int i; + print_debug("setting up resource map....\r\n"); + for(i = 0; i < max; i += 3) { + device_t dev; + unsigned where; + unsigned long reg; + + + + + + + dev = register_values[i] & ~0xff; + where = register_values[i] & 0xff; + reg = pci_read_config32(dev, where); + reg &= register_values[i+1]; + reg |= register_values[i+2]; + pci_write_config32(dev, where, reg); + + + + + + + } + print_debug("done.\r\n"); +} + /* Pass a fixed table of { register, AND mask, OR value } triples for PCI device 0:18.1 to setup_resource_map(). */ +static void setup_default_resource_map(void) +{ + static const unsigned int
register_values[] = { + + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x44) & 0xFF)), 0x0000f8f8, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x4C) & 0xFF)), 0x0000f8f8, 0x00000001, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x54) & 0xFF)), 0x0000f8f8, 0x00000002, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x5C) & 0xFF)), 0x0000f8f8, 0x00000003, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x64) & 0xFF)), 0x0000f8f8, 0x00000004, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x6C) & 0xFF)), 0x0000f8f8, 0x00000005, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x74) & 0xFF)), 0x0000f8f8, 0x00000006, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x7C) & 0xFF)), 0x0000f8f8, 0x00000007, + + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x40) & 0xFF)), 0x0000f8fc, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x48) & 0xFF)), 0x0000f8fc, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x50) & 0xFF)), 0x0000f8fc, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x58) & 0xFF)), 0x0000f8fc, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x60) & 0xFF)), 0x0000f8fc, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x68) & 0xFF)), 0x0000f8fc, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x70) & 0xFF)), 0x0000f8fc, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x78) & 0xFF)), 0x0000f8fc, 0x00000000, + + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x84) & 0xFF)), 
0x00000048, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x8C) & 0xFF)), 0x00000048, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x94) & 0xFF)), 0x00000048, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x9C) & 0xFF)), 0x00000048, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xA4) & 0xFF)), 0x00000048, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xAC) & 0xFF)), 0x00000048, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xB4) & 0xFF)), 0x00000048, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xBC) & 0xFF)), 0x00000048, 0x00000000, + + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x80) & 0xFF)), 0x000000f0, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x88) & 0xFF)), 0x000000f0, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x90) & 0xFF)), 0x000000f0, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x98) & 0xFF)), 0x000000f0, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xA0) & 0xFF)), 0x000000f0, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xA8) & 0xFF)), 0x000000f0, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xB0) & 0xFF)), 0x000000f0, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xB8) & 0xFF)), 0x000000f0, 0x00000000, + + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xC4) & 0xFF)), 0xFE000FC8, 0x01fff000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xCC) & 0xFF)), 
0xFE000FC8, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xD4) & 0xFF)), 0xFE000FC8, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xDC) & 0xFF)), 0xFE000FC8, 0x00000000, + + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xC0) & 0xFF)), 0xFE000FCC, 0x00000003, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xC8) & 0xFF)), 0xFE000FCC, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xD0) & 0xFF)), 0xFE000FCC, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xD8) & 0xFF)), 0xFE000FCC, 0x00000000, + + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xE0) & 0xFF)), 0x0000FC88, 0xff000003, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xE4) & 0xFF)), 0x0000FC88, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xE8) & 0xFF)), 0x0000FC88, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xEC) & 0xFF)), 0x0000FC88, 0x00000000, + }; + int max; + max = sizeof(register_values)/sizeof(register_values[0]); + setup_resource_map(register_values, max); +} + +static void sdram_set_registers(void) +{ + static const unsigned int register_values[] = { + + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x44) & 0xFF)), 0x0000f8f8, 0x003f0000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x4C) & 0xFF)), 0x0000f8f8, 0x00000001, + + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x54) & 0xFF)), 0x0000f8f8, 0x00000002, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x5C) & 0xFF)), 0x0000f8f8, 0x00000003, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x64) & 0xFF)), 0x0000f8f8, 
0x00000004, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x6C) & 0xFF)), 0x0000f8f8, 0x00000005, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x74) & 0xFF)), 0x0000f8f8, 0x00000006, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x7C) & 0xFF)), 0x0000f8f8, 0x00000007, + + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x40) & 0xFF)), 0x0000f8fc, 0x00000003, + + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x48) & 0xFF)), 0x0000f8fc, 0x00400000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x50) & 0xFF)), 0x0000f8fc, 0x00400000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x58) & 0xFF)), 0x0000f8fc, 0x00400000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x60) & 0xFF)), 0x0000f8fc, 0x00400000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x68) & 0xFF)), 0x0000f8fc, 0x00400000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x70) & 0xFF)), 0x0000f8fc, 0x00400000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x78) & 0xFF)), 0x0000f8fc, 0x00400000, + + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x84) & 0xFF)), 0x00000048, 0x00e1ff00, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x8C) & 0xFF)), 0x00000048, 0x00dfff00, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x94) & 0xFF)), 0x00000048, 0x00e3ff00, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x9C) & 0xFF)), 0x00000048, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xA4) & 0xFF)), 0x00000048, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xAC) & 0xFF)), 0x00000048, 
0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xB4) & 0xFF)), 0x00000048, 0x00000b00, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xBC) & 0xFF)), 0x00000048, 0x00fe0b00, + + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x80) & 0xFF)), 0x000000f0, 0x00e00003, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x88) & 0xFF)), 0x000000f0, 0x00d80003, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x90) & 0xFF)), 0x000000f0, 0x00e20003, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0x98) & 0xFF)), 0x000000f0, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xA0) & 0xFF)), 0x000000f0, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xA8) & 0xFF)), 0x000000f0, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xB0) & 0xFF)), 0x000000f0, 0x00000a03, + + + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xB8) & 0xFF)), 0x000000f0, 0x00400003, + + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xC4) & 0xFF)), 0xFE000FC8, 0x0000d000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xCC) & 0xFF)), 0xFE000FC8, 0x000ff000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xD4) & 0xFF)), 0xFE000FC8, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xDC) & 0xFF)), 0xFE000FC8, 0x00000000, + + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xC0) & 0xFF)), 0xFE000FCC, 0x0000d003, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xC8) & 0xFF)), 0xFE000FCC, 0x00001013, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xD0) & 0xFF)), 0xFE000FCC, 
0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xD8) & 0xFF)), 0xFE000FCC, 0x00000000, + + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xE0) & 0xFF)), 0x0000FC88, 0xff000003, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xE4) & 0xFF)), 0x0000FC88, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xE8) & 0xFF)), 0x0000FC88, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((1) & 0x07) << 8) | ((0xEC) & 0xFF)), 0x0000FC88, 0x00000000, + + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x07) << 8) | ((0x40) & 0xFF)), 0x001f01fe, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x07) << 8) | ((0x44) & 0xFF)), 0x001f01fe, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x07) << 8) | ((0x48) & 0xFF)), 0x001f01fe, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x07) << 8) | ((0x4C) & 0xFF)), 0x001f01fe, 0x00000000, + + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x07) << 8) | ((0x50) & 0xFF)), 0x001f01fe, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x07) << 8) | ((0x54) & 0xFF)), 0x001f01fe, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x07) << 8) | ((0x58) & 0xFF)), 0x001f01fe, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x07) << 8) | ((0x5C) & 0xFF)), 0x001f01fe, 0x00000000, + + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x07) << 8) | ((0x60) & 0xFF)), 0xC01f01ff, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x07) << 8) | ((0x64) & 0xFF)), 0xC01f01ff, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x07) << 8) | ((0x68) & 0xFF)), 0xC01f01ff, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x07) << 8) | ((0x6C) & 0xFF)), 0xC01f01ff, 
0x00000000, + + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x07) << 8) | ((0x70) & 0xFF)), 0xC01f01ff, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x07) << 8) | ((0x74) & 0xFF)), 0xC01f01ff, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x07) << 8) | ((0x78) & 0xFF)), 0xC01f01ff, 0x00000000, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x07) << 8) | ((0x7C) & 0xFF)), 0xC01f01ff, 0x00000000, + + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x07) << 8) | ((0x80) & 0xFF)), 0xffff8888, 0x00000000, + + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x07) << 8) | ((0x88) & 0xFF)), 0xe8088008, 0x03623125, + + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x07) << 8) | ((0x8c) & 0xFF)), 0xff8fe08e, 0x00000930, + + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x07) << 8) | ((0x90) & 0xFF)), 0xf0000000, + (4 << 25)|(0 << 24)| + (0 << 23)|(0 << 22)|(0 << 21)|(0 << 20)| + (1 << 19)|(1 << 18)|(0 << 17)|(0 << 16)| + (2 << 14)|(0 << 13)|(0 << 12)| + (0 << 11)|(0 << 10)|(0 << 9)|(0 << 8)| + (0 << 3) |(0 << 1) |(0 << 0), + + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x07) << 8) | ((0x94) & 0xFF)), 0xc180f0f0, 0x0e2b0a05, + + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x07) << 8) | ((0x98) & 0xFF)), 0xfc00ffff, 0x00000000, + + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((3) & 0x07) << 8) | ((0x58) & 0xFF)), 0xffe0e0e0, 0x00000000, + + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((3) & 0x07) << 8) | ((0x5C) & 0xFF)), 0x0000003e, 0x00000000, + + + + + + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((3) & 0x07) << 8) | ((0x60) & 0xFF)), 0xffffff00, 0x00000000, + }; + int i; + int max; + print_debug("setting up CPU0 northbridge registers\r\n"); + max = sizeof(register_values)/sizeof(register_values[0]); + for(i = 0; i < max; i += 3) { + device_t dev; + unsigned where; + unsigned long reg; + + 
+ /* (continuation of the sdram_set_registers loop) apply one table triple: read the register, AND with the mask, OR in the value, write it back */ + + + + dev = register_values[i] & ~0xff; + where = register_values[i] & 0xff; + reg = pci_read_config32(dev, where); + reg &= register_values[i+1]; + reg |= register_values[i+2]; + pci_write_config32(dev, where, reg); + + + + + + + + } + print_debug("done.\r\n"); +} + + /* Per-side size of a DIMM as computed by spd_get_dimm_size(): each field is a sum of bit counts and log2() terms, i.e. an exponent rather than a byte count; 0 means not populated or not read. */ +struct dimm_size { + unsigned long side1; + unsigned long side2; +}; +static struct dimm_size spd_get_dimm_size(unsigned device) +{ + /* Compute the size exponents of the DIMM at SMBus address device from its SPD bytes. side1 = (byte 3 & 0xf) + (byte 4 & 0xf) + log2(byte 17) + log2(byte 7 << 8 | byte 6); presumably row bits, column bits, bank count and data width per the JEDEC SPD layout - confirm. Any failed SMBus read returns whatever has been accumulated so far. */ + struct dimm_size sz; + int value, low; + sz.side1 = 0; + sz.side2 = 0; + + + + + + value = smbus_read_byte(device, 3); + if (value < 0) return sz; + sz.side1 += value & 0xf; + + value = smbus_read_byte(device, 4); + if (value < 0) return sz; + sz.side1 += value & 0xf; + + value = smbus_read_byte(device, 17); + if (value < 0) return sz; + sz.side1 += log2(value & 0xff); + + /* bytes 7 (high) and 6 (low) form one 16-bit quantity */ + value = smbus_read_byte(device, 7); + if (value < 0) return sz; + value &= 0xff; + value <<= 8; + + low = smbus_read_byte(device, 6); + if (low < 0) return sz; + value = value | (low & 0xff); + sz.side1 += log2(value); + + /* byte 5 <= 1 (including a failed read): leave side2 at 0 */ + value = smbus_read_byte(device, 5); + if (value <= 1) return sz; + + /* otherwise side2 starts equal to side1 and is adjusted when the high nibbles of bytes 3 and 4 are in use: subtract the low nibble, add the high nibble */ + sz.side2 = sz.side1; + + value = smbus_read_byte(device, 3); + if (value < 0) return sz; + if ((value & 0xf0) == 0) return sz; + sz.side2 -= (value & 0x0f); + sz.side2 += ((value >> 4) & 0x0f); + + value = smbus_read_byte(device, 4); + if (value < 0) return sz; + sz.side2 -= (value & 0x0f); + sz.side2 += ((value >> 4) & 0x0f); + return sz; +} + /* Convert an SPD SMBus address to a DIMM index by subtracting the base address 0xa << 3 (0x50). */ +static unsigned spd_to_dimm(unsigned device) +{ + return (device - (0xa << 3)); +} + /* Program the registers of PCI device 0:18.2 for DIMM number index from the size exponents in sz: two base registers starting at 0x40 and a field of the map register at 0x80. The sizes are printed first; when the two sides differ, side2 is treated as absent. */ +static void set_dimm_size(struct dimm_size sz, unsigned index) +{ + uint32_t base0, base1, map; + + + print_debug("set_dimm_size: ("); + print_debug_hex32(sz.side1); + print_debug_char(','); + print_debug_hex32(sz.side2); + print_debug_char(','); + print_debug_hex32(index); + print_debug(")\r\n"); + + if (sz.side1 != sz.side2) { + sz.side2 = 0; + } + map = pci_read_config32(( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x7) << 8)), 0x80); + map &= ~(0xf << (index
+ 4)); + /* NOTE(review): the 4-bit field is cleared above with a shift of (index + 4) but set below with a shift of (index * 4); these never select the same bits for any index - confirm which is intended. */ + + + + + + + base0 = base1 = 0; + + /* a side is enabled only when its exponent is at least 25 + 3; the base value is a single bit at position (exponent - 28 + 21) with bit 0 set */ + if (sz.side1 >= (25 + 3)) { + base0 = (1 << ((sz.side1 - (25 + 3)) + 21)) | 1; + map |= (sz.side1 - (25 + 3)) << (index *4); + } + + + if (sz.side2 >= (25 + 3)) { + base1 = (1 << ((sz.side2 - (25 + 3)) + 21)) | 1; + } + + /* side 1 goes to register 0x40 + 8*index, side 2 to the following register, and the updated map to 0x80 */ + pci_write_config32(( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x7) << 8)), 0x40 + (((index << 1)+0)<<2), base0); + pci_write_config32(( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x7) << 8)), 0x40 + (((index << 1)+1)<<2), base1); + pci_write_config32(( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x7) << 8)), 0x80, map); +} + /* For the two SPD addresses 0x50 and 0x51, read the DIMM size and program it with set_dimm_size(). */ +static void spd_set_ram_size(void) +{ + unsigned device; + for(device = (0xa << 3); + device <= ((0xa << 3) +1); + device += 1) + { + struct dimm_size sz; + sz = spd_get_dimm_size(device); + set_dimm_size(sz, spd_to_dimm(device)); + } +} + /* Write MSR 0xC001001A from tom_k: lo = low 22 bits of tom_k shifted left by 10, hi = the remaining upper bits shifted right by 22 (tom_k * 1024 as a 64-bit value; presumably tom_k is in KiB - confirm). Calls die() when tom_k is zero. */ +static void set_top_mem(unsigned tom_k) +{ + + if (!tom_k) { + die("No memory"); + } + + msr_t msr; + msr.lo = (tom_k & 0x003fffff) << 10; + msr.hi = (tom_k & 0xffc00000) >> 22; + wrmsr(0xC001001A, msr); + + + + + + + +} + /* Lay out the enabled chip selects of PCI device 0:18.2 back to back. Each pass picks, among the eight base registers at 0x40 + 4*index, the enabled one (bit 0 set) with the largest value that has not been placed yet, gives it the next base address and a matching mask, and advances the running total. tom holds the running total in its low bits and one "already placed" flag per chip select in bits 24 and up. The local mask is declared but unused. */ +static void order_dimms(void) +{ + unsigned long tom; + unsigned mask; + unsigned index; + + + tom = 0; + for(;;) { + + unsigned canidate; + uint32_t csbase, csmask; + unsigned size; + csbase = 0; + canidate = 0; + for(index = 0; index < 8; index++) { + uint32_t value; + value = pci_read_config32(( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x7) << 8)), 0x40 + (index << 2)); + + /* skip chip selects that are not enabled */ + if (!(value & 1)) { + continue; + } + + /* keep only the largest value seen so far */ + if (value <= csbase) { + continue; + } + + /* skip chip selects already placed in an earlier pass */ + if (tom & (1 << (index + 24))) { + continue; + } + + csbase = value; + canidate = index; + } + /* nothing left to place */ + if (csbase == 0) { + break; + } + + + tom |= (1 << (canidate + 24)); + + + size = csbase >> 21; + + + csbase = (tom << 21) | 1; + + + tom += size; + + + csmask = ((size -1) << 21); + csmask |= 0xfe00; + + + pci_write_config32(( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x7) << 8)), 0x40 +
(canidate << 2), csbase); + /* the matching mask register lives at 0x60 + 4*canidate */ + pci_write_config32(( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x7) << 8)), 0x60 + (canidate << 2), csmask); + + } + set_top_mem((tom & ~0xff000000) << 15); +} + /* Empty: this function programs no registers. */ +static void spd_set_dram_timing(void) +{ + +} + /* Clear bit 17 of config register 0x90 on PCI device 0:18.2 with a read-modify-write. */ +static void spd_set_ecc_mode(void) +{ + unsigned long dcl; + dcl = pci_read_config32(( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x7) << 8)), 0x90); + + dcl &= ~(1<<17); + pci_write_config32(( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x7) << 8)), 0x90, dcl); + +} +static void sdram_set_spd_registers(void) +{ /* run the SPD-driven setup steps in order: RAM size, DRAM timing, ECC mode, then chip-select ordering */ + spd_set_ram_size(); + spd_set_dram_timing(); + spd_set_ecc_mode(); + order_dimms(); +} + + /* Start memory initialisation through config register 0x90 of PCI device 0:18.2: print the register, write it once with bit 3 set, write it again with bits 0..3 clear and bit 8 set, then poll until bit 8 reads back clear or 300000 reads have been made. A dot is printed every 1024 reads, followed by " failed" when the read budget was used up and " done" otherwise. */ +static void sdram_enable(void) +{ + unsigned long dcl; + + + dcl = pci_read_config32(( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x7) << 8)), 0x90); + print_debug("dcl: "); + print_debug_hex32(dcl); + print_debug("\r\n"); + dcl |= (1<<3); + pci_write_config32(( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x7) << 8)), 0x90, dcl); + dcl &= ~(1<<3); + dcl &= ~(1<<0); + dcl &= ~(1<<1); + dcl &= ~(1<<2); + dcl |= (1<<8); + pci_write_config32(( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x7) << 8)), 0x90, dcl); + + print_debug("Initializing memory: "); + int loops = 0; + do { + dcl = pci_read_config32(( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x7) << 8)), 0x90); + loops += 1; + if ((loops & 1023) == 0) { + print_debug("."); + } + } while(((dcl & (1<<8)) != 0) && (loops < 300000)); + if (loops >= 300000) { + print_debug(" failed\r\n"); + } else { + print_debug(" done\r\n"); + } + +} + /* The next three functions have empty bodies; sdram_initialize() calls them as placeholder steps. */ +static void sdram_first_normal_reference(void) {} +static void sdram_enable_refresh(void) {} +static void sdram_special_finishup(void) {} + + /* Apply a fixed table of { register, AND mask, OR value } triples for PCI device 0:18.0 with a read-modify-write per entry. */ +static void setup_coherent_ht_domain(void) +{ + static const unsigned int register_values[] = { + + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((0) & 0x07) << 8) | ((0x40) & 0xFF)), 0xfff0f0f0,
0x00010101, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((0) & 0x07) << 8) | ((0x44) & 0xFF)), 0xfff0f0f0, 0x00010101, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((0) & 0x07) << 8) | ((0x48) & 0xFF)), 0xfff0f0f0, 0x00010101, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((0) & 0x07) << 8) | ((0x4c) & 0xFF)), 0xfff0f0f0, 0x00010101, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((0) & 0x07) << 8) | ((0x50) & 0xFF)), 0xfff0f0f0, 0x00010101, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((0) & 0x07) << 8) | ((0x54) & 0xFF)), 0xfff0f0f0, 0x00010101, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((0) & 0x07) << 8) | ((0x58) & 0xFF)), 0xfff0f0f0, 0x00010101, + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((0) & 0x07) << 8) | ((0x5c) & 0xFF)), 0xfff0f0f0, 0x00010101, + + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((0) & 0x07) << 8) | ((0x68) & 0xFF)), 0x00800000, 0x0f00840f, + + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((0) & 0x07) << 8) | ((0x6C) & 0xFF)), 0xffffff8c, 0x00000000 | (1 << 6) |(1 << 5)| (1 << 4), + + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((0) & 0x07) << 8) | ((0x84) & 0xFF)), 0x00009c05, 0x11110020, + + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((0) & 0x07) << 8) | ((0x88) & 0xFF)), 0xfffff0ff, 0x00000200, + + ( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((0) & 0x07) << 8) | ((0x94) & 0xFF)), 0xff000000, 0x00ff0000, + + + + + + }; + int i; + int max; + print_debug("setting up coherent ht domain....\r\n"); + max = sizeof(register_values)/sizeof(register_values[0]); + for(i = 0; i < max; i += 3) { + device_t dev; + unsigned where; + unsigned long reg; + + + + + + + dev = register_values[i] & ~0xff; + where = register_values[i] & 0xff; + reg = pci_read_config32(dev, where); + reg &= register_values[i+1]; + reg |= register_values[i+2]; + pci_write_config32(dev, where, reg); + + + + + + + } + print_debug("done.\r\n"); +} + + +void sdram_no_memory(void) 
+{ /* sdram_no_memory body: print the error at err level, then call hlt() forever */ + print_err("No memory!!\r\n"); + while(1) { + hlt(); + } +} + + /* Run the memory bring-up steps in order, printing a "RamN" progress marker at debug level between them: static register setup, SPD-derived setup, sdram_enable(), then the three placeholder steps. */ +void sdram_initialize(void) +{ + print_debug("Ram1\r\n"); + + sdram_set_registers(); + + print_debug("Ram2\r\n"); + + sdram_set_spd_registers(); + + print_debug("Ram3\r\n"); + + + + + sdram_enable(); + + print_debug("Ram4\r\n"); + sdram_first_normal_reference(); + + print_debug("Ram5\r\n"); + sdram_enable_refresh(); + sdram_special_finishup(); + + print_debug("Ram6\r\n"); +} + + /* Return 1 when bit 8 of the low word of MSR 0x1b is set and 0 otherwise, printing which kind of processor was detected. local_apic and apic_id are declared but unused. */ +static int boot_cpu(void) +{ + volatile unsigned long *local_apic; + unsigned long apic_id; + int bsp; + msr_t msr; + msr = rdmsr(0x1b); + bsp = !!(msr.lo & (1 << 8)); + if (bsp) { + print_debug("Bootstrap processor\r\n"); + } else { + print_debug("Application processor\r\n"); + } + + return bsp; +} + /* Return bit 6 of config register 0x6c on PCI device 0:18.0 (so the result is 0 or 1 << 6, not 0 or 1), printing a message when it is set. dcl is declared but unused. */ +static int cpu_init_detected(void) +{ + unsigned long dcl; + int cpu_init; + + unsigned long htic; + + htic = pci_read_config32(( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((0) & 0x7) << 8)), 0x6c); + + cpu_init = (htic & (1<<6)); + if (cpu_init) { + print_debug("CPU INIT Detected.\r\n"); + } + return cpu_init; +} + + /* Print dev as "PCI: bb:dd.f" in hex, taking the bus from bits 16..23, the device from bits 11..15 and the function from bits 8..10. */ +static void print_debug_pci_dev(unsigned dev) +{ + print_debug("PCI: "); + print_debug_hex8((dev >> 16) & 0xff); + print_debug_char(':'); + print_debug_hex8((dev >> 11) & 0x1f); + print_debug_char('.'); + print_debug_hex8((dev >> 8) & 7); +} + /* Scan every device/function on bus 0 (0:00.0 through 0:1f.7) and print those whose id register has neither 16-bit half equal to 0x0000 or 0xffff. */ +static void print_pci_devices(void) +{ + device_t dev; + for(dev = ( (((0) & 0xFF) << 16) | (((0) & 0x1f) << 11) | (((0) & 0x7) << 8)); + dev <= ( (((0) & 0xFF) << 16) | (((0x1f) & 0x1f) << 11) | (((0x7) & 0x7) << 8)); + dev += ( (((0) & 0xFF) << 16) | (((0) & 0x1f) << 11) | (((1) & 0x7) << 8))) { + uint32_t id; + id = pci_read_config32(dev, 0x00); + if (((id & 0xffff) == 0x0000) || ((id & 0xffff) == 0xffff) || + (((id >> 16) & 0xffff) == 0xffff) || + (((id >> 16) & 0xffff) == 0x0000)) { + continue; + } + print_debug_pci_dev(dev); + print_debug("\r\n"); + } +} + + /* Hex-dump all 256 config-space bytes of dev, 16 per line, each line prefixed with its starting offset. */ +static void dump_pci_device(unsigned dev) +{ + int i; + print_debug_pci_dev(dev); +
print_debug("\r\n"); + /* (continuation of dump_pci_device) one row per 16 bytes: offset and ':' at the start of a row, line break after the last byte of a row */ + for(i = 0; i <= 255; i++) { + unsigned char val; + if ((i & 0x0f) == 0) { + print_debug_hex8(i); + print_debug_char(':'); + } + val = pci_read_config8(dev, i); + print_debug_char(' '); + print_debug_hex8(val); + if ((i & 0x0f) == 0x0f) { + print_debug("\r\n"); + } + } +} + /* Hex-dump up to 256 SPD bytes from each of the SMBus addresses 0x50 and 0x51, 16 per row. The dump of a device stops at the first failed smbus_read_byte(), after printing "bad device". */ +static void dump_spd_registers(void) +{ + unsigned device; + device = (0xa << 3); + print_debug("\r\n"); + while(device <= ((0xa << 3) +1)) { + int i; + print_debug("dimm: "); + print_debug_hex8(device); + for(i = 0; i < 256; i++) { + int status; + unsigned char byte; + if ((i & 0xf) == 0) { + print_debug("\r\n"); + print_debug_hex8(i); + print_debug(": "); + } + status = smbus_read_byte(device, i); + if (status < 0) { + print_debug("bad device\r\n"); + break; + } + byte = status & 0xff; + print_debug_hex8(byte); + print_debug_char(' '); + } + device += 1; + print_debug("\r\n"); + } +} + + /* Entry point of the test program: initialise the UART and console; then, only when boot_cpu() is true and cpu_init_detected() is zero, set up the resource map and coherent HT domain, enumerate the HT chain, list PCI devices, enable SMBus, initialise SDRAM, dump the SPD data and the config space of 0:18.2, print MSR 0xC001001A and run ram_check() from 0 up to that MSR's low word. */ +static void main(void) +{ + uart_init(); + console_init(); + + + + + + + + if (boot_cpu() && !cpu_init_detected()) { + setup_default_resource_map(); + setup_coherent_ht_domain(); + enumerate_ht_chain(); + print_pci_devices(); + enable_smbus(); + sdram_initialize(); + + dump_spd_registers(); + dump_pci_device(( (((0) & 0xFF) << 16) | (((0x18) & 0x1f) << 11) | (((2) & 0x7) << 8))); + + + msr_t msr; + msr = rdmsr(0xC001001A); + print_debug("TOP_MEM: "); + print_debug_hex32(msr.hi); + print_debug_hex32(msr.lo); + print_debug("\r\n"); + ram_check(0x00000000, msr.lo); + } +} diff --git a/util/romcc/tests/simple_test48.c b/util/romcc/tests/simple_test48.c new file mode 100644 index 0000000000..779ecfdc96 --- /dev/null +++ b/util/romcc/tests/simple_test48.c @@ -0,0 +1,13 @@ + /* Test input: a forward goto that jumps over the declaration of j and the two statements that use it, landing on the label next. */ +static void main(void) +{ + int i; + i = __builtin_inb(0x1234); + goto next; + int j; + j = __builtin_inb(0xabcd); + __builtin_outb(j, 0xef90); + next: + __builtin_outb(i, 0x5678); + +} diff --git a/util/romcc/tests/simple_test49.c b/util/romcc/tests/simple_test49.c new file mode 100644 index 0000000000..a5a12c1cd8 ---
/dev/null +++ b/util/romcc/tests/simple_test49.c @@ -0,0 +1,15 @@ + /* Test input: an endless for(;;) loop nested inside an if; the final __builtin_outb is reached only when i is not 23. */ +static void main(void) +{ + int i; + i = __builtin_inb(0x1234); + if (i == 23) { + for(;;) { + int j; + j = __builtin_inb(0xabcd); + __builtin_outb(j, 0xef90); + } + } + __builtin_outb(i, 0x5678); + +} diff --git a/util/romcc/tests/simple_test50.c b/util/romcc/tests/simple_test50.c new file mode 100644 index 0000000000..51c7c505c9 --- /dev/null +++ b/util/romcc/tests/simple_test50.c @@ -0,0 +1,43 @@ +typedef __builtin_div_t div_t; +typedef __builtin_ldiv_t ldiv_t; +typedef __builtin_udiv_t udiv_t; +typedef __builtin_uldiv_t uldiv_t; + /* Test input: div, ldiv, udiv and uldiv are one-line wrappers that return the result of the matching __builtin_*div builtin for numer and denom. */ +static div_t div(int numer, int denom) +{ + return __builtin_div(numer, denom); +} +static ldiv_t ldiv(long numer, long denom) +{ + return __builtin_ldiv(numer, denom); +} +static udiv_t udiv(unsigned numer, unsigned denom) +{ + return __builtin_udiv(numer, denom); +} +static uldiv_t uldiv(unsigned long numer, unsigned long denom) +{ + return __builtin_uldiv(numer, denom); +} + /* Exercises ldiv and uldiv only: operands are loaded from volatile arrays at fixed addresses 0x1234 and 0x5678, and the quot and rem fields are stored back to elements 2 and 3. The locals c and g are declared but unused. */ +static void main(void) +{ + volatile long *sval = (volatile long *)0x1234; + volatile unsigned long *uval = (volatile unsigned long *)0x5678; + long int a, b, c; + unsigned long e, f, g; + ldiv_t lresult; + uldiv_t ulresult; + + a = sval[0]; + b = sval[1]; + lresult = ldiv(a,b); + sval[2] = lresult.quot; + sval[3] = lresult.rem; + + e = uval[0]; + f = uval[1]; + ulresult = uldiv(e, f); + uval[2] = ulresult.quot; + uval[3] = ulresult.rem; +} diff --git a/util/romcc/tests/simple_test51.c b/util/romcc/tests/simple_test51.c new file mode 100644 index 0000000000..424ed7a0d1 --- /dev/null +++ b/util/romcc/tests/simple_test51.c @@ -0,0 +1,11 @@ +static void main(void) +{ /* Test input: an unsigned long multiply whose result is stored through a volatile pointer, followed by a divide whose result (a) is never read afterwards. */ + unsigned long a,b,c, d; + volatile unsigned long *val = (volatile unsigned long *)0x1234; + a = val[0]; + b = val[1]; + d = val[2]; + c = a*b; + val[3] = c; + a = c / d; +} diff --git a/util/romcc/tests/simple_test52.c b/util/romcc/tests/simple_test52.c new file mode 100644 index 0000000000..3349de245c --- /dev/null +++
b/util/romcc/tests/simple_test52.c @@ -0,0 +1,5 @@ +static void main(void) +{ + if (__builtin_inb(0x1b)) { + } +} diff --git a/util/romcc/tests/simple_test53.c b/util/romcc/tests/simple_test53.c new file mode 100644 index 0000000000..a27f98618d --- /dev/null +++ b/util/romcc/tests/simple_test53.c @@ -0,0 +1,10 @@ +static void main(void) +{ + unsigned dev; + dev = __builtin_inl(0xcd); + if (dev == (0xffffffffU)) { + do { + __builtin_hlt(); + } while(1); + } +} diff --git a/util/romcc/tests/simple_test54.c b/util/romcc/tests/simple_test54.c new file mode 100644 index 0000000000..ec3208f4aa --- /dev/null +++ b/util/romcc/tests/simple_test54.c @@ -0,0 +1,771 @@ +struct syscall_result { + long val; + int errno; +}; + +static struct syscall_result syscall_return(long result) +{ + struct syscall_result res; + if (((unsigned long)result) >= ((unsigned long)-125)) { + res.errno = - result; + res.val = -1; + } else { + res.errno = 0; + res.val = result; + } + return res; +} + +static struct syscall_result syscall0(unsigned long nr) +{ + long res; + asm volatile( + "int $0x80" + : "=a" (res) + : "a" (nr)); + return syscall_return(res); +} + +static struct syscall_result syscall1(unsigned long nr, unsigned long arg1) +{ + long res; + asm volatile( + "int $0x80" + : "=a" (res) + : "a" (nr), "b" (arg1)); + return syscall_return(res); + +} + +static struct syscall_result syscall2(unsigned long nr, unsigned long arg1, unsigned long arg2) +{ + long res; + asm volatile( + "int $0x80" + : "=a" (res) + : "a" (nr), "b" (arg1), "c" (arg2)); + return syscall_return(res); + +} + + +static struct syscall_result syscall3(unsigned long nr, unsigned long arg1, unsigned long arg2, + unsigned long arg3) +{ + long res; + asm volatile( + "int $0x80" + : "=a" (res) + : "a" (nr), "b" (arg1), "c" (arg2), "d" (arg3)); + return syscall_return(res); + +} + +static struct syscall_result syscall4(unsigned long nr, unsigned long arg1, unsigned long arg2, + unsigned long arg3, unsigned long arg4) +{ + long 
res; + asm volatile( + "int $0x80" + : "=a" (res) + : "a" (nr), "b" (arg1), "c" (arg2), "d" (arg3), "S" (arg4)); + return syscall_return(res); + +} + +static struct syscall_result syscall5(unsigned long nr, unsigned long arg1, unsigned long arg2, + unsigned long arg3, unsigned long arg4, unsigned long arg5) +{ + long res; + asm volatile( + "int $0x80" + : "=a" (res) + : "a" (nr), "b" (arg1), "c" (arg2), "d" (arg3), + "S" (arg4), "D" (arg5)); + return syscall_return(res); + +} + +#define NR_exit 1 +#define NR_fork 2 +#define NR_read 3 +#define NR_write 4 +#define NR_open 5 +#define NR_close 6 +#define NR_waitpid 7 +#define NR_creat 8 +#define NR_link 9 +#define NR_unlink 10 +#define NR_execve 11 +#define NR_chdir 12 +#define NR_time 13 +#define NR_mknod 14 +#define NR_chmod 15 +#define NR_lchown 16 +#define NR_break 17 +#define NR_oldstat 18 +#define NR_lseek 19 +#define NR_getpid 20 +#define NR_mount 21 +#define NR_umount 22 +#define NR_setuid 23 +#define NR_getuid 24 +#define NR_stime 25 +#define NR_ptrace 26 +#define NR_alarm 27 +#define NR_oldfstat 28 +#define NR_pause 29 +#define NR_utime 30 +#define NR_stty 31 +#define NR_gtty 32 +#define NR_access 33 +#define NR_nice 34 +#define NR_ftime 35 +#define NR_sync 36 +#define NR_kill 37 +#define NR_rename 38 +#define NR_mkdir 39 +#define NR_rmdir 40 +#define NR_dup 41 +#define NR_pipe 42 +#define NR_times 43 +#define NR_prof 44 +#define NR_brk 45 +#define NR_setgid 46 +#define NR_getgid 47 +#define NR_signal 48 +#define NR_geteuid 49 +#define NR_getegid 50 +#define NR_acct 51 +#define NR_umount2 52 +#define NR_lock 53 +#define NR_ioctl 54 +#define NR_fcntl 55 +#define NR_mpx 56 +#define NR_setpgid 57 +#define NR_ulimit 58 +#define NR_oldolduname 59 +#define NR_umask 60 +#define NR_chroot 61 +#define NR_ustat 62 +#define NR_dup2 63 +#define NR_getppid 64 +#define NR_getpgrp 65 +#define NR_setsid 66 +#define NR_sigaction 67 +#define NR_sgetmask 68 +#define NR_ssetmask 69 +#define NR_setreuid 70 +#define NR_setregid 71 
+#define NR_sigsuspend 72
+#define NR_sigpending 73
+#define NR_sethostname 74
+#define NR_setrlimit 75
+#define NR_getrlimit 76
+#define NR_getrusage 77
+#define NR_gettimeofday 78
+#define NR_settimeofday 79
+#define NR_getgroups 80
+#define NR_setgroups 81
+#define NR_select 82
+#define NR_symlink 83
+#define NR_oldlstat 84
+#define NR_readlink 85
+#define NR_uselib 86
+#define NR_swapon 87
+#define NR_reboot 88
+#define NR_readdir 89
+#define NR_mmap 90
+#define NR_munmap 91
+#define NR_truncate 92
+#define NR_ftruncate 93
+#define NR_fchmod 94
+#define NR_fchown 95
+#define NR_getpriority 96
+#define NR_setpriority 97
+#define NR_profil 98
+#define NR_statfs 99
+#define NR_fstatfs 100
+#define NR_ioperm 101
+#define NR_socketcall 102
+#define NR_syslog 103
+#define NR_setitimer 104
+#define NR_getitimer 105
+#define NR_stat 106
+#define NR_lstat 107
+#define NR_fstat 108
+#define NR_olduname 109
+#define NR_iopl 110
+#define NR_vhangup 111
+#define NR_idle 112
+#define NR_vm86old 113
+#define NR_wait4 114
+#define NR_swapoff 115
+#define NR_sysinfo 116
+#define NR_ipc 117
+#define NR_fsync 118
+#define NR_sigreturn 119
+#define NR_clone 120
+#define NR_setdomainname 121
+#define NR_uname 122
+#define NR_modify_ldt 123
+#define NR_adjtimex 124
+#define NR_mprotect 125
+#define NR_sigprocmask 126
+#define NR_create_module 127
+#define NR_init_module 128
+#define NR_delete_module 129
+#define NR_get_kernel_syms 130
+#define NR_quotactl 131
+#define NR_getpgid 132
+#define NR_fchdir 133
+#define NR_bdflush 134
+#define NR_sysfs 135
+#define NR_personality 136
+#define NR_afs_syscall 137 /* Syscall for Andrew File System */
+#define NR_setfsuid 138
+#define NR_setfsgid 139
+#define NR__llseek 140
+#define NR_getdents 141
+#define NR__newselect 142
+#define NR_flock 143
+#define NR_msync 144
+#define NR_readv 145
+#define NR_writev 146
+#define NR_getsid 147
+#define NR_fdatasync 148
+#define NR__sysctl 149
+#define NR_mlock 150
+#define NR_munlock 151
+#define NR_mlockall 152
+#define NR_munlockall 153
+#define NR_sched_setparam 154
+#define NR_sched_getparam 155
+#define NR_sched_setscheduler 156
+#define NR_sched_getscheduler 157
+#define NR_sched_yield 158
+#define NR_sched_get_priority_max 159
+#define NR_sched_get_priority_min 160
+#define NR_sched_rr_get_interval 161
+#define NR_nanosleep 162
+#define NR_mremap 163
+#define NR_setresuid 164
+#define NR_getresuid 165
+#define NR_vm86 166
+#define NR_query_module 167
+#define NR_poll 168
+#define NR_nfsservctl 169
+#define NR_setresgid 170
+#define NR_getresgid 171
+#define NR_prctl 172
+#define NR_rt_sigreturn 173
+#define NR_rt_sigaction 174
+#define NR_rt_sigprocmask 175
+#define NR_rt_sigpending 176
+#define NR_rt_sigtimedwait 177
+#define NR_rt_sigqueueinfo 178
+#define NR_rt_sigsuspend 179
+#define NR_pread 180
+#define NR_pwrite 181
+#define NR_chown 182
+#define NR_getcwd 183
+#define NR_capget 184
+#define NR_capset 185
+#define NR_sigaltstack 186
+#define NR_sendfile 187
+#define NR_getpmsg 188 /* some people actually want streams */
+#define NR_putpmsg 189 /* some people actually want streams */
+#define NR_vfork 190
+
+typedef long ssize_t;
+typedef unsigned long size_t;
+
+/* Standard file descriptors */
+#define STDIN_FILENO 0 /* Standard input */
+#define STDOUT_FILENO 1 /* Standard output */
+#define STDERR_FILENO 2 /* Standard error output */
+
+/* Issue the NR_write system call through syscall3() and return the
+ * val field of its result.
+ */
+static ssize_t write(int fd, const void *buf, size_t count)
+{
+	struct syscall_result res;
+	res = syscall3(NR_write, fd, (unsigned long)buf, count);
+	return res.val;
+}
+
+/* Issue the NR_exit system call through syscall1(); the result is ignored. */
+static void _exit(int status)
+{
+	struct syscall_result res;
+	res = syscall1(NR_exit, status);
+}
+
+/* Return the address of a static byte whose value equals ch.
+ * The table is the identity mapping 0x00..0xff, so a character held
+ * by value can be handed to write(), which needs a pointer.
+ */
+static const char *addr_of_char(unsigned char ch)
+{
+	static const char byte[] = {
+		0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+		0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+		0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+		0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+		0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
+		0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
+		0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
+		0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
+		0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
+		0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
+		0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
+		0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
+		0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+		0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
+		0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+		0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
+		0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+		0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+		0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+		0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
+		0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
+		0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
+		0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
+		0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
+		0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
+		0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
+		0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
+		0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
+		0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
+		0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
+		0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
+		0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
+	};
+	return byte + ch;
+}
+
+/* Write one byte to standard output. */
+static void console_tx_byte(unsigned char ch)
+{
+	write(STDOUT_FILENO, addr_of_char(ch), 1);
+}
+
+/* Print a value in the range 0..15 as one lower-case hex digit. */
+static void console_tx_nibble(unsigned nibble)
+{
+	unsigned char digit;
+	digit = nibble + '0';
+	if (digit > '9') {
+		/* 39 == 'a' - '9' - 1, so 10 maps to 'a' */
+		digit += 39;
+	}
+	console_tx_byte(digit);
+}
+
+static void console_tx_char(unsigned char byte)
+{
+	console_tx_byte(byte);
+}
+
+/* Print an 8 bit value as two hex digits, most significant first. */
+static void console_tx_hex8(unsigned char value)
+{
+	console_tx_nibble((value >> 4U) & 0x0fU);
+	console_tx_nibble(value & 0x0fU);
+}
+
+/* Print a 16 bit value as four hex digits, most significant first. */
+static void console_tx_hex16(unsigned short value)
+{
+	console_tx_nibble((value >> 12U) & 0x0FU);
+	console_tx_nibble((value >> 8U) & 0x0FU);
+	console_tx_nibble((value >> 4U) & 0x0FU);
+	console_tx_nibble(value & 0x0FU);
+}
+
+/* Print a 32 bit value as eight hex digits, most significant first.
+ * The parameter must be a full unsigned int: the print_*_hex32
+ * wrappers pass unsigned int, and a 16 bit parameter would truncate
+ * the value so the top four digits always print as 0.
+ */
+static void console_tx_hex32(unsigned int value)
+{
+	console_tx_nibble((value >> 28U) & 0x0FU);
+	console_tx_nibble((value >> 24U) & 0x0FU);
+	console_tx_nibble((value >> 20U) & 0x0FU);
+	console_tx_nibble((value >> 16U) & 0x0FU);
+	console_tx_nibble((value >> 12U) & 0x0FU);
+	console_tx_nibble((value >> 8U) & 0x0FU);
+	console_tx_nibble((value >> 4U) & 0x0FU);
+	console_tx_nibble(value & 0x0FU);
+}
+
+/* Print every character of a NUL terminated string. */
+static void console_tx_string(const char *str)
+{
+	unsigned char ch;
+	while((ch = *str++) != '\0') {
+		console_tx_byte(ch);
+	}
+}
+
+static void print_emerg_char(unsigned char byte) { console_tx_char(byte); }
+static void print_emerg_hex8(unsigned char value) { console_tx_hex8(value); }
+static void print_emerg_hex16(unsigned short value){ console_tx_hex16(value); }
+static void print_emerg_hex32(unsigned int value) { console_tx_hex32(value); }
+static void print_emerg(const char *str) { console_tx_string(str); }
+
+static void print_debug_char(unsigned char byte) { console_tx_char(byte); }
+static void print_debug_hex8(unsigned char value) { console_tx_hex8(value); }
+static void print_debug_hex16(unsigned short value){ console_tx_hex16(value); }
+static void print_debug_hex32(unsigned int value) { console_tx_hex32(value); }
+static void print_debug(const char *str) { console_tx_string(str); }
+
+
+int log2(int value)
+{
+	/* __builtin_bsr is exactly equivalent to the x86 machine
+	 * instruction with the exception that it returns -1
+	 * when the value presented to it is zero.
+	 * Otherwise __builtin_bsr returns the zero based index of
+	 * the highest bit set.
+	 */
+	return __builtin_bsr(value);
+}
+
+
+/* Print str at emergency level and then spin forever; never returns. */
+static void die(const char *str)
+{
+	print_emerg(str);
+	do {
+		asm(" ");
+	} while(1);
+
+}
+
+/* Stand-in for a real SMBus read: return one byte from a canned table
+ * holding two 256 byte images, indexed by (device << 8) + address.
+ * There is no bounds check, and the result is never negative even
+ * though the callers below test for values < 0.
+ */
+static int smbus_read_byte(unsigned device, unsigned address)
+{
+	static const unsigned char dimm[] = {
+0x80, 0x08, 0x07, 0x0d, 0x0a, 0x02, 0x48, 0x00, 0x04, 0x60, 0x70, 0x02, 0x82, 0x08, 0x08, 0x01,
+0x0e, 0x04, 0x0c, 0x01, 0x02, 0x20, 0x00, 0x75, 0x70, 0x00, 0x00, 0x48, 0x30, 0x48, 0x2a, 0x40,
+0x80, 0x80, 0x45, 0x45, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x33,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+
+0x80, 0x08, 0x07, 0x0d, 0x0a, 0x02, 0x48, 0x00, 0x04, 0x60, 0x70, 0x02, 0x82, 0x08, 0x08, 0x01,
+0x0e, 0x04, 0x0c, 0x01, 0x02, 0x20, 0x00, 0x75, 0x70, 0x00, 0x00, 0x48, 0x30, 0x48, 0x2a, 0x40,
+0x80, 0x80, 0x45, 0x45, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x33,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+	};
+	return dimm[(device << 8) + address];
+}
+
+#define SMBUS_MEM_DEVICE_START 0x00
+#define SMBUS_MEM_DEVICE_END 0x01
+#define SMBUS_MEM_DEVICE_INC 1
+
+/* Function 2 */
+#define DRAM_CONFIG_HIGH 0x94
+#define DCH_MEMCLK_SHIFT 20
+#define DCH_MEMCLK_MASK 7
+#define DCH_MEMCLK_100MHZ 0
+#define DCH_MEMCLK_133MHZ 2
+#define DCH_MEMCLK_166MHZ 5
+#define DCH_MEMCLK_200MHZ 7
+
+/* Function 3 */
+#define NORTHBRIDGE_CAP 0xE8
+#define NBCAP_128Bit 0x0001
+#define NBCAP_MP 0x0002
+#define NBCAP_BIG_MP 0x0004
+#define NBCAP_ECC 0x0008 /* bit 3; must not alias NBCAP_BIG_MP (bit 2) */
+#define NBCAP_CHIPKILL_ECC 0x0010
+#define NBCAP_MEMCLK_SHIFT 5
+#define NBCAP_MEMCLK_MASK 3
+#define NBCAP_MEMCLK_100MHZ 3
+#define NBCAP_MEMCLK_133MHZ 2
+#define NBCAP_MEMCLK_166MHZ 1
+#define NBCAP_MEMCLK_200MHZ 0
+#define NBCAP_MEMCTRL 0x0100
+
+typedef unsigned char uint8_t;
+typedef unsigned int uint32_t;
+
+/* Convert an SMBus device number into a zero based dimm index. */
+static unsigned spd_to_dimm(unsigned device)
+{
+	return (device - SMBUS_MEM_DEVICE_START);
+}
+
+/* Report that a dimm is being disabled; the register writes that
+ * would actually disable it are compiled out.
+ */
+static void disable_dimm(unsigned index)
+{
+	print_debug("disabling dimm");
+	print_debug_hex8(index);
+	print_debug("\r\n");
+#if 0
+	pci_write_config32(PCI_DEV(0, 0x18, 2), DRAM_CSBASE + (((index << 1)+0)<<2), 0);
+	pci_write_config32(PCI_DEV(0, 0x18, 2), DRAM_CSBASE + (((index << 1)+1)<<2), 0);
+#endif
+}
+
+
+struct mem_param {
+	uint8_t cycle_time;
+	uint32_t dch_memclk;
+};
+
+/* Look up the memory parameters for a minimum cycle time.
+ * The table is ordered from the longest cycle time to the shortest
+ * and ends with a zero cycle_time sentinel.  The search stops at the
+ * first entry whose successor has a cycle time below min_cycle_time.
+ * If the search reaches the sentinel, die() is called and never returns.
+ */
+static const struct mem_param *get_mem_param(unsigned min_cycle_time)
+{
+	static const struct mem_param speed[] = {
+		{
+			.cycle_time = 0xa0,
+			.dch_memclk = DCH_MEMCLK_100MHZ << DCH_MEMCLK_SHIFT,
+		},
+		{
+			.cycle_time = 0x75,
+			.dch_memclk = DCH_MEMCLK_133MHZ << DCH_MEMCLK_SHIFT,
+		},
+		{
+			.cycle_time = 0x60,
+			.dch_memclk = DCH_MEMCLK_166MHZ << DCH_MEMCLK_SHIFT,
+		},
+		{
+			.cycle_time = 0x50,
+			.dch_memclk = DCH_MEMCLK_200MHZ << DCH_MEMCLK_SHIFT,
+		},
+		{
+			.cycle_time = 0x00,
+		},
+	};
+	const struct mem_param *param;
+	for(param = &speed[0]; param->cycle_time ; param++) {
+		if (min_cycle_time > (param+1)->cycle_time) {
+			break;
+		}
+	}
+	if (!param->cycle_time) {
+		die("min_cycle_time to low");
+	}
+	return param;
+}
+
+#if 1
+/* Print a single progress character followed by CR LF. */
+static void debug(int c)
+{
+	print_debug_char(c);
+	print_debug_char('\r');
+	print_debug_char('\n');
+}
+#endif
+/* Choose a memory clock and CAS latency that every dimm can run at.
+ * The first pass raises min_cycle_time and min_latency to the values
+ * each dimm requires.  The second pass calls disable_dimm() for any
+ * dimm that cannot run at the selected values.  The result is the
+ * get_mem_param() entry for the selected cycle time.
+ */
+static const struct mem_param *spd_set_memclk(void)
+{
+	/* Compute the minimum cycle time for these dimms */
+	const struct mem_param *param;
+	unsigned min_cycle_time, min_latency;
+	unsigned device;
+	uint32_t value;
+
+	static const int latency_indicies[] = { 26, 23, 9 };
+	static const unsigned char min_cycle_times[] = {
+		[NBCAP_MEMCLK_200MHZ] = 0x50, /* 5ns */
+		[NBCAP_MEMCLK_166MHZ] = 0x60, /* 6ns */
+		[NBCAP_MEMCLK_133MHZ] = 0x75, /* 7.5ns */
+		[NBCAP_MEMCLK_100MHZ] = 0xa0, /* 10ns */
+	};
+
+
+#if 0
+	value = pci_read_config32(PCI_DEV(0, 0x18, 3), NORTHBRIDGE_CAP);
+#else
+	value = 0x50;
+#endif
+	min_cycle_time = min_cycle_times[(value >> NBCAP_MEMCLK_SHIFT) & NBCAP_MEMCLK_MASK];
+	min_latency = 2;
+
+#if 1
+	print_debug("min_cycle_time: ");
+	print_debug_hex8(min_cycle_time);
+	print_debug(" min_latency: ");
+	print_debug_hex8(min_latency);
+	print_debug("\r\n");
+#endif
+
+	/* Compute the least latency with the fastest clock supported
+	 * by both the memory controller and the dimms.
+	 */
+	for(device = SMBUS_MEM_DEVICE_START;
+		device <= SMBUS_MEM_DEVICE_END;
+		device += SMBUS_MEM_DEVICE_INC)
+	{
+		int new_cycle_time, new_latency;
+		int index;
+		int latencies;
+		int latency;
+
+		debug('A');
+		/* First find the supported CAS latencies
+		 * Byte 18 for DDR SDRAM is interpreted:
+		 * bit 0 == CAS Latency = 1.0
+		 * bit 1 == CAS Latency = 1.5
+		 * bit 2 == CAS Latency = 2.0
+		 * bit 3 == CAS Latency = 2.5
+		 * bit 4 == CAS Latency = 3.0
+		 * bit 5 == CAS Latency = 3.5
+		 * bit 6 == TBD
+		 * bit 7 == TBD
+		 */
+		new_cycle_time = 0xa0;
+		new_latency = 5;
+
+		latencies = smbus_read_byte(device, 18);
+		if (latencies <= 0) continue;
+
+		debug('B');
+		/* Compute the lowest cas latency supported */
+		latency = log2(latencies) -2;
+
+		/* Loop through and find a fast clock with a low latency */
+		for(index = 0; index < 3; index++, latency++) {
+			int value;
+			debug('C');
+			if ((latency < 2) || (latency > 4) ||
+				(!(latencies & (1 << latency)))) {
+				continue;
+			}
+			debug('D');
+			value = smbus_read_byte(device, latency_indicies[index]);
+			if (value < 0) continue;
+
+			debug('E');
+			/* Only increase the latency if we decrease the clock */
+			if ((value >= min_cycle_time) && (value < new_cycle_time)) {
+				new_cycle_time = value;
+				new_latency = latency;
+#if 1
+				print_debug("device: ");
+				print_debug_hex8(device);
+				print_debug(" new_cycle_time: ");
+				print_debug_hex8(new_cycle_time);
+				print_debug(" new_latency: ");
+				print_debug_hex8(new_latency);
+				print_debug("\r\n");
+#endif
+			}
+			debug('G');
+		}
+		debug('H');
+#if 1
+		print_debug("device: ");
+		print_debug_hex8(device);
+		print_debug(" new_cycle_time: ");
+		print_debug_hex8(new_cycle_time);
+		print_debug(" new_latency: ");
+		print_debug_hex8(new_latency);
+		print_debug("\r\n");
+#endif
+		if (new_latency > 4){
+			continue;
+		}
+		debug('I');
+		/* Does min_cycle_time need to be increased? */
+		if (new_cycle_time > min_cycle_time) {
+			min_cycle_time = new_cycle_time;
+		}
+		/* Does min_latency need to be increased? */
+		if (new_latency > min_latency) {
+			min_latency = new_latency;
+		}
+#if 1
+		print_debug("device: ");
+		print_debug_hex8(device);
+		print_debug(" min_cycle_time: ");
+		print_debug_hex8(min_cycle_time);
+		print_debug(" min_latency: ");
+		print_debug_hex8(min_latency);
+		print_debug("\r\n");
+#endif
+	}
+	/* Make a second pass through the dimms and disable
+	 * any that cannot support the selected memclk and cas latency.
+	 */
+	for(device = SMBUS_MEM_DEVICE_START;
+		device <= SMBUS_MEM_DEVICE_END;
+		device += SMBUS_MEM_DEVICE_INC)
+	{
+		int latencies;
+		int latency;
+		int index;
+		int value;
+		int dimm;
+		latencies = smbus_read_byte(device, 18);
+		if (latencies <= 0) {
+			goto dimm_err;
+		}
+
+		/* Compute the lowest cas latency supported */
+		latency = log2(latencies) -2;
+
+		/* Walk through searching for the selected latency */
+		for(index = 0; index < 3; index++, latency++) {
+			if (!(latencies & (1 << latency))) {
+				continue;
+			}
+			if (latency == min_latency)
+				break;
+		}
+		/* If I can't find the latency or my index is bad error */
+		if ((latency != min_latency) || (index >= 3)) {
+			goto dimm_err;
+		}
+
+		/* Read the min_cycle_time for this latency */
+		value = smbus_read_byte(device, latency_indicies[index]);
+
+		/* All is good if the selected clock speed
+		 * is what I need or slower.
+		 */
+		if (value <= min_cycle_time) {
+			continue;
+		}
+		/* Otherwise I have an error, disable the dimm */
+	dimm_err:
+		disable_dimm(spd_to_dimm(device));
+	}
+#if 1
+	print_debug("min_cycle_time: ");
+	print_debug_hex8(min_cycle_time);
+	print_debug(" min_latency: ");
+	print_debug_hex8(min_latency);
+	print_debug("\r\n");
+#endif
+	/* Now that I know the minimum cycle time lookup the memory parameters */
+	param = get_mem_param(min_cycle_time);
+
+#if 0
+	/* Update DRAM Config High with our selected memory speed */
+	value = pci_read_config32(PCI_DEV(0, 0x18, 2), DRAM_CONFIG_HIGH);
+	value &= ~(DCH_MEMCLK_MASK << DCH_MEMCLK_SHIFT);
+	value |= param->dch_memclk;
+	pci_write_config32(PCI_DEV(0, 0x18, 2), DRAM_CONFIG_HIGH, value);
+
+	static const unsigned latencies[] = { 1, 5, 2 };
+	/* Update DRAM Timing Low with our selected cas latency */
+	value = pci_read_config32(PCI_DEV(0, 0x18, 2), DRAM_CONFIG_LOW);
+	value &= ~7;
+	value |= latencies[min_latency - 2];
+	pci_write_config32(PCI_DEV(0, 0x18, 2), DRAM_CONFIG_LOW, value);
+#endif
+
+	return param;
+}
+
+/* Test entry point: run the memclk selection and exit with status 0. */
+static void main(void)
+{
+	const struct mem_param *param;
+	param = spd_set_memclk();
+	_exit(0);
+}
-- cgit v1.2.3