From fcdbce2decbf88a39b2da29ad1137a08e1d9ca95 Mon Sep 17 00:00:00 2001 From: Huayang Duan Date: Wed, 26 Sep 2018 16:33:18 +0800 Subject: mediatek/mt8183: Add DDR driver of rx dqs gating calibration part BUG=b:80501386 BRANCH=none TEST=Boots correctly on Kukui, and inits DRAM successfully with related patches. Change-Id: I504d6d5c9ea01b11a9f2a05b5ee4b5f1af87e23f Signed-off-by: Huayang Duan Reviewed-on: https://review.coreboot.org/c/28841 Tested-by: build bot (Jenkins) Reviewed-by: You-Cheng Syu Reviewed-by: Hung-Te Lin --- src/soc/mediatek/mt8183/dramc_pi_calibration_api.c | 678 ++++++++++++++++++++- src/soc/mediatek/mt8183/include/soc/dramc_pi_api.h | 5 + 2 files changed, 666 insertions(+), 17 deletions(-) (limited to 'src/soc/mediatek') diff --git a/src/soc/mediatek/mt8183/dramc_pi_calibration_api.c b/src/soc/mediatek/mt8183/dramc_pi_calibration_api.c index 823c512507..08264fd129 100644 --- a/src/soc/mediatek/mt8183/dramc_pi_calibration_api.c +++ b/src/soc/mediatek/mt8183/dramc_pi_calibration_api.c @@ -20,6 +20,11 @@ #include #include +enum { + GATING_START = 26, + GATING_END = GATING_START + 24, +}; + static void auto_refresh_switch(u8 chn, u8 option) { clrsetbits_le32(&ch[chn].ao.refctrl0, 1 << REFCTRL0_REFDIS_SHIFT, @@ -86,7 +91,7 @@ static void dramc_write_leveling(u8 chn, u8 rank, clrsetbits_le32(&ch[chn].phy.shu[0].rk[rank].ca_cmd[9], SHU1_CA_CMD9_RG_RK_ARFINE_TUNE_CLK_MASK, 0); - for (u8 i = 0; i < DQS_NUMBER; i++) { + for (size_t i = 0; i < DQS_NUMBER; i++) { s32 wrlevel_dq_delay = wr_level[chn][rank][i] + 0x10; assert(wrlevel_dq_delay < 0x40); @@ -117,8 +122,8 @@ static void cmd_bus_training(u8 chn, u8 rank, static void dramc_read_dbi_onoff(u8 onoff) { - for (u8 chn = 0; chn < CHANNEL_MAX; chn++) - for (u8 b = 0; b < 2; b++) + for (size_t chn = 0; chn < CHANNEL_MAX; chn++) + for (size_t b = 0; b < 2; b++) clrsetbits_le32(&ch[chn].phy.shu[0].b[b].dq[7], 0x1 << SHU1_BX_DQ7_R_DMDQMDBI_SHU_SHIFT, onoff << SHU1_BX_DQ7_R_DMDQMDBI_SHU_SHIFT); @@ -126,7 +131,7 @@ static void dramc_read_dbi_onoff(u8 onoff) static void dramc_write_dbi_onoff(u8 onoff) { - for (u8 chn = 0; chn < CHANNEL_MAX; chn++) + for (size_t chn = 0; chn < CHANNEL_MAX; chn++) clrsetbits_le32(&ch[chn].ao.shu[0].wodt, 0x1 << SHU1_WODT_DBIWR_SHIFT, onoff << SHU1_WODT_DBIWR_SHIFT); @@ -150,11 +155,9 @@ static void dramc_phy_dcm_disable(u8 chn) static void dramc_enable_phy_dcm(u8 en) { u32 broadcast_bak = dramc_get_broadcast(); - u8 chn = 0; - dramc_set_broadcast(DRAMC_BROADCAST_OFF); - for (chn = 0; chn < CHANNEL_MAX ; chn++) { + for (size_t chn = 0; chn < CHANNEL_MAX ; chn++) { clrbits_le32(&ch[chn].phy.b[0].dll_fine_tune[1], 0x1 << 20); clrbits_le32(&ch[chn].phy.b[1].dll_fine_tune[1], 0x1 << 20); clrbits_le32(&ch[chn].phy.ca_dll_fine_tune[1], 0x1 << 20); @@ -191,17 +194,17 @@ static void dramc_enable_phy_dcm(u8 en) clrsetbits_le32(&shu->b[1].dq[7], mask, value); clrsetbits_le32(&shu->ca_cmd[7], mask, value); } - } - if (!en) - dramc_phy_dcm_disable(chn); + if (!en) + dramc_phy_dcm_disable(chn); + } dramc_set_broadcast(broadcast_bak); } static void reset_delay_chain_before_calibration(void) { - for (u8 chn = 0; chn < CHANNEL_MAX; chn++) - for (u8 rank = 0; rank < RANK_MAX; rank++) { + for (size_t chn = 0; chn < CHANNEL_MAX; chn++) + for (size_t rank = 0; rank < RANK_MAX; rank++) { struct dramc_ddrphy_regs_shu_rk *rk; rk = &ch[chn].phy.shu[0].rk[rank]; clrbits_le32(&rk->ca_cmd[0], 0xffffff << 0); @@ -233,8 +236,6 @@ static void dramc_rx_input_delay_tracking_init_by_freq(u8 chn) void dramc_apply_pre_calibration_config(void) { - u8 shu = 0; - dramc_enable_phy_dcm(0); reset_delay_chain_before_calibration(); @@ -242,7 +243,7 @@ void dramc_apply_pre_calibration_config(void) setbits_le32(&ch[0].ao.spcmdctrl, 0x1 << 24); clrsetbits_le32(&ch[0].ao.shu[0].scintv, 0x1f << 1, 0x1b << 1); - for (shu = 0; shu < DRAM_DFS_SHUFFLE_MAX; shu++) + for (size_t shu = 0; shu < DRAM_DFS_SHUFFLE_MAX; shu++) setbits_le32(&ch[0].ao.shu[shu].conf[3], 0x1ff << 0); clrbits_le32(&ch[0].ao.dramctrl, 0x1 << 18); @@ -254,10 +255,10 @@ void dramc_apply_pre_calibration_config(void) dramc_write_dbi_onoff(DBI_OFF); dramc_read_dbi_onoff(DBI_OFF); - for (int chn = 0; chn < CHANNEL_MAX; chn++) { + for (size_t chn = 0; chn < CHANNEL_MAX; chn++) { setbits_le32(&ch[chn].ao.spcmdctrl, 0x1 << 29); setbits_le32(&ch[chn].ao.dqsoscr, 0x1 << 24); - for (shu = 0; shu < DRAM_DFS_SHUFFLE_MAX; shu++) + for (size_t shu = 0; shu < DRAM_DFS_SHUFFLE_MAX; shu++) setbits_le32(&ch[chn].ao.shu[shu].scintv, 0x1 << 30); clrbits_le32(&ch[chn].ao.dummy_rd, (0x1 << 7) | (0x7 << 20)); @@ -287,6 +288,646 @@ void dramc_apply_pre_calibration_config(void) } } +static void rx_dqs_isi_pulse_cg_switch(u8 chn, bool flag) +{ + for (size_t b = 0; b < 2; b++) + clrsetbits_le32(&ch[chn].phy.b[b].dq[6], 1 << 5, + (flag ? 1 : 0) << 5); +} + +static void dramc_set_rank_engine2(u8 chn, u8 rank) +{ + setbits_le32(&ch[chn].ao.dramctrl, 0x1 << 1); + clrbits_le32(&ch[chn].ao.test2_4, TEST2_4_TESTAGENTRKSEL_MASK); + clrsetbits_le32(&ch[chn].ao.test2_4, TEST2_4_TESTAGENTRK_MASK, + rank << TEST2_4_TESTAGENTRK_SHIFT); +} + +static void dramc_engine2_init(u8 chn, u8 rank, u32 size, bool testaudpat) +{ + const u32 pat0 = 0x55; + const u32 pat1 = 0xaa; + const u32 addr = 0; + + dramc_set_rank_engine2(chn, rank); + + clrbits_le32(&ch[chn].ao.dummy_rd, + (0x1 << DUMMY_RD_DQSG_DMYRD_EN_SHIFT) | + (0x1 << DUMMY_RD_DQSG_DMYWR_EN_SHIFT) | + (0x1 << DUMMY_RD_DUMMY_RD_EN_SHIFT) | + (0x1 << DUMMY_RD_SREF_DMYRD_EN_SHIFT) | + (0x1 << DUMMY_RD_DMY_RD_DBG_SHIFT) | + (0x1 << DUMMY_RD_DMY_WR_DBG_SHIFT)); + clrbits_le32(&ch[chn].nao.testchip_dma1, + 0x1 << TESTCHIP_DMA1_DMA_LP4MATAB_OPT_SHIFT); + clrbits_le32(&ch[chn].ao.test2_3, + (0x1 << TEST2_3_TEST2W_SHIFT) | + (0x1 << TEST2_3_TEST2R_SHIFT) | + (0x1 << TEST2_3_TEST1_SHIFT)); + clrsetbits_le32(&ch[chn].ao.test2_0, + TEST2_0_PAT0_MASK | TEST2_0_PAT1_MASK, + (pat0 << TEST2_0_PAT0_SHIFT) | + (pat1 << TEST2_0_PAT1_SHIFT)); + write32(&ch[chn].ao.test2_1, (addr << 4) & 0x00ffffff); + write32(&ch[chn].ao.test2_2, (size << 4) & 0x00ffffff); + + clrsetbits_le32(&ch[chn].ao.test2_4, + (0x1 << TEST2_4_TESTAUDMODE_SHIFT) | + (0x1 << TEST2_4_TESTAUDBITINV_SHIFT) | + (0x1 << TEST2_4_TESTXTALKPAT_SHIFT), + ((!testaudpat ? 1 : 0) << TEST2_4_TESTXTALKPAT_SHIFT) | + ((testaudpat ? 1 : 0) << TEST2_4_TESTAUDMODE_SHIFT) | + ((testaudpat ? 1 : 0) << TEST2_4_TESTAUDBITINV_SHIFT)); + + if (!testaudpat) { + clrbits_le32(&ch[chn].ao.test2_4, + (0x1 << TEST2_4_TEST_REQ_LEN1_SHIFT) | + (0x1 << TEST2_4_TESTSSOPAT_SHIFT) | + (0x1 << TEST2_4_TESTSSOXTALKPAT_SHIFT)); + setbits_le32(&ch[chn].ao.perfctl0, + 0x1 << PERFCTL0_RWOFOEN_SHIFT); + } else { + clrsetbits_le32(&ch[chn].ao.test2_4, + TEST2_4_TESTAUDINIT_MASK | TEST2_4_TESTAUDINC_MASK, + (0x11 << TEST2_4_TESTAUDINIT_SHIFT) | + (0xd << TEST2_4_TESTAUDINC_SHIFT)); + } + clrsetbits_le32(&ch[chn].ao.test2_3, + TEST2_3_TESTCNT_MASK | (0x1 << TEST2_3_TESTAUDPAT_SHIFT), + (testaudpat ? 1 : 0) << TEST2_3_TESTAUDPAT_SHIFT); +} + +static void dramc_engine2_check_complete(u8 chn) +{ + u32 u4loop_count = 0; + + /* In some case test engine finished but the complete signal late come, + * system will wait very long time. Hence, we set a timeout here. + * After system receive complete signal or wait until time out + * it will return, the caller will check compare result to verify + * whether engine success. + */ + while ((read32(&ch[chn].nao.testrpt) & 0x1) == 0) { + udelay(1); + u4loop_count++; + + if (u4loop_count > MAX_CMP_CPT_WAIT_LOOP) { + dramc_dbg("MEASURE_A timeout\n"); + break; + } + } +} + +static u32 dramc_engine2_run(u8 chn, enum dram_te_op wr) +{ + u32 result; + + if (wr == TE_OP_READ_CHECK) { + clrbits_le32(&ch[chn].ao.test2_4, + 0x1 << TEST2_4_TESTAUDMODE_SHIFT); + } else if (wr == TE_OP_WRITE_READ_CHECK) { + clrsetbits_le32(&ch[chn].ao.test2_3, + (0x1 << TEST2_3_TEST2R_SHIFT) | + (0x1 << TEST2_3_TEST1_SHIFT), + 0x1 << TEST2_3_TEST2W_SHIFT); + + dramc_engine2_check_complete(chn); + clrbits_le32(&ch[chn].ao.test2_3, + (0x1 << TEST2_3_TEST2W_SHIFT) | + (0x1 << TEST2_3_TEST2R_SHIFT) | + (0x1 << TEST2_3_TEST1_SHIFT)); + udelay(1); + } + + /* Do read test */ + clrsetbits_le32(&ch[chn].ao.test2_3, + (0x1 << TEST2_3_TEST2W_SHIFT) | (0x1 << TEST2_3_TEST1_SHIFT), + 0x1 << TEST2_3_TEST2R_SHIFT); + + dramc_engine2_check_complete(chn); + + udelay(1); + result = read32(&ch[chn].nao.cmp_err); + clrbits_le32(&ch[chn].ao.test2_3, + (0x1 << TEST2_3_TEST2W_SHIFT) | + (0x1 << TEST2_3_TEST2R_SHIFT) | + (0x1 << TEST2_3_TEST1_SHIFT)); + + return result; +} + +static void dramc_engine2_end(u8 chn) +{ + clrbits_le32(&ch[chn].ao.test2_4, 0x1 << 17); +} + +static void find_gating_window(u32 result_r, u32 result_f, u32 *debug_cnt, + u8 dly_coarse_large, u8 dly_coarse_0p5t, u8 *pass_begin, + u8 *pass_count, u8 *dly_fine_xt, u32 *coarse_tune, u8 *dqs_high) +{ + u16 debug_cnt_perbyte; + u8 pass_count_1[DQS_NUMBER]; + + for (u8 dqs = 0; dqs < DQS_NUMBER; dqs++) { + u8 dqs_result_r = (u8) ((result_r >> (8 * dqs)) & 0xff); + u8 dqs_result_f = (u8) ((result_f >> (8 * dqs)) & 0xff); + + debug_cnt_perbyte = (u16) debug_cnt[dqs]; + if (dqs_result_r != 0 || dqs_result_f != 0 || + debug_cnt_perbyte != GATING_GOLDEND_DQSCNT) + continue; + + if (pass_begin[dqs] == 0) { + pass_begin[dqs] = 1; + pass_count_1[dqs] = 0; + dramc_dbg("[Byte %d]First pass (%d, %d, %d)\n", + dqs, dly_coarse_large, + dly_coarse_0p5t, *dly_fine_xt); + } + + if (pass_begin[dqs] == 1) + pass_count_1[dqs]++; + + if (pass_begin[dqs] == 1 && + pass_count_1[dqs] * DQS_GW_FINE_STEP > DQS_GW_FINE_END) + dqs_high[dqs] = 0; + + if (pass_count_1[0] * DQS_GW_FINE_STEP > DQS_GW_FINE_END && + pass_count_1[1] * DQS_GW_FINE_STEP > DQS_GW_FINE_END) { + dramc_dbg("All bytes gating window > 1 coarse_tune," + " Early break\n"); + *dly_fine_xt = DQS_GW_FINE_END; + *coarse_tune = GATING_END; + } + } +} + +static void find_dly_tune(u8 chn, u8 dly_coarse_large, u8 dly_coarse_0p5t, + u8 dly_fine_xt, u8 *dqs_high, u8 *dly_coarse_large_cnt, + u8 *dly_coarse_0p5t_cnt, u8 *dly_fine_tune_cnt, u8 *dqs_trans) +{ + for (size_t dqs = 0; dqs < DQS_NUMBER; dqs++) { + u32 dqs_cnt = read32(&ch[chn].phy_nao.misc_phy_stben_b[dqs]); + dqs_cnt = (dqs_cnt >> 16) & 3; + + if (dqs_cnt == 3) + dqs_high[dqs]++; + + if (dqs_high[dqs] * DQS_GW_FINE_STEP <= 16) + continue; + + switch (dqs_cnt) { + case 3: + dly_coarse_large_cnt[dqs] = dly_coarse_large; + dly_coarse_0p5t_cnt[dqs] = dly_coarse_0p5t; + dly_fine_tune_cnt[dqs] = dly_fine_xt; + dqs_trans[dqs] = 1; + break; + case 2: + case 1: + dqs_trans[dqs]++; + break; + case 0: + dqs_high[dqs] = 0; + break; + } + } +} + +static void dram_phy_reset(u8 chn) +{ + setbits_le32(&ch[chn].ao.ddrconf0, 1 << DDRCONF0_RDATRST_SHIFT); + setbits_le32(&ch[chn].phy.misc_ctrl1, 1 << MISC_CTRL1_R_DMPHYRST_SHIFT); + clrbits_le32(&ch[chn].phy.b[0].dq[9], (1 << 4) | (1 << 0)); + clrbits_le32(&ch[chn].phy.b[1].dq[9], (1 << 4) | (1 << 0)); + + udelay(1); + setbits_le32(&ch[chn].phy.b[1].dq[9], (1 << 4) | (1 << 0)); + setbits_le32(&ch[chn].phy.b[0].dq[9], (1 << 4) | (1 << 0)); + clrbits_le32(&ch[chn].phy.misc_ctrl1, 1 << MISC_CTRL1_R_DMPHYRST_SHIFT); + clrbits_le32(&ch[chn].ao.ddrconf0, 1 << DDRCONF0_RDATRST_SHIFT); +} + +static void dramc_set_gating_mode(u8 chn, bool mode) +{ + u8 vref = 0, burst = 0; + + if (mode) { + vref = 2; + burst = 1; + } + + clrsetbits_le32(&ch[chn].ao.stbcal1, 0x1 << 5, burst << 5); + setbits_le32(&ch[chn].ao.stbcal, 0x1 << 30); + + for (size_t b = 0; b < 2; b++) { + clrsetbits_le32(&ch[chn].phy.b[b].dq[6], 0x3 << 14, vref << 14); + setbits_le32(&ch[chn].phy.b[b].dq[9], 0x1 << 5); + clrbits_le32(&ch[chn].phy.b[b].dq[9], (0x1 << 4) | (0x1 << 0)); + setbits_le32(&ch[chn].phy.b[b].dq[9], (0x1 << 4) | (0x1 << 0)); + } +} + +static void dramc_rx_dqs_gating_cal_pre(u8 chn, u8 rank) +{ + rx_dqs_isi_pulse_cg_switch(chn, DISABLE); + clrbits_le32(&ch[chn].ao.refctrl0, 1 << REFCTRL0_PBREFEN_SHIFT); + + dramc_hw_gating_onoff(chn, GATING_OFF); + + setbits_le32(&ch[chn].ao.stbcal1, 1 << STBCAL1_STBENCMPEN_SHIFT); + setbits_le32(&ch[chn].ao.stbcal1, 1 << STBCAL1_STBCNT_LATCH_EN_SHIFT); + clrbits_le32(&ch[chn].ao.ddrconf0, 1 << DDRCONF0_DM4TO1MODE_SHIFT); + setbits_le32(&ch[chn].ao.spcmd, 1 << SPCMD_DQSGCNTEN_SHIFT); + + udelay(4); + setbits_le32(&ch[chn].ao.spcmd, 1 << SPCMD_DQSGCNTRST_SHIFT); + udelay(1); + clrbits_le32(&ch[chn].ao.spcmd, 1 << SPCMD_DQSGCNTRST_SHIFT); + clrsetbits_le32(&ch[chn].phy.misc_ctrl1, + 1 << MISC_CTRL1_R_DMSTBENCMP_RK_OPT_SHIFT, + rank << MISC_CTRL1_R_DMSTBENCMP_RK_OPT_SHIFT); + +} + +static void dramc_write_dqs_gating_result(u8 chn, u8 rank, + u8 *best_coarse_tune2t, u8 *best_coarse_tune0p5t, + u8 *best_coarse_tune2t_p1, u8 *best_coarse_tune0p5t_p1, + u8 *best_fine_tune) +{ + u8 best_coarse_rodt[DQS_NUMBER], best_coarse_0p5t_rodt[DQS_NUMBER]; + u8 best_coarse_rodt_p1[DQS_NUMBER]; + u8 best_coarse_0p5t_rodt_p1[DQS_NUMBER]; + + rx_dqs_isi_pulse_cg_switch(chn, ENABLE); + + write32(&ch[chn].ao.shu[0].rk[rank].selph_dqsg0, + ((u32) best_coarse_tune2t[0] << + SHURK_SELPH_DQSG0_TX_DLY_DQS0_GATED_SHIFT) | + ((u32) best_coarse_tune2t[1] << + SHURK_SELPH_DQSG0_TX_DLY_DQS1_GATED_SHIFT) | + ((u32) best_coarse_tune2t_p1[0] << + SHURK_SELPH_DQSG0_TX_DLY_DQS0_GATED_P1_SHIFT) | + ((u32) best_coarse_tune2t_p1[1] << + SHURK_SELPH_DQSG0_TX_DLY_DQS1_GATED_P1_SHIFT)); + write32(&ch[chn].ao.shu[0].rk[rank].selph_dqsg1, + ((u32) best_coarse_tune0p5t[0] << + SHURK_SELPH_DQSG1_REG_DLY_DQS0_GATED_SHIFT) | + ((u32) best_coarse_tune0p5t[1] << + SHURK_SELPH_DQSG1_REG_DLY_DQS1_GATED_SHIFT) | + ((u32) best_coarse_tune0p5t_p1[0] << + SHURK_SELPH_DQSG1_REG_DLY_DQS0_GATED_P1_SHIFT) | + ((u32) best_coarse_tune0p5t_p1[1] << + SHURK_SELPH_DQSG1_REG_DLY_DQS1_GATED_P1_SHIFT)); + + for (size_t dqs = 0; dqs < DQS_NUMBER; dqs++) { + u8 tmp_value = (best_coarse_tune2t[dqs] << 3) + + best_coarse_tune0p5t[dqs]; + + if (tmp_value >= 11) { + tmp_value -= 11; + best_coarse_rodt[dqs] = tmp_value >> 3; + best_coarse_0p5t_rodt[dqs] = + tmp_value - (best_coarse_rodt[dqs] << 3); + + tmp_value = (best_coarse_tune2t_p1[dqs] << 3) + + best_coarse_tune0p5t_p1[dqs] - 11; + best_coarse_rodt_p1[dqs] = tmp_value >> 3; + best_coarse_0p5t_rodt_p1[dqs] = + tmp_value - (best_coarse_rodt_p1[dqs] << 3); + + dramc_dbg("Best RODT dly(2T, 0.5T) = (%d, %d)\n", + best_coarse_rodt[dqs], + best_coarse_0p5t_rodt[dqs]); + } else { + best_coarse_rodt[dqs] = 0; + best_coarse_0p5t_rodt[dqs] = 0; + best_coarse_rodt_p1[dqs] = 4; + best_coarse_0p5t_rodt_p1[dqs] = 4; + dramc_dbg("RxdqsGatingCal error: best_coarse_tune2t:%d" + " is already 0. RODT cannot be -1 coarse\n", + dqs); + } + } + + write32(&ch[chn].ao.shu[0].rk[rank].selph_odten0, + ((u32) best_coarse_rodt[0] << + SHURK_SELPH_ODTEN0_TXDLY_B0_RODTEN_SHIFT) | + ((u32) best_coarse_rodt[1] << + SHURK_SELPH_ODTEN0_TXDLY_B1_RODTEN_SHIFT) | + ((u32) best_coarse_rodt_p1[0] << + SHURK_SELPH_ODTEN0_TXDLY_B0_RODTEN_P1_SHIFT) | + ((u32) best_coarse_rodt_p1[1] << + SHURK_SELPH_ODTEN0_TXDLY_B1_RODTEN_P1_SHIFT)); + write32(&ch[chn].ao.shu[0].rk[rank].selph_odten1, + ((u32) best_coarse_0p5t_rodt[0] << + SHURK_SELPH_ODTEN1_DLY_B0_RODTEN_SHIFT) | + ((u32) best_coarse_0p5t_rodt[1] << + SHURK_SELPH_ODTEN1_DLY_B1_RODTEN_SHIFT) | + ((u32) best_coarse_0p5t_rodt_p1[0] << + SHURK_SELPH_ODTEN1_DLY_B0_RODTEN_P1_SHIFT) | + ((u32) best_coarse_0p5t_rodt_p1[1] << + SHURK_SELPH_ODTEN1_DLY_B1_RODTEN_P1_SHIFT)); + + write32(&ch[chn].ao.shu[0].rk[rank].dqsien, + best_fine_tune[0] | (best_fine_tune[1] << 8)); +} + +static void dramc_rx_dqs_gating_cal(u8 chn, u8 rank) +{ + u8 dqs; + const u8 mr1_value = 0x56; + u8 pass_begin[DQS_NUMBER] = {0}, pass_count[DQS_NUMBER] = {0}; + u8 min_coarse_tune2t[DQS_NUMBER], min_coarse_tune0p5t[DQS_NUMBER], + min_fine_tune[DQS_NUMBER]; + u8 best_fine_tune[DQS_NUMBER], best_coarse_tune0p5t[DQS_NUMBER], + best_coarse_tune2t[DQS_NUMBER]; + u8 best_coarse_tune0p5t_p1[DQS_NUMBER], + best_coarse_tune2t_p1[DQS_NUMBER]; + u8 dqs_high[DQS_NUMBER] = {0}, dqs_transition[DQS_NUMBER] = {0}; + u8 dly_coarse_large_cnt[DQS_NUMBER] = {0}, + dly_coarse_0p5t_cnt[DQS_NUMBER] = {0}, + dly_fine_tune_cnt[DQS_NUMBER] = {0}; + u32 coarse_start = GATING_START, coarse_end = GATING_END; + u32 debug_cnt[DQS_NUMBER]; + + struct reg_value regs_bak[] = { + {&ch[chn].ao.stbcal, 0x0}, + {&ch[chn].ao.stbcal1, 0x0}, + {&ch[chn].ao.ddrconf0, 0x0}, + {&ch[chn].ao.spcmd, 0x0}, + {&ch[chn].ao.refctrl0, 0x0}, + {&ch[chn].phy.b[0].dq[6], 0x0}, + {&ch[chn].phy.b[1].dq[6], 0x0}, + }; + for (size_t i = 0; i < ARRAY_SIZE(regs_bak); i++) + regs_bak[i].value = read32(regs_bak[i].addr); + + dramc_mode_reg_write_by_rank(chn, rank, 0x1, mr1_value | 0x80); + dramc_rx_dqs_gating_cal_pre(chn, rank); + + u32 dummy_rd_backup = read32(&ch[chn].ao.dummy_rd); + dramc_engine2_init(chn, rank, 0x23, 1); + + dramc_dbg("[Gating]\n"); + for (u32 coarse_tune = coarse_start; coarse_tune < coarse_end; + coarse_tune += DQS_GW_COARSE_STEP) { + u32 dly_coarse_large_rodt = 0, dly_coarse_0p5t_rodt = 0; + u32 dly_coarse_large_rodt_p1 = 4, dly_coarse_0p5t_rodt_p1 = 4; + u8 dly_coarse_large = coarse_tune / RX_DQS_CTL_LOOP; + u8 dly_coarse_0p5t = coarse_tune % RX_DQS_CTL_LOOP; + u32 dly_coarse_large_p1 = + (coarse_tune + DQS_GW_FREQ_DIV) / RX_DQS_CTL_LOOP; + u32 dly_coarse_0p5t_p1 = + (coarse_tune + DQS_GW_FREQ_DIV) % RX_DQS_CTL_LOOP; + u32 value = (dly_coarse_large << 3) + dly_coarse_0p5t; + + if (value >= 11) { + value -= 11; + dly_coarse_large_rodt = value >> 3; + dly_coarse_0p5t_rodt = + value - (dly_coarse_large_rodt << 3); + + value = (dly_coarse_large << 3) + dly_coarse_0p5t - 11; + dly_coarse_large_rodt_p1 = value >> 3; + dly_coarse_0p5t_rodt_p1 = + value - (dly_coarse_large_rodt_p1 << 3); + } + + write32(&ch[chn].ao.shu[0].rk[rank].selph_dqsg0, + ((u32) dly_coarse_large << + SHURK_SELPH_DQSG0_TX_DLY_DQS0_GATED_SHIFT) | + ((u32) dly_coarse_large << + SHURK_SELPH_DQSG0_TX_DLY_DQS1_GATED_SHIFT) | + (dly_coarse_large_p1 << + SHURK_SELPH_DQSG0_TX_DLY_DQS0_GATED_P1_SHIFT) | + (dly_coarse_large_p1 << + SHURK_SELPH_DQSG0_TX_DLY_DQS1_GATED_P1_SHIFT)); + write32(&ch[chn].ao.shu[0].rk[rank].selph_dqsg1, + ((u32) dly_coarse_0p5t << + SHURK_SELPH_DQSG1_REG_DLY_DQS0_GATED_SHIFT) | + ((u32) dly_coarse_0p5t << + SHURK_SELPH_DQSG1_REG_DLY_DQS1_GATED_SHIFT) | + (dly_coarse_0p5t_p1 << + SHURK_SELPH_DQSG1_REG_DLY_DQS0_GATED_P1_SHIFT) | + (dly_coarse_0p5t_p1 << + SHURK_SELPH_DQSG1_REG_DLY_DQS1_GATED_P1_SHIFT)); + write32(&ch[chn].ao.shu[0].rk[rank].selph_odten0, + (dly_coarse_large_rodt << + SHURK_SELPH_ODTEN0_TXDLY_B0_RODTEN_SHIFT) | + (dly_coarse_large_rodt << + SHURK_SELPH_ODTEN0_TXDLY_B1_RODTEN_SHIFT) | + (dly_coarse_large_rodt_p1 << + SHURK_SELPH_ODTEN0_TXDLY_B0_RODTEN_P1_SHIFT) | + (dly_coarse_large_rodt_p1 << + SHURK_SELPH_ODTEN0_TXDLY_B1_RODTEN_P1_SHIFT)); + write32(&ch[chn].ao.shu[0].rk[rank].selph_odten1, + (dly_coarse_0p5t_rodt << + SHURK_SELPH_ODTEN1_DLY_B0_RODTEN_SHIFT) | + (dly_coarse_0p5t_rodt << + SHURK_SELPH_ODTEN1_DLY_B1_RODTEN_SHIFT) | + (dly_coarse_0p5t_rodt_p1 << + SHURK_SELPH_ODTEN1_DLY_B0_RODTEN_P1_SHIFT) | + (dly_coarse_0p5t_rodt_p1 << + SHURK_SELPH_ODTEN1_DLY_B1_RODTEN_P1_SHIFT)); + + for (u8 dly_fine_xt = DQS_GW_FINE_START; + dly_fine_xt < DQS_GW_FINE_END; + dly_fine_xt += DQS_GW_FINE_STEP) { + + dramc_set_gating_mode(chn, 0); + + write32(&ch[chn].ao.shu[0].rk[rank].dqsien, + dly_fine_xt | (dly_fine_xt << 8)); + + dram_phy_reset(chn); + setbits_le32(&ch[chn].ao.spcmd, + 1 << SPCMD_DQSGCNTRST_SHIFT); + udelay(1); + clrbits_le32(&ch[chn].ao.spcmd, + 1 << SPCMD_DQSGCNTRST_SHIFT); + + dramc_engine2_run(chn, TE_OP_READ_CHECK); + + u32 result_r = read32(&ch[chn].phy.misc_stberr_rk0_r) & + MISC_STBERR_RK_R_STBERR_RK_R_MASK; + u32 result_f = read32(&ch[chn].phy.misc_stberr_rk0_f) & + MISC_STBERR_RK_F_STBERR_RK_F_MASK; + debug_cnt[0] = read32(&ch[chn].nao.dqsgnwcnt[0]); + debug_cnt[1] = (debug_cnt[0] >> 16) & 0xffff; + debug_cnt[0] &= 0xffff; + + dramc_set_gating_mode(chn, 1); + dramc_engine2_run(chn, TE_OP_READ_CHECK); + + find_dly_tune(chn, dly_coarse_large, dly_coarse_0p5t, + dly_fine_xt, dqs_high, dly_coarse_large_cnt, + dly_coarse_0p5t_cnt, + dly_fine_tune_cnt, dqs_transition); + + dramc_dbg("%d %d %d |", dly_coarse_large, + dly_coarse_0p5t, dly_fine_xt); + for (dqs = 0; dqs < DQS_NUMBER; dqs++) + dramc_dbg("%X ", debug_cnt[dqs]); + + dramc_dbg(" |"); + for (dqs = 0; dqs < DQS_NUMBER; dqs++) { + dramc_dbg("(%X %X)", + (result_f >> (DQS_BIT_NUMBER * dqs)) & 0xff, + (result_r >> (DQS_BIT_NUMBER * dqs)) & 0xff); + } + + dramc_dbg("\n"); + find_gating_window(result_r, result_f, debug_cnt, + dly_coarse_large, dly_coarse_0p5t, pass_begin, + pass_count, &dly_fine_xt, &coarse_tune, + dqs_high); + } + } + + dramc_engine2_end(chn); + write32(&ch[chn].ao.dummy_rd, dummy_rd_backup); + + for (dqs = 0; dqs < DQS_NUMBER; dqs++) { + pass_count[dqs] = dqs_transition[dqs]; + min_fine_tune[dqs] = dly_fine_tune_cnt[dqs]; + min_coarse_tune0p5t[dqs] = dly_coarse_0p5t_cnt[dqs]; + min_coarse_tune2t[dqs] = dly_coarse_large_cnt[dqs]; + + u8 tmp_offset = pass_count[dqs] * DQS_GW_FINE_STEP / 2; + u8 tmp_value = min_fine_tune[dqs] + tmp_offset; + best_fine_tune[dqs] = tmp_value % RX_DLY_DQSIENSTB_LOOP; + + tmp_offset = tmp_value / RX_DLY_DQSIENSTB_LOOP; + tmp_value = min_coarse_tune0p5t[dqs] + tmp_offset; + best_coarse_tune0p5t[dqs] = tmp_value % RX_DQS_CTL_LOOP; + + tmp_offset = tmp_value / RX_DQS_CTL_LOOP; + best_coarse_tune2t[dqs] = min_coarse_tune2t[dqs] + tmp_offset; + tmp_value = best_coarse_tune0p5t[dqs] + DQS_GW_FREQ_DIV; + best_coarse_tune0p5t_p1[dqs] = tmp_value % RX_DQS_CTL_LOOP; + + tmp_offset = tmp_value / RX_DQS_CTL_LOOP; + best_coarse_tune2t_p1[dqs] = + best_coarse_tune2t[dqs] + tmp_offset; + } + + for (dqs = 0; dqs < DQS_NUMBER; dqs++) + dramc_show("Best DQS%d dly(2T, 0.5T, fine tune)" + " = (%d, %d, %d)\n", dqs, best_coarse_tune2t[dqs], + best_coarse_tune0p5t[dqs], best_fine_tune[dqs]); + + for (dqs = 0; dqs < DQS_NUMBER; dqs++) + dramc_show("Best DQS%d coarse dly(2T, 0.5T, fine tune)" + " = (%d, %d, %d)\n", dqs, best_coarse_tune2t_p1[dqs], + best_coarse_tune0p5t_p1[dqs], best_fine_tune[dqs]); + + for (size_t i = 0; i < ARRAY_SIZE(regs_bak); i++) + write32(regs_bak[i].addr, regs_bak[i].value); + + dramc_mode_reg_write_by_rank(chn, rank, 0x1, mr1_value & 0x7f); + + dramc_write_dqs_gating_result(chn, rank, best_coarse_tune2t, + best_coarse_tune0p5t, best_coarse_tune2t_p1, + best_coarse_tune0p5t_p1, best_fine_tune); + + dram_phy_reset(chn); +} + +static void dramc_rx_dqs_gating_post_process(u8 chn) +{ + u8 dqs, rank_rx_dvs, dqsinctl; + u32 read_dqsinctl, rankinctl_root, xrtr2r, reg_tx_dly_dqsgated_min = 3; + u8 txdly_cal_min = 0xff, txdly_cal_max = 0, tx_dly_dqs_gated = 0; + u32 best_coarse_tune2t[RANK_MAX][DQS_NUMBER]; + u32 best_coarse_tune2t_p1[RANK_MAX][DQS_NUMBER]; + + rank_rx_dvs = reg_tx_dly_dqsgated_min - 1; + + for (size_t b = 0; b < 2; b++) + clrsetbits_le32(&ch[chn].phy.shu[0].b[b].dq[7], + SHU1_BX_DQ7_R_DMRANKRXDVS_MASK, + rank_rx_dvs << SHU1_BX_DQ7_R_DMRANKRXDVS_SHIFT); + + for (size_t rank = 0; rank < RANK_MAX; rank++) { + u32 dqsg0 = read32(&ch[chn].ao.shu[0].rk[rank].selph_dqsg0); + for (dqs = 0; dqs < DQS_NUMBER; dqs++) { + best_coarse_tune2t[rank][dqs] = + (dqsg0 >> (dqs * 8)) & + SHURK_SELPH_DQSG0_TX_DLY_DQS0_GATED_MASK; + best_coarse_tune2t_p1[rank][dqs] = + ((dqsg0 >> (dqs * 8)) & + SHURK_SELPH_DQSG0_TX_DLY_DQS0_GATED_P1_MASK) >> + SHURK_SELPH_DQSG0_TX_DLY_DQS0_GATED_P1_SHIFT; + dramc_dbg("Rank%d best DQS%d dly(2T,(P1)2T)=(%d, %d)\n", + rank, dqs, best_coarse_tune2t[rank][dqs], + best_coarse_tune2t_p1[rank][dqs]); + + tx_dly_dqs_gated = best_coarse_tune2t[rank][dqs]; + txdly_cal_min = MIN(txdly_cal_min, tx_dly_dqs_gated); + + tx_dly_dqs_gated = best_coarse_tune2t_p1[rank][dqs]; + txdly_cal_max = MAX(txdly_cal_max, tx_dly_dqs_gated); + } + } + + dqsinctl = reg_tx_dly_dqsgated_min - txdly_cal_min; + dramc_dbg("Dqsinctl:%d, tx_dly_dqsgated_min %d, txdly_cal_min %d\n", + dqsinctl, reg_tx_dly_dqsgated_min, txdly_cal_min); + + if (dqsinctl != 0) { + txdly_cal_min += dqsinctl; + txdly_cal_max += dqsinctl; + + for (size_t rank = 0; rank < RANK_MAX; rank++) { + dramc_dbg("Rank: %d\n", rank); + for (dqs = 0; dqs < DQS_NUMBER; dqs++) { + best_coarse_tune2t[rank][dqs] += dqsinctl; + best_coarse_tune2t_p1[rank][dqs] += dqsinctl; + + dramc_dbg("Best DQS%d dly(2T) = (%d)\n", + dqs, best_coarse_tune2t[rank][dqs]); + dramc_dbg("Best DQS%d P1 dly(2T) = (%d)\n", + dqs, + best_coarse_tune2t_p1[rank][dqs]); + } + + write32(&ch[chn].ao.shu[0].rk[rank].selph_dqsg0, + (best_coarse_tune2t[rank][0] << 0) | + (best_coarse_tune2t[rank][1] << 8) | + (best_coarse_tune2t_p1[rank][0] << 4) | + (best_coarse_tune2t_p1[rank][1] << 12)); + } + } + + read_dqsinctl = (read32(&ch[chn].ao.shu[0].rk[0].dqsctl) & + SHURK_DQSCTL_DQSINCTL_MASK) - dqsinctl; + rankinctl_root = (read_dqsinctl >= 3) ? (read_dqsinctl - 3) : 0; + + clrsetbits_le32(&ch[chn].ao.shu[0].rk[0].dqsctl, + SHURK_DQSCTL_DQSINCTL_MASK, + read_dqsinctl << SHURK_DQSCTL_DQSINCTL_SHIFT); + clrsetbits_le32(&ch[chn].ao.shu[0].rk[1].dqsctl, + SHURK_DQSCTL_DQSINCTL_MASK, + read_dqsinctl << SHURK_DQSCTL_DQSINCTL_SHIFT); + clrsetbits_le32(&ch[chn].ao.shu[0].rankctl, + SHU_RANKCTL_RANKINCTL_PHY_MASK | + SHU_RANKCTL_RANKINCTL_MASK | SHU_RANKCTL_RANKINCTL_ROOT1_MASK, + (read_dqsinctl << SHU_RANKCTL_RANKINCTL_PHY_SHIFT) | + (rankinctl_root << SHU_RANKCTL_RANKINCTL_SHIFT) | + (rankinctl_root << SHU_RANKCTL_RANKINCTL_ROOT1_SHIFT)); + + xrtr2r = MIN(8 + txdly_cal_max + 1, 12); + clrsetbits_le32(&ch[chn].ao.shu[0].actim_xrt, + SHU_ACTIM_XRT_XRTR2R_MASK, + xrtr2r << SHU_ACTIM_XRT_XRTR2R_SHIFT); + + dramc_dbg("Tx_dly_DQS gated check: min %d max %d, changeDQSINCTL=%d," + " DQSINCTL=%d, RANKINCTL=%d, XRTR2R=%d\n", + txdly_cal_min, txdly_cal_max, dqsinctl, + read_dqsinctl, rankinctl_root, xrtr2r); +} + void dramc_calibrate_all_channels(const struct sdram_params *pams) { for (u8 chn = 0; chn < CHANNEL_MAX; chn++) { @@ -296,6 +937,9 @@ void dramc_calibrate_all_channels(const struct sdram_params *pams) cmd_bus_training(chn, rk, pams); dramc_write_leveling(chn, rk, pams->wr_level); auto_refresh_switch(chn, 1); + dramc_rx_dqs_gating_cal(chn, rk); } + + dramc_rx_dqs_gating_post_process(chn); } } diff --git a/src/soc/mediatek/mt8183/include/soc/dramc_pi_api.h b/src/soc/mediatek/mt8183/include/soc/dramc_pi_api.h index 6fc3ef71f4..3fb8c25d47 100644 --- a/src/soc/mediatek/mt8183/include/soc/dramc_pi_api.h +++ b/src/soc/mediatek/mt8183/include/soc/dramc_pi_api.h @@ -111,6 +111,11 @@ enum { RESTORE_VALUE }; +struct reg_value { + u32 *addr; + u32 value; +}; + enum { DQ_DIV_SHIFT = 3, DQ_DIV_MASK = BIT(DQ_DIV_SHIFT) - 1, -- cgit v1.2.3