diff options
Diffstat (limited to 'ext/mcpat/cacti/mat.cc')
-rw-r--r--[-rwxr-xr-x] | ext/mcpat/cacti/mat.cc | 3282 |
1 files changed, 1707 insertions, 1575 deletions
diff --git a/ext/mcpat/cacti/mat.cc b/ext/mcpat/cacti/mat.cc index ef98107c7..447996053 100755..100644 --- a/ext/mcpat/cacti/mat.cc +++ b/ext/mcpat/cacti/mat.cc @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -36,371 +37,369 @@ #include "mat.h" Mat::Mat(const DynamicParameter & dyn_p) - :dp(dyn_p), - power_subarray_out_drv(), - delay_fa_tag(0), delay_cam(0), - delay_before_decoder(0), delay_bitline(0), - delay_wl_reset(0), delay_bl_restore(0), - delay_searchline(0), delay_matchchline(0), - delay_cam_sl_restore(0), delay_cam_ml_reset(0), - delay_fa_ram_wl(0),delay_hit_miss_reset(0), - delay_hit_miss(0), - subarray(dp, dp.fully_assoc), - power_bitline(), per_bitline_read_energy(0), - deg_bl_muxing(dp.deg_bl_muxing), - num_act_mats_hor_dir(dyn_p.num_act_mats_hor_dir), - delay_writeback(0), - cell(subarray.cell), cam_cell(subarray.cam_cell), - is_dram(dyn_p.is_dram), - pure_cam(dyn_p.pure_cam), - num_mats(dp.num_mats), - power_sa(), delay_sa(0), - leak_power_sense_amps_closed_page_state(0), - leak_power_sense_amps_open_page_state(0), - delay_subarray_out_drv(0), - delay_comparator(0), power_comparator(), - num_do_b_mat(dyn_p.num_do_b_mat), num_so_b_mat(dyn_p.num_so_b_mat), - num_subarrays_per_mat(dp.num_subarrays/dp.num_mats), - num_subarrays_per_row(dp.Ndwl/dp.num_mats_h_dir) -{ - assert(num_subarrays_per_mat <= 4); - assert(num_subarrays_per_row <= 2); - is_fa = (dp.fully_assoc) ? true : false; - camFlag = (is_fa || pure_cam);//although cam_cell.w = cell.w for fa, we still differentiate them. - - if (is_fa || pure_cam) - num_subarrays_per_row = num_subarrays_per_mat>2?num_subarrays_per_mat/2:num_subarrays_per_mat; - - if (dp.use_inp_params == 1) { - RWP = dp.num_rw_ports; - ERP = dp.num_rd_ports; - EWP = dp.num_wr_ports; - SCHP = dp.num_search_ports; - } - else { - RWP = g_ip->num_rw_ports; - ERP = g_ip->num_rd_ports; - EWP = g_ip->num_wr_ports; - SCHP = g_ip->num_search_ports; - - } - - double number_sa_subarray; - - if (!is_fa && !pure_cam) - { - number_sa_subarray = subarray.num_cols / deg_bl_muxing; - } - else if (is_fa && !pure_cam) - { - number_sa_subarray = (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram) / deg_bl_muxing; - } - - else - { - number_sa_subarray = (subarray.num_cols_fa_cam) / deg_bl_muxing; - } - - int num_dec_signals = subarray.num_rows; - double C_ld_bit_mux_dec_out = 0; - double C_ld_sa_mux_lev_1_dec_out = 0; - double C_ld_sa_mux_lev_2_dec_out = 0; - double R_wire_wl_drv_out; - - if (!is_fa && !pure_cam) - { - R_wire_wl_drv_out = subarray.num_cols * cell.w * g_tp.wire_local.R_per_um; + : dp(dyn_p), + power_subarray_out_drv(), + delay_fa_tag(0), delay_cam(0), + delay_before_decoder(0), delay_bitline(0), + delay_wl_reset(0), delay_bl_restore(0), + delay_searchline(0), delay_matchchline(0), + delay_cam_sl_restore(0), delay_cam_ml_reset(0), + delay_fa_ram_wl(0), delay_hit_miss_reset(0), + delay_hit_miss(0), + subarray(dp, dp.fully_assoc), + power_bitline(), per_bitline_read_energy(0), + deg_bl_muxing(dp.deg_bl_muxing), + num_act_mats_hor_dir(dyn_p.num_act_mats_hor_dir), + delay_writeback(0), + cell(subarray.cell), cam_cell(subarray.cam_cell), + is_dram(dyn_p.is_dram), + pure_cam(dyn_p.pure_cam), + num_mats(dp.num_mats), + power_sa(), delay_sa(0), + leak_power_sense_amps_closed_page_state(0), + leak_power_sense_amps_open_page_state(0), + delay_subarray_out_drv(0), + delay_comparator(0), power_comparator(), + num_do_b_mat(dyn_p.num_do_b_mat), num_so_b_mat(dyn_p.num_so_b_mat), + num_subarrays_per_mat(dp.num_subarrays / dp.num_mats), + num_subarrays_per_row(dp.Ndwl / dp.num_mats_h_dir) { + assert(num_subarrays_per_mat <= 4); + assert(num_subarrays_per_row <= 2); + is_fa = (dp.fully_assoc) ? true : false; + camFlag = (is_fa || pure_cam);//although cam_cell.w = cell.w for fa, we still differentiate them. + + if (is_fa || pure_cam) { + num_subarrays_per_row = num_subarrays_per_mat > 2 ? + num_subarrays_per_mat / 2 : num_subarrays_per_mat; } - else if (is_fa && !pure_cam) - { - R_wire_wl_drv_out = (subarray.num_cols_fa_cam * cam_cell.w + subarray.num_cols_fa_ram * cell.w) * g_tp.wire_local.R_per_um ; + + if (dp.use_inp_params == 1) { + RWP = dp.num_rw_ports; + ERP = dp.num_rd_ports; + EWP = dp.num_wr_ports; + SCHP = dp.num_search_ports; + } else { + RWP = g_ip->num_rw_ports; + ERP = g_ip->num_rd_ports; + EWP = g_ip->num_wr_ports; + SCHP = g_ip->num_search_ports; + + } + + double number_sa_subarray; + + if (!is_fa && !pure_cam) { + number_sa_subarray = subarray.num_cols / deg_bl_muxing; + } else if (is_fa && !pure_cam) { + number_sa_subarray = (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram) / deg_bl_muxing; + } + + else { + number_sa_subarray = (subarray.num_cols_fa_cam) / deg_bl_muxing; } - else - { + + int num_dec_signals = subarray.num_rows; + double C_ld_bit_mux_dec_out = 0; + double C_ld_sa_mux_lev_1_dec_out = 0; + double C_ld_sa_mux_lev_2_dec_out = 0; + double R_wire_wl_drv_out; + + if (!is_fa && !pure_cam) { + R_wire_wl_drv_out = subarray.num_cols * cell.w * g_tp.wire_local.R_per_um; + } else if (is_fa && !pure_cam) { + R_wire_wl_drv_out = (subarray.num_cols_fa_cam * cam_cell.w + subarray.num_cols_fa_ram * cell.w) * g_tp.wire_local.R_per_um ; + } else { R_wire_wl_drv_out = (subarray.num_cols_fa_cam * cam_cell.w ) * g_tp.wire_local.R_per_um; } - double R_wire_bit_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;//TODO:revisit for FA - double R_wire_sa_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w; - - if (deg_bl_muxing > 1) - { - C_ld_bit_mux_dec_out = - (2 * num_subarrays_per_mat * subarray.num_cols / deg_bl_muxing)*gate_C(g_tp.w_nmos_b_mux, 0, is_dram) + // 2 transistor per cell - num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w(); - } - - if (dp.Ndsam_lev_1 > 1) - { - C_ld_sa_mux_lev_1_dec_out = - (num_subarrays_per_mat * number_sa_subarray / dp.Ndsam_lev_1)*gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) + - num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w(); - } - if (dp.Ndsam_lev_2 > 1) - { - C_ld_sa_mux_lev_2_dec_out = - (num_subarrays_per_mat * number_sa_subarray / (dp.Ndsam_lev_1*dp.Ndsam_lev_2))*gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) + - num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w(); - } - - if (num_subarrays_per_row >= 2) - { - // wire heads for both right and left side of a mat, so half the resistance - R_wire_bit_mux_dec_out /= 2.0; - R_wire_sa_mux_dec_out /= 2.0; - } - - - row_dec = new Decoder( - num_dec_signals, - false, - subarray.C_wl, - R_wire_wl_drv_out, - false/*is_fa*/, - is_dram, - true, - camFlag? cam_cell:cell); + double R_wire_bit_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;//TODO:revisit for FA + double R_wire_sa_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w; + + if (deg_bl_muxing > 1) { + C_ld_bit_mux_dec_out = + (2 * num_subarrays_per_mat * subarray.num_cols / deg_bl_muxing) * + gate_C(g_tp.w_nmos_b_mux, 0, is_dram) + // 2 transistor per cell + num_subarrays_per_row * subarray.num_cols * + g_tp.wire_inside_mat.C_per_um * cell.get_w(); + } + + if (dp.Ndsam_lev_1 > 1) { + C_ld_sa_mux_lev_1_dec_out = + (num_subarrays_per_mat * number_sa_subarray / dp.Ndsam_lev_1) * + gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) + + num_subarrays_per_row * subarray.num_cols * + g_tp.wire_inside_mat.C_per_um * cell.get_w(); + } + if (dp.Ndsam_lev_2 > 1) { + C_ld_sa_mux_lev_2_dec_out = + (num_subarrays_per_mat * number_sa_subarray / + (dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) * + gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) + + num_subarrays_per_row * subarray.num_cols * + g_tp.wire_inside_mat.C_per_um * cell.get_w(); + } + + if (num_subarrays_per_row >= 2) { + // wire heads for both right and left side of a mat, so half the resistance + R_wire_bit_mux_dec_out /= 2.0; + R_wire_sa_mux_dec_out /= 2.0; + } + + + row_dec = new Decoder( + num_dec_signals, + false, + subarray.C_wl, + R_wire_wl_drv_out, + false/*is_fa*/, + is_dram, + true, + camFlag ? cam_cell : cell); // if (is_fa && (!dp.is_tag)) // { // row_dec->exist = true; // } - bit_mux_dec = new Decoder( - deg_bl_muxing,// This number is 1 for FA or CAM - false, - C_ld_bit_mux_dec_out, - R_wire_bit_mux_dec_out, - false/*is_fa*/, - is_dram, - false, - camFlag? cam_cell:cell); - sa_mux_lev_1_dec = new Decoder( - dp.deg_senseamp_muxing_non_associativity, // This number is 1 for FA or CAM - dp.number_way_select_signals_mat ? true : false,//only sa_mux_lev_1_dec needs way select signal - C_ld_sa_mux_lev_1_dec_out, - R_wire_sa_mux_dec_out, - false/*is_fa*/, - is_dram, - false, - camFlag? cam_cell:cell); - sa_mux_lev_2_dec = new Decoder( - dp.Ndsam_lev_2, // This number is 1 for FA or CAM - false, - C_ld_sa_mux_lev_2_dec_out, - R_wire_sa_mux_dec_out, - false/*is_fa*/, - is_dram, - false, - camFlag? cam_cell:cell); - - double C_wire_predec_blk_out; - double R_wire_predec_blk_out; - - if (!is_fa && !pure_cam) - { + bit_mux_dec = new Decoder( + deg_bl_muxing,// This number is 1 for FA or CAM + false, + C_ld_bit_mux_dec_out, + R_wire_bit_mux_dec_out, + false/*is_fa*/, + is_dram, + false, + camFlag ? cam_cell : cell); + sa_mux_lev_1_dec = new Decoder( + dp.deg_senseamp_muxing_non_associativity, // This number is 1 for FA or CAM + dp.number_way_select_signals_mat ? true : false,//only sa_mux_lev_1_dec needs way select signal + C_ld_sa_mux_lev_1_dec_out, + R_wire_sa_mux_dec_out, + false/*is_fa*/, + is_dram, + false, + camFlag ? cam_cell : cell); + sa_mux_lev_2_dec = new Decoder( + dp.Ndsam_lev_2, // This number is 1 for FA or CAM + false, + C_ld_sa_mux_lev_2_dec_out, + R_wire_sa_mux_dec_out, + false/*is_fa*/, + is_dram, + false, + camFlag ? cam_cell : cell); + + double C_wire_predec_blk_out; + double R_wire_predec_blk_out; + + if (!is_fa && !pure_cam) { + + C_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cell.h; + R_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cell.h; + + } else { //for pre-decode block's load is same for both FA and CAM + C_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cam_cell.h; + R_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cam_cell.h; + } - C_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cell.h; - R_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cell.h; - } - else //for pre-decode block's load is same for both FA and CAM - { - C_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cam_cell.h; - R_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cam_cell.h; - } - - - if (is_fa||pure_cam) - num_dec_signals += _log2(num_subarrays_per_mat); - - PredecBlk * r_predec_blk1 = new PredecBlk( - num_dec_signals, - row_dec, - C_wire_predec_blk_out, - R_wire_predec_blk_out, - num_subarrays_per_mat, - is_dram, - true); - PredecBlk * r_predec_blk2 = new PredecBlk( - num_dec_signals, - row_dec, - C_wire_predec_blk_out, - R_wire_predec_blk_out, - num_subarrays_per_mat, - is_dram, - false); - PredecBlk * b_mux_predec_blk1 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, true); - PredecBlk * b_mux_predec_blk2 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, false); - PredecBlk * sa_mux_lev_1_predec_blk1 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, true); - PredecBlk * sa_mux_lev_1_predec_blk2 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, false); - PredecBlk * sa_mux_lev_2_predec_blk1 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, true); - PredecBlk * sa_mux_lev_2_predec_blk2 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, false); - dummy_way_sel_predec_blk1 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, true); - dummy_way_sel_predec_blk2 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, false); - - PredecBlkDrv * r_predec_blk_drv1 = new PredecBlkDrv(0, r_predec_blk1, is_dram); - PredecBlkDrv * r_predec_blk_drv2 = new PredecBlkDrv(0, r_predec_blk2, is_dram); - PredecBlkDrv * b_mux_predec_blk_drv1 = new PredecBlkDrv(0, b_mux_predec_blk1, is_dram); - PredecBlkDrv * b_mux_predec_blk_drv2 = new PredecBlkDrv(0, b_mux_predec_blk2, is_dram); - PredecBlkDrv * sa_mux_lev_1_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk1, is_dram); - PredecBlkDrv * sa_mux_lev_1_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk2, is_dram); - PredecBlkDrv * sa_mux_lev_2_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk1, is_dram); - PredecBlkDrv * sa_mux_lev_2_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk2, is_dram); - way_sel_drv1 = new PredecBlkDrv(dyn_p.number_way_select_signals_mat, dummy_way_sel_predec_blk1, is_dram); - dummy_way_sel_predec_blk_drv2 = new PredecBlkDrv(1, dummy_way_sel_predec_blk2, is_dram); - - r_predec = new Predec(r_predec_blk_drv1, r_predec_blk_drv2); - b_mux_predec = new Predec(b_mux_predec_blk_drv1, b_mux_predec_blk_drv2); - sa_mux_lev_1_predec = new Predec(sa_mux_lev_1_predec_blk_drv1, sa_mux_lev_1_predec_blk_drv2); - sa_mux_lev_2_predec = new Predec(sa_mux_lev_2_predec_blk_drv1, sa_mux_lev_2_predec_blk_drv2); - - subarray_out_wire = new Wire(g_ip->wt, subarray.area.h);//Bug should be subarray.area.w Owen and Sheng - - double driver_c_gate_load; - double driver_c_wire_load; - double driver_r_wire_load; - - if (is_fa || pure_cam) - - { //Although CAM and RAM use different bl pre-charge driver, assuming the precharge p size is the same - driver_c_gate_load = (subarray.num_cols_fa_cam )* gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false); - driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um; - driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.R_per_um; - cam_bl_precharge_eq_drv = new Driver( - driver_c_gate_load, - driver_c_wire_load, - driver_r_wire_load, - is_dram); - - if (!pure_cam) - { - //This is only used for fully asso not pure CAM - driver_c_gate_load = (subarray.num_cols_fa_ram )* gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false); - driver_c_wire_load = subarray.num_cols_fa_ram * cell.w * g_tp.wire_outside_mat.C_per_um; - driver_r_wire_load = subarray.num_cols_fa_ram * cell.w * g_tp.wire_outside_mat.R_per_um; - bl_precharge_eq_drv = new Driver( - driver_c_gate_load, - driver_c_wire_load, - driver_r_wire_load, - is_dram); - } - } - - else - { - driver_c_gate_load = subarray.num_cols * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false); - driver_c_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.C_per_um; - driver_r_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.R_per_um; - bl_precharge_eq_drv = new Driver( - driver_c_gate_load, - driver_c_wire_load, - driver_r_wire_load, - is_dram); - } - double area_row_decoder = row_dec->area.get_area() * subarray.num_rows * (RWP + ERP + EWP); - double w_row_decoder = area_row_decoder / subarray.area.get_h(); - - double h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux = - compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h(); - - double h_subarray_out_drv = subarray_out_wire->area.get_area() * - (subarray.num_cols / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / subarray.area.get_w(); - - - h_subarray_out_drv *= (RWP + ERP + SCHP); - - double h_comparators = 0.0; - double w_row_predecode_output_wires = 0.0; - double h_bit_mux_dec_out_wires = 0.0; - double h_senseamp_mux_dec_out_wires = 0.0; - - if ((!is_fa)&&(dp.is_tag)) - { - //tagbits = (4 * num_cols_subarray / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / num_do_b_mat; - h_comparators = compute_comparators_height(dp.tagbits, dyn_p.num_do_b_mat, subarray.area.get_w()); - h_comparators *= (RWP + ERP); - } + if (is_fa || pure_cam) + num_dec_signals += _log2(num_subarrays_per_mat); + + PredecBlk * r_predec_blk1 = new PredecBlk( + num_dec_signals, + row_dec, + C_wire_predec_blk_out, + R_wire_predec_blk_out, + num_subarrays_per_mat, + is_dram, + true); + PredecBlk * r_predec_blk2 = new PredecBlk( + num_dec_signals, + row_dec, + C_wire_predec_blk_out, + R_wire_predec_blk_out, + num_subarrays_per_mat, + is_dram, + false); + PredecBlk * b_mux_predec_blk1 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, true); + PredecBlk * b_mux_predec_blk2 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, false); + PredecBlk * sa_mux_lev_1_predec_blk1 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, true); + PredecBlk * sa_mux_lev_1_predec_blk2 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, false); + PredecBlk * sa_mux_lev_2_predec_blk1 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, true); + PredecBlk * sa_mux_lev_2_predec_blk2 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, false); + dummy_way_sel_predec_blk1 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, true); + dummy_way_sel_predec_blk2 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, false); + + PredecBlkDrv * r_predec_blk_drv1 = new PredecBlkDrv(0, r_predec_blk1, is_dram); + PredecBlkDrv * r_predec_blk_drv2 = new PredecBlkDrv(0, r_predec_blk2, is_dram); + PredecBlkDrv * b_mux_predec_blk_drv1 = new PredecBlkDrv(0, b_mux_predec_blk1, is_dram); + PredecBlkDrv * b_mux_predec_blk_drv2 = new PredecBlkDrv(0, b_mux_predec_blk2, is_dram); + PredecBlkDrv * sa_mux_lev_1_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk1, is_dram); + PredecBlkDrv * sa_mux_lev_1_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk2, is_dram); + PredecBlkDrv * sa_mux_lev_2_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk1, is_dram); + PredecBlkDrv * sa_mux_lev_2_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk2, is_dram); + way_sel_drv1 = new PredecBlkDrv(dyn_p.number_way_select_signals_mat, dummy_way_sel_predec_blk1, is_dram); + dummy_way_sel_predec_blk_drv2 = new PredecBlkDrv(1, dummy_way_sel_predec_blk2, is_dram); + + r_predec = new Predec(r_predec_blk_drv1, r_predec_blk_drv2); + b_mux_predec = new Predec(b_mux_predec_blk_drv1, b_mux_predec_blk_drv2); + sa_mux_lev_1_predec = new Predec(sa_mux_lev_1_predec_blk_drv1, sa_mux_lev_1_predec_blk_drv2); + sa_mux_lev_2_predec = new Predec(sa_mux_lev_2_predec_blk_drv1, sa_mux_lev_2_predec_blk_drv2); + + subarray_out_wire = new Wire(g_ip->wt, subarray.area.h);//Bug should be subarray.area.w Owen and Sheng + + double driver_c_gate_load; + double driver_c_wire_load; + double driver_r_wire_load; + + if (is_fa || pure_cam) + + { //Although CAM and RAM use different bl pre-charge driver, assuming the precharge p size is the same + driver_c_gate_load = (subarray.num_cols_fa_cam ) * + gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, + is_dram, false, false); + driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * + g_tp.wire_outside_mat.C_per_um; + driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w * + g_tp.wire_outside_mat.R_per_um; + cam_bl_precharge_eq_drv = new Driver( + driver_c_gate_load, + driver_c_wire_load, + driver_r_wire_load, + is_dram); + + if (!pure_cam) { + //This is only used for fully asso not pure CAM + driver_c_gate_load = (subarray.num_cols_fa_ram ) * + gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, + is_dram, false, false); + driver_c_wire_load = subarray.num_cols_fa_ram * cell.w * + g_tp.wire_outside_mat.C_per_um; + driver_r_wire_load = subarray.num_cols_fa_ram * cell.w * + g_tp.wire_outside_mat.R_per_um; + bl_precharge_eq_drv = new Driver( + driver_c_gate_load, + driver_c_wire_load, + driver_r_wire_load, + is_dram); + } + } + + else { + driver_c_gate_load = subarray.num_cols * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false); + driver_c_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.C_per_um; + driver_r_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.R_per_um; + bl_precharge_eq_drv = new Driver( + driver_c_gate_load, + driver_c_wire_load, + driver_r_wire_load, + is_dram); + } + double area_row_decoder = row_dec->area.get_area() * subarray.num_rows * (RWP + ERP + EWP); + double w_row_decoder = area_row_decoder / subarray.area.get_h(); + + double h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux = + compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h(); + + double h_subarray_out_drv = subarray_out_wire->area.get_area() * + (subarray.num_cols / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / subarray.area.get_w(); + + + h_subarray_out_drv *= (RWP + ERP + SCHP); + + double h_comparators = 0.0; + double w_row_predecode_output_wires = 0.0; + double h_bit_mux_dec_out_wires = 0.0; + double h_senseamp_mux_dec_out_wires = 0.0; + + if ((!is_fa) && (dp.is_tag)) { + //tagbits = (4 * num_cols_subarray / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / num_do_b_mat; + h_comparators = compute_comparators_height(dp.tagbits, dyn_p.num_do_b_mat, subarray.area.get_w()); + h_comparators *= (RWP + ERP); + } int branch_effort_predec_blk1_out = (1 << r_predec_blk2->number_input_addr_bits); int branch_effort_predec_blk2_out = (1 << r_predec_blk1->number_input_addr_bits); w_row_predecode_output_wires = (branch_effort_predec_blk1_out + branch_effort_predec_blk2_out) * - g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP); - - - double h_non_cell_area = (num_subarrays_per_mat / num_subarrays_per_row) * - (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux + - h_subarray_out_drv + h_comparators); - - double w_non_cell_area = MAX(w_row_predecode_output_wires, num_subarrays_per_row * w_row_decoder); - - if (deg_bl_muxing > 1) - { - h_bit_mux_dec_out_wires = deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP); - } - if (dp.Ndsam_lev_1 > 1) - { - h_senseamp_mux_dec_out_wires = dp.Ndsam_lev_1 * g_tp.wire_inside_mat.pitch * (RWP + ERP); - } - if (dp.Ndsam_lev_2 > 1) - { - h_senseamp_mux_dec_out_wires += dp.Ndsam_lev_2 * g_tp.wire_inside_mat.pitch * (RWP + ERP); - } - - double h_addr_datain_wires; - if (!g_ip->ver_htree_wires_over_array) - { - h_addr_datain_wires = (dp.number_addr_bits_mat + dp.number_way_select_signals_mat + - (dp.num_di_b_mat + dp.num_do_b_mat)/num_subarrays_per_row) * - g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP); + g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP); + + + double h_non_cell_area = (num_subarrays_per_mat / num_subarrays_per_row) * + (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux + + h_subarray_out_drv + h_comparators); + + double w_non_cell_area = MAX(w_row_predecode_output_wires, num_subarrays_per_row * w_row_decoder); + + if (deg_bl_muxing > 1) { + h_bit_mux_dec_out_wires = deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP); + } + if (dp.Ndsam_lev_1 > 1) { + h_senseamp_mux_dec_out_wires = dp.Ndsam_lev_1 * g_tp.wire_inside_mat.pitch * (RWP + ERP); + } + if (dp.Ndsam_lev_2 > 1) { + h_senseamp_mux_dec_out_wires += dp.Ndsam_lev_2 * g_tp.wire_inside_mat.pitch * (RWP + ERP); + } + + double h_addr_datain_wires; + if (!g_ip->ver_htree_wires_over_array) { + h_addr_datain_wires = (dp.number_addr_bits_mat + + dp.number_way_select_signals_mat + + (dp.num_di_b_mat + dp.num_do_b_mat) / + num_subarrays_per_row) * + g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP); + + if (is_fa || pure_cam) { + h_addr_datain_wires = + (dp.number_addr_bits_mat + + dp.number_way_select_signals_mat + //TODO: revisit + (dp.num_di_b_mat + dp.num_do_b_mat ) / num_subarrays_per_row) * + g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP) + + (dp.num_si_b_mat + dp.num_so_b_mat ) / num_subarrays_per_row * + g_tp.wire_inside_mat.pitch * SCHP; + } + //h_non_cell_area = 2 * h_bit_mux_sense_amp_precharge_sa_mux + + //MAX(h_addr_datain_wires, 2 * h_subarray_out_drv); + h_non_cell_area = (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux + h_comparators + + h_subarray_out_drv) * (num_subarrays_per_mat / num_subarrays_per_row) + + h_addr_datain_wires + + h_bit_mux_dec_out_wires + + h_senseamp_mux_dec_out_wires; - if (is_fa || pure_cam) - { - h_addr_datain_wires = (dp.number_addr_bits_mat + dp.number_way_select_signals_mat + //TODO: revisit - (dp.num_di_b_mat+ dp.num_do_b_mat )/num_subarrays_per_row) * - g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP) + - (dp.num_si_b_mat + dp.num_so_b_mat )/num_subarrays_per_row * g_tp.wire_inside_mat.pitch * SCHP; } - //h_non_cell_area = 2 * h_bit_mux_sense_amp_precharge_sa_mux + - //MAX(h_addr_datain_wires, 2 * h_subarray_out_drv); - h_non_cell_area = (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux + h_comparators + - h_subarray_out_drv) * (num_subarrays_per_mat / num_subarrays_per_row) + - h_addr_datain_wires + - h_bit_mux_dec_out_wires + - h_senseamp_mux_dec_out_wires; - - } - - // double area_rectangle_center_mat = h_non_cell_area * w_non_cell_area; - double area_mat_center_circuitry = (r_predec_blk_drv1->area.get_area() + - b_mux_predec_blk_drv1->area.get_area() + - sa_mux_lev_1_predec_blk_drv1->area.get_area() + - sa_mux_lev_2_predec_blk_drv1->area.get_area() + - way_sel_drv1->area.get_area() + - r_predec_blk_drv2->area.get_area() + - b_mux_predec_blk_drv2->area.get_area() + - sa_mux_lev_1_predec_blk_drv2->area.get_area() + - sa_mux_lev_2_predec_blk_drv2->area.get_area() + - r_predec_blk1->area.get_area() + - b_mux_predec_blk1->area.get_area() + - sa_mux_lev_1_predec_blk1->area.get_area() + - sa_mux_lev_2_predec_blk1->area.get_area() + - r_predec_blk2->area.get_area() + - b_mux_predec_blk2->area.get_area() + - sa_mux_lev_1_predec_blk2->area.get_area() + - sa_mux_lev_2_predec_blk2->area.get_area() + - bit_mux_dec->area.get_area() + - sa_mux_lev_1_dec->area.get_area() + - sa_mux_lev_2_dec->area.get_area()) * (RWP + ERP + EWP); - - double area_efficiency_mat; + + // double area_rectangle_center_mat = h_non_cell_area * w_non_cell_area; + double area_mat_center_circuitry = (r_predec_blk_drv1->area.get_area() + + b_mux_predec_blk_drv1->area.get_area() + + sa_mux_lev_1_predec_blk_drv1->area.get_area() + + sa_mux_lev_2_predec_blk_drv1->area.get_area() + + way_sel_drv1->area.get_area() + + r_predec_blk_drv2->area.get_area() + + b_mux_predec_blk_drv2->area.get_area() + + sa_mux_lev_1_predec_blk_drv2->area.get_area() + + sa_mux_lev_2_predec_blk_drv2->area.get_area() + + r_predec_blk1->area.get_area() + + b_mux_predec_blk1->area.get_area() + + sa_mux_lev_1_predec_blk1->area.get_area() + + sa_mux_lev_2_predec_blk1->area.get_area() + + r_predec_blk2->area.get_area() + + b_mux_predec_blk2->area.get_area() + + sa_mux_lev_1_predec_blk2->area.get_area() + + sa_mux_lev_2_predec_blk2->area.get_area() + + bit_mux_dec->area.get_area() + + sa_mux_lev_1_dec->area.get_area() + + sa_mux_lev_2_dec->area.get_area()) * (RWP + ERP + EWP); + + double area_efficiency_mat; // if (!is_fa) // { - assert(num_subarrays_per_mat/num_subarrays_per_row>0); - area.h = (num_subarrays_per_mat/num_subarrays_per_row)* subarray.area.h + h_non_cell_area; + assert(num_subarrays_per_mat / num_subarrays_per_row > 0); + area.h = (num_subarrays_per_mat / num_subarrays_per_row) * + subarray.area.h + h_non_cell_area; area.w = num_subarrays_per_row * subarray.area.get_w() + w_non_cell_area; - area.w = (area.h*area.w + area_mat_center_circuitry) / area.h; - area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_mat * 100.0 / area.get_area(); + area.w = (area.h * area.w + area_mat_center_circuitry) / area.h; + area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_mat * + 100.0 / area.get_area(); // cout<<"h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux"<<h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux<<endl; // cout<<"h_comparators"<<h_comparators<<endl; @@ -413,8 +412,8 @@ Mat::Mat(const DynamicParameter & dyn_p) // cout<<"w_non_cell_area"<<w_non_cell_area<<endl; // cout<<"area_mat_center_circuitry"<<area_mat_center_circuitry<<endl; - assert(area.h>0); - assert(area.w>0); + assert(area.h > 0); + assert(area.w > 0); // } // else // { @@ -423,583 +422,609 @@ Mat::Mat(const DynamicParameter & dyn_p) // area.w = (area.h*area.w + area_mat_center_circuitry) / area.h; // area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_row * 100.0 / area.get_area(); // } - } - - - -Mat::~Mat() -{ - delete row_dec; - delete bit_mux_dec; - delete sa_mux_lev_1_dec; - delete sa_mux_lev_2_dec; - - delete r_predec->blk1; - delete r_predec->blk2; - delete b_mux_predec->blk1; - delete b_mux_predec->blk2; - delete sa_mux_lev_1_predec->blk1; - delete sa_mux_lev_1_predec->blk2; - delete sa_mux_lev_2_predec->blk1; - delete sa_mux_lev_2_predec->blk2; - delete dummy_way_sel_predec_blk1; - delete dummy_way_sel_predec_blk2; - - delete r_predec->drv1; - delete r_predec->drv2; - delete b_mux_predec->drv1; - delete b_mux_predec->drv2; - delete sa_mux_lev_1_predec->drv1; - delete sa_mux_lev_1_predec->drv2; - delete sa_mux_lev_2_predec->drv1; - delete sa_mux_lev_2_predec->drv2; - delete way_sel_drv1; - delete dummy_way_sel_predec_blk_drv2; - - delete r_predec; - delete b_mux_predec; - delete sa_mux_lev_1_predec; - delete sa_mux_lev_2_predec; - - delete subarray_out_wire; - if (!pure_cam) - delete bl_precharge_eq_drv; - - if (is_fa || pure_cam) - { - delete sl_precharge_eq_drv ; - delete sl_data_drv ; - delete cam_bl_precharge_eq_drv; - delete ml_precharge_drv; - delete ml_to_ram_wl_drv; - } } -double Mat::compute_delays(double inrisetime) -{ - int k; - double rd, C_intrinsic, C_ld, tf, R_bl_precharge,r_b_metal, R_bl, C_bl; - double outrisetime_search, outrisetime, row_dec_outrisetime; - // delay calculation for tags of fully associative cache - if (is_fa || pure_cam) - { - //Compute search access time - outrisetime_search = compute_cam_delay(inrisetime); - if (is_fa) - { - bl_precharge_eq_drv->compute_delay(0); - k = ml_to_ram_wl_drv->number_gates - 1; - rd = tr_R_on(ml_to_ram_wl_drv->width_n[k], NCH, 1, is_dram, false, true); - C_intrinsic = drain_C_(ml_to_ram_wl_drv->width_n[k], PCH, 1, 1, 4*cell.h, is_dram, false, true) + - drain_C_(ml_to_ram_wl_drv->width_n[k], NCH, 1, 1, 4*cell.h, is_dram, false, true); - C_ld = ml_to_ram_wl_drv->c_gate_load+ ml_to_ram_wl_drv->c_wire_load; - tf = rd * (C_intrinsic + C_ld) + ml_to_ram_wl_drv->r_wire_load * C_ld / 2; - delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE); - - R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false); - r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;//dummy rows in sram are filled in - R_bl = subarray.num_rows * r_b_metal; - C_bl = subarray.C_bl; - delay_bl_restore = bl_precharge_eq_drv->delay + - log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) / (g_tp.sram.Vbitpre - dp.V_b_sense))* - (R_bl_precharge * C_bl + R_bl * C_bl / 2); - - - outrisetime_search = compute_bitline_delay(outrisetime_search); - outrisetime_search = compute_sa_delay(outrisetime_search); - } - outrisetime_search = compute_subarray_out_drv(outrisetime_search); - subarray_out_wire->set_in_rise_time(outrisetime_search); - outrisetime_search = subarray_out_wire->signal_rise_time(); - delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay; - - - //TODO: this is just for compute plain read/write energy for fa and cam, plain read/write access timing need to be revisited. - outrisetime = r_predec->compute_delays(inrisetime); - row_dec_outrisetime = row_dec->compute_delays(outrisetime); - - outrisetime = b_mux_predec->compute_delays(inrisetime); - bit_mux_dec->compute_delays(outrisetime); - - outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime); - sa_mux_lev_1_dec->compute_delays(outrisetime); - - outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime); - sa_mux_lev_2_dec->compute_delays(outrisetime); - - if (pure_cam) - { - outrisetime = compute_bitline_delay(row_dec_outrisetime); - outrisetime = compute_sa_delay(outrisetime); - } - return outrisetime_search; +Mat::~Mat() { + delete row_dec; + delete bit_mux_dec; + delete sa_mux_lev_1_dec; + delete sa_mux_lev_2_dec; + + delete r_predec->blk1; + delete r_predec->blk2; + delete b_mux_predec->blk1; + delete b_mux_predec->blk2; + delete sa_mux_lev_1_predec->blk1; + delete sa_mux_lev_1_predec->blk2; + delete sa_mux_lev_2_predec->blk1; + delete sa_mux_lev_2_predec->blk2; + delete dummy_way_sel_predec_blk1; + delete dummy_way_sel_predec_blk2; + + delete r_predec->drv1; + delete r_predec->drv2; + delete b_mux_predec->drv1; + delete b_mux_predec->drv2; + delete sa_mux_lev_1_predec->drv1; + delete sa_mux_lev_1_predec->drv2; + delete sa_mux_lev_2_predec->drv1; + delete sa_mux_lev_2_predec->drv2; + delete way_sel_drv1; + delete dummy_way_sel_predec_blk_drv2; + + delete r_predec; + delete b_mux_predec; + delete sa_mux_lev_1_predec; + delete sa_mux_lev_2_predec; + + delete subarray_out_wire; + if (!pure_cam) + delete bl_precharge_eq_drv; + + if (is_fa || pure_cam) { + delete sl_precharge_eq_drv ; + delete sl_data_drv ; + delete cam_bl_precharge_eq_drv; + delete ml_precharge_drv; + delete ml_to_ram_wl_drv; + } +} + + + +double Mat::compute_delays(double inrisetime) { + int k; + double rd, C_intrinsic, C_ld, tf, R_bl_precharge, r_b_metal, R_bl, C_bl; + double outrisetime_search, outrisetime, row_dec_outrisetime; + // delay calculation for tags of fully associative cache + if (is_fa || pure_cam) { + //Compute search access time + outrisetime_search = compute_cam_delay(inrisetime); + if (is_fa) { + bl_precharge_eq_drv->compute_delay(0); + k = ml_to_ram_wl_drv->number_gates - 1; + rd = tr_R_on(ml_to_ram_wl_drv->width_n[k], NCH, 1, is_dram, false, true); + C_intrinsic = drain_C_(ml_to_ram_wl_drv->width_n[k], PCH, 1, 1, 4 * + cell.h, is_dram, false, true) + + drain_C_(ml_to_ram_wl_drv->width_n[k], NCH, 1, 1, 4 * cell.h, + is_dram, false, true); + C_ld = ml_to_ram_wl_drv->c_gate_load + + ml_to_ram_wl_drv->c_wire_load; + tf = rd * (C_intrinsic + C_ld) + ml_to_ram_wl_drv->r_wire_load * C_ld / 2; + delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE); + + R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false); + r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;//dummy rows in sram are filled in + R_bl = subarray.num_rows * r_b_metal; + C_bl = subarray.C_bl; + delay_bl_restore = bl_precharge_eq_drv->delay + + log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) / + (g_tp.sram.Vbitpre - dp.V_b_sense)) * + (R_bl_precharge * C_bl + R_bl * C_bl / 2); + + + outrisetime_search = compute_bitline_delay(outrisetime_search); + outrisetime_search = compute_sa_delay(outrisetime_search); + } + outrisetime_search = compute_subarray_out_drv(outrisetime_search); + subarray_out_wire->set_in_rise_time(outrisetime_search); + outrisetime_search = subarray_out_wire->signal_rise_time(); + delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay; + + + //TODO: this is just for compute plain read/write energy for fa and cam, plain read/write access timing need to be revisited. + outrisetime = r_predec->compute_delays(inrisetime); + row_dec_outrisetime = row_dec->compute_delays(outrisetime); + + outrisetime = b_mux_predec->compute_delays(inrisetime); + bit_mux_dec->compute_delays(outrisetime); + + outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime); + sa_mux_lev_1_dec->compute_delays(outrisetime); + + outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime); + sa_mux_lev_2_dec->compute_delays(outrisetime); + + if (pure_cam) { + outrisetime = compute_bitline_delay(row_dec_outrisetime); + outrisetime = compute_sa_delay(outrisetime); + } + return outrisetime_search; + } else { + bl_precharge_eq_drv->compute_delay(0); + if (row_dec->exist == true) { + int k = row_dec->num_gates - 1; + double rd = tr_R_on(row_dec->w_dec_n[k], NCH, 1, is_dram, false, true); + // TODO: this 4*cell.h number must be revisited + double C_intrinsic = drain_C_(row_dec->w_dec_p[k], PCH, 1, 1, 4 * + cell.h, is_dram, false, true) + + drain_C_(row_dec->w_dec_n[k], NCH, 1, 1, 4 * cell.h, is_dram, + false, true); + double C_ld = row_dec->C_ld_dec_out; + double tf = rd * (C_intrinsic + C_ld) + row_dec->R_wire_dec_out * C_ld / 2; + delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE); + } + double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false); + double r_b_metal = cell.h * g_tp.wire_local.R_per_um; + double R_bl = subarray.num_rows * r_b_metal; + double C_bl = subarray.C_bl; + + if (is_dram) { + delay_bl_restore = bl_precharge_eq_drv->delay + 2.3 * (R_bl_precharge * C_bl + R_bl * C_bl / 2); + } else { + delay_bl_restore = bl_precharge_eq_drv->delay + + log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) / + (g_tp.sram.Vbitpre - dp.V_b_sense)) * + (R_bl_precharge * C_bl + R_bl * C_bl / 2); + } + } + + + + outrisetime = r_predec->compute_delays(inrisetime); + row_dec_outrisetime = row_dec->compute_delays(outrisetime); + + outrisetime = b_mux_predec->compute_delays(inrisetime); + bit_mux_dec->compute_delays(outrisetime); + + outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime); + sa_mux_lev_1_dec->compute_delays(outrisetime); + + outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime); + sa_mux_lev_2_dec->compute_delays(outrisetime); + + outrisetime = compute_bitline_delay(row_dec_outrisetime); + outrisetime = compute_sa_delay(outrisetime); + outrisetime = compute_subarray_out_drv(outrisetime); + subarray_out_wire->set_in_rise_time(outrisetime); + outrisetime = subarray_out_wire->signal_rise_time(); + + delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay; + + if (dp.is_tag == true && dp.fully_assoc == false) { + compute_comparator_delay(0); } - else - { - bl_precharge_eq_drv->compute_delay(0); - if (row_dec->exist == true) - { - int k = row_dec->num_gates - 1; - double rd = tr_R_on(row_dec->w_dec_n[k], NCH, 1, is_dram, false, true); - // TODO: this 4*cell.h number must be revisited - double C_intrinsic = drain_C_(row_dec->w_dec_p[k], PCH, 1, 1, 4*cell.h, is_dram, false, true) + - drain_C_(row_dec->w_dec_n[k], NCH, 1, 1, 4*cell.h, is_dram, false, true); - double C_ld = row_dec->C_ld_dec_out; - double tf = rd * (C_intrinsic + C_ld) + row_dec->R_wire_dec_out * C_ld / 2; - delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE); - } - double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false); - double r_b_metal = cell.h * g_tp.wire_local.R_per_um; - double R_bl = subarray.num_rows * r_b_metal; - double C_bl = subarray.C_bl; - - if (is_dram) - { - delay_bl_restore = bl_precharge_eq_drv->delay + 2.3 * (R_bl_precharge * C_bl + R_bl * C_bl / 2); - } - else - { - delay_bl_restore = bl_precharge_eq_drv->delay + - log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) / (g_tp.sram.Vbitpre - dp.V_b_sense))* - (R_bl_precharge * C_bl + R_bl * C_bl / 2); - } - } - - - - outrisetime = r_predec->compute_delays(inrisetime); - row_dec_outrisetime = row_dec->compute_delays(outrisetime); - - outrisetime = b_mux_predec->compute_delays(inrisetime); - bit_mux_dec->compute_delays(outrisetime); - - outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime); - sa_mux_lev_1_dec->compute_delays(outrisetime); - - outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime); - sa_mux_lev_2_dec->compute_delays(outrisetime); - - outrisetime = compute_bitline_delay(row_dec_outrisetime); - outrisetime = compute_sa_delay(outrisetime); - outrisetime = compute_subarray_out_drv(outrisetime); - subarray_out_wire->set_in_rise_time(outrisetime); - outrisetime = subarray_out_wire->signal_rise_time(); - - delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay; - - if (dp.is_tag == true && dp.fully_assoc == false) - { - compute_comparator_delay(0); - } - - if (row_dec->exist == false) - { - delay_wl_reset = MAX(r_predec->blk1->delay, r_predec->blk2->delay); + + if (row_dec->exist == false) { + delay_wl_reset = MAX(r_predec->blk1->delay, r_predec->blk2->delay); } - return outrisetime; + return outrisetime; } -double Mat::compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h() -{ - - double height = compute_tr_width_after_folding(g_tp.w_pmos_bl_precharge, camFlag? cam_cell.w:cell.w / (2 *(RWP + ERP + SCHP))) + - compute_tr_width_after_folding(g_tp.w_pmos_bl_eq, camFlag? cam_cell.w:cell.w / (RWP + ERP + SCHP)); // precharge circuitry - - if (deg_bl_muxing > 1) - { - height += compute_tr_width_after_folding(g_tp.w_nmos_b_mux, cell.w / (2 *(RWP + ERP))); // col mux tr height - // height += deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP); // bit mux dec out wires height - } - - height += height_sense_amplifier(/*camFlag? sram_cell.w:*/cell.w * deg_bl_muxing / (RWP + ERP)); // sense_amp_height - - if (dp.Ndsam_lev_1 > 1) - { - height += compute_tr_width_after_folding( - g_tp.w_nmos_sa_mux, cell.w * dp.Ndsam_lev_1 / (RWP + ERP)); // sense_amp_mux_height - //height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP); - } - - if (dp.Ndsam_lev_2 > 1) - { - height += compute_tr_width_after_folding( - g_tp.w_nmos_sa_mux, cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP)); // sense_amp_mux_height - //height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP); - - // add height of inverter-buffers between the two levels (pass-transistors) of sense-amp mux - height += 2 * compute_tr_width_after_folding( - pmos_to_nmos_sz_ratio(is_dram) * g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP)); - height += 2 * compute_tr_width_after_folding(g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP)); - } - - // TODO: this should be uncommented... - /*if (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2 > 1) - { - //height_write_mux_decode_output_wires = deg_bl_muxing * Ndsam * g_tp.wire_inside_mat.pitch * (RWP + EWP); - double width_write_driver_write_mux = width_write_driver_or_write_mux(); - double height_write_driver_write_mux = compute_tr_width_after_folding(2 * width_write_driver_write_mux, - cell.w * - // deg_bl_muxing * - dp.Ndsam_lev_1 * dp.Ndsam_lev_2 / (RWP + EWP)); - height += height_write_driver_write_mux; - }*/ - - return height; +double Mat::compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h() { + + double height = + compute_tr_width_after_folding(g_tp.w_pmos_bl_precharge, + camFlag ? cam_cell.w : + cell.w / (2 * (RWP + ERP + SCHP))) + + // precharge circuitry + compute_tr_width_after_folding(g_tp.w_pmos_bl_eq, + camFlag ? cam_cell.w : + cell.w / (RWP + ERP + SCHP)); + + if (deg_bl_muxing > 1) { + // col mux tr height + height += + compute_tr_width_after_folding(g_tp.w_nmos_b_mux, + cell.w / (2 * (RWP + ERP))); + // height += deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP); // bit mux dec out wires height + } + + height += height_sense_amplifier(/*camFlag? sram_cell.w:*/cell.w * deg_bl_muxing / (RWP + ERP)); // sense_amp_height + + if (dp.Ndsam_lev_1 > 1) { + height += compute_tr_width_after_folding( + g_tp.w_nmos_sa_mux, cell.w * dp.Ndsam_lev_1 / (RWP + ERP)); // sense_amp_mux_height + //height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP); + } + + if (dp.Ndsam_lev_2 > 1) { + height += compute_tr_width_after_folding( + g_tp.w_nmos_sa_mux, cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP)); // sense_amp_mux_height + //height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP); + + // add height of inverter-buffers between the two levels (pass-transistors) of sense-amp mux + height += 2 * compute_tr_width_after_folding( + pmos_to_nmos_sz_ratio(is_dram) * g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP)); + height += 2 * compute_tr_width_after_folding(g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP)); + } + + // TODO: this should be uncommented... + /*if (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2 > 1) + { + //height_write_mux_decode_output_wires = deg_bl_muxing * Ndsam * g_tp.wire_inside_mat.pitch * (RWP + EWP); + double width_write_driver_write_mux = width_write_driver_or_write_mux(); + double height_write_driver_write_mux = compute_tr_width_after_folding(2 * width_write_driver_write_mux, + cell.w * + // deg_bl_muxing * + dp.Ndsam_lev_1 * dp.Ndsam_lev_2 / (RWP + EWP)); + height += height_write_driver_write_mux; + }*/ + + return height; } -double Mat::compute_cam_delay(double inrisetime) -{ +double Mat::compute_cam_delay(double inrisetime) { - double out_time_ramp, this_delay; - double Rwire, tf, c_intrinsic, rd, Cwire, c_gate_load; + double out_time_ramp, this_delay; + double Rwire, tf, c_intrinsic, rd, Cwire, c_gate_load; - double Wdecdrivep, Wdecdriven, Wfadriven, Wfadrivep, Wfadrive2n, Wfadrive2p, Wfadecdrive1n, Wfadecdrive1p, + double Wdecdrivep, Wdecdriven, Wfadriven, Wfadrivep, Wfadrive2n, Wfadrive2p, Wfadecdrive1n, Wfadecdrive1p, Wfadecdrive2n, Wfadecdrive2p, Wfadecdriven, Wfadecdrivep, Wfaprechn, Wfaprechp, Wdummyn, Wdummyinvn, Wdummyinvp, Wfainvn, Wfainvp, Waddrnandn, Waddrnandp, Wfanandn, Wfanandp, Wfanorn, Wfanorp, Wdecnandn, Wdecnandp, W_hit_miss_n, W_hit_miss_p; - double c_matchline_metal, r_matchline_metal, c_searchline_metal, r_searchline_metal, dynSearchEng; - int Htagbits; - - double driver_c_gate_load; - double driver_c_wire_load; - double driver_r_wire_load; - //double searchline_precharge_time; - - double leak_power_cc_inverters_sram_cell = 0; - double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0; - double leak_power_RD_port_sram_cell = 0; - double leak_power_SCHP_port_sram_cell = 0; - double leak_comparator_cam_cell =0; - - double gate_leak_comparator_cam_cell = 0; - double gate_leak_power_cc_inverters_sram_cell = 0; - double gate_leak_power_RD_port_sram_cell = 0; - double gate_leak_power_SCHP_port_sram_cell = 0; - - c_matchline_metal = cam_cell.get_w() * g_tp.wire_local.C_per_um; - c_searchline_metal = cam_cell.get_h() * g_tp.wire_local.C_per_um; - r_matchline_metal = cam_cell.get_w() * g_tp.wire_local.R_per_um; - r_searchline_metal = cam_cell.get_h() * g_tp.wire_local.R_per_um; - - dynSearchEng = 0.0; - delay_matchchline = 0.0; - double p_to_n_sizing_r = pmos_to_nmos_sz_ratio(is_dram); - bool linear_scaling = false; - - if (linear_scaling) - { - Wdecdrivep = 450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process - Wdecdriven = 300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process - Wfadriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process - Wfadrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process - Wfadrive2n = 250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process - Wfadrive2p = 500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process - Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process - Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process - Wfadecdrive2n = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process - Wfadecdrive2p = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process - Wfadecdriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process - Wfadecdrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process - Wfaprechn = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process - Wfainvn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process - Wfainvp = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process - Wfanandn = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process - Wfanandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process - Wdecnandn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process - Wdecnandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process - - Wfaprechp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process - Wdummyn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process - Wdummyinvn = 75 * g_ip->F_sz_um;//this was 60 micron for the 0.8 micron process - Wdummyinvp = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process - Waddrnandn = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process - Waddrnandp = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process - Wfanorn = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process - Wfanorp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process - W_hit_miss_n = Wdummyn; - W_hit_miss_p = g_tp.min_w_nmos_*p_to_n_sizing_r; - //TODO: this number should updated using new layout; from the NAND to output NOR should be computed using logical effort - } - else - { - Wdecdrivep = 450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process - Wdecdriven = 300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process - Wfadriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process - Wfadrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process - Wfadrive2n = 250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process - Wfadrive2p = 500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process - Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process - Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process - Wfadecdrive2n = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process - Wfadecdrive2p = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process - Wfadecdriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process - Wfadecdrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process - Wfaprechn = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process - Wfainvn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process - Wfainvp = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process - Wfanandn = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process - Wfanandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process - Wdecnandn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process - Wdecnandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process - - Wfaprechp = g_tp.w_pmos_bl_precharge;//this was 10 micron for the 0.8 micron process - Wdummyn = g_tp.cam.cell_nmos_w; - Wdummyinvn = 75 * g_ip->F_sz_um;//this was 60 micron for the 0.8 micron process - Wdummyinvp = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process - Waddrnandn = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process - Waddrnandp = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process - Wfanorn = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process - Wfanorp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process - W_hit_miss_n = Wdummyn; - W_hit_miss_p = g_tp.min_w_nmos_*p_to_n_sizing_r; - } - - Htagbits = (int)(ceil ((double) (subarray.num_cols_fa_cam) / 2.0)); - - /* First stage, searchline is precharged. searchline data driver drives the searchline to open (if miss) the comparators. - search_line_delay, search_line_power, search_line_restore_delay for cycle time computation. - From the driver(am and an) to the comparators in all the rows including the dummy row, - Assuming that comparators in both the normal matching line and the dummy matching line have the same sizing */ - - //Searchline precharge circuitry is same as that of bitline. However, no sharing between search ports and r/w ports - //Searchline precharge routes horizontally - driver_c_gate_load = subarray.num_cols_fa_cam * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false); - driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um; - driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.R_per_um; - - sl_precharge_eq_drv = new Driver( - driver_c_gate_load, - driver_c_wire_load, - driver_r_wire_load, - is_dram); - - //searchline data driver ; subarray.num_rows + 1 is because of the dummy row - //data drv should only have gate_C not 2*gate_C since the two searchlines are differential--same as bitlines - driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wdummyn, 0, is_dram, false, false); - driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal; - driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal; - sl_data_drv = new Driver( - driver_c_gate_load, - driver_c_wire_load, - driver_r_wire_load, - is_dram); - - sl_precharge_eq_drv->compute_delay(0); - double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);//Assuming CAM and SRAM have same Pre_eq_dr - double r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um; - double R_bl = (subarray.num_rows + 1) * r_b_metal; - double C_bl = subarray.C_bl_cam; - delay_cam_sl_restore = sl_precharge_eq_drv->delay - + log(g_tp.cam.Vbitpre)* (R_bl_precharge * C_bl + R_bl * C_bl / 2); - - out_time_ramp = sl_data_drv->compute_delay(inrisetime);//After entering one mat, start to consider the inrisetime from 0(0 is passed from outside) - - //matchline ops delay - delay_matchchline += sl_data_drv->delay; - - /* second stage, from the trasistors in the comparators(both normal row and dummy row) to the NAND gates that combins both half*/ - //matchline delay, matchline power, matchline_reset for cycle time computation, - - ////matchline precharge circuitry routes vertically - //There are two matchline precharge driver chains per subarray. - driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wfaprechp, 0, is_dram); - driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal; - driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal; - - ml_precharge_drv = new Driver( - driver_c_gate_load, - driver_c_wire_load, - driver_r_wire_load, - is_dram); - - ml_precharge_drv->compute_delay(0); - - - rd = tr_R_on(Wdummyn, NCH, 2, is_dram); - c_intrinsic = Htagbits*(2*drain_C_(Wdummyn, NCH, 2, 1, g_tp.cell_h_def, is_dram)//TODO: the cell_h_def should be revisit - + drain_C_(Wfaprechp, PCH, 1, 1, g_tp.cell_h_def, is_dram)/Htagbits);//since each halve only has one precharge tx per matchline - - Cwire = c_matchline_metal * Htagbits; - Rwire = r_matchline_metal * Htagbits; - c_gate_load = gate_C(Waddrnandn + Waddrnandp, 0, is_dram); - - double R_ml_precharge = tr_R_on(Wfaprechp, PCH, 1, is_dram); - //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um; - double R_ml = Rwire; - double C_ml = Cwire + c_intrinsic; - delay_cam_ml_reset = ml_precharge_drv->delay - + log(g_tp.cam.Vbitpre)* (R_ml_precharge * C_ml + R_ml * C_ml / 2);//TODO: latest CAM has sense amps on matchlines too - - //matchline ops delay - tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load); - this_delay = horowitz(out_time_ramp, tf, VTHFA2, VTHFA3, FALL); - delay_matchchline += this_delay; - out_time_ramp = this_delay / VTHFA3; - - dynSearchEng += ((c_intrinsic + Cwire + c_gate_load)*(subarray.num_rows +1)) //+ 2*drain_C_(Wdummyn, NCH, 2, 1, g_tp.cell_h_def, is_dram))//TODO: need to be precise - * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd *2;//* Ntbl;//each subarry has two halves - - /* third stage, from the NAND2 gates to the drivers in the dummy row */ - rd = tr_R_on(Waddrnandn, NCH, 2, is_dram); - c_intrinsic = drain_C_(Waddrnandn, NCH, 2, 1, g_tp.cell_h_def, is_dram) + - drain_C_(Waddrnandp, PCH, 1, 1, g_tp.cell_h_def, is_dram)*2; - c_gate_load = gate_C(Wdummyinvn + Wdummyinvp, 0, is_dram); - tf = rd * (c_intrinsic + c_gate_load); - this_delay = horowitz(out_time_ramp, tf, VTHFA3, VTHFA4, RISE); - out_time_ramp = this_delay / (1 - VTHFA4); - delay_matchchline += this_delay; - - //only the dummy row has the extra inverter between NAND and NOR gates - dynSearchEng += (c_intrinsic* (subarray.num_rows+1)+ c_gate_load*2) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;// * Ntbl; - - /* fourth stage, from the driver in dummy matchline to the NOR2 gate which drives the wordline of the data portion */ - rd = tr_R_on(Wdummyinvn, NCH, 1, is_dram); - c_intrinsic = drain_C_(Wdummyinvn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + drain_C_(Wdummyinvp, NCH, 1, 1, g_tp.cell_h_def, is_dram); - Cwire = c_matchline_metal * Htagbits + c_searchline_metal * (subarray.num_rows+1)/2; - Rwire = r_matchline_metal * Htagbits + r_searchline_metal * (subarray.num_rows+1)/2; - c_gate_load = gate_C(Wfanorn + Wfanorp, 0, is_dram); - tf = rd * (c_intrinsic + Cwire + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load); - this_delay = horowitz (out_time_ramp, tf, VTHFA4, VTHFA5, FALL); - out_time_ramp = this_delay / VTHFA5; - delay_matchchline += this_delay; - - dynSearchEng += (c_intrinsic + Cwire + subarray.num_rows*c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl; - - /*final statge from the NOR gate to drive the wordline of the data portion */ - - //searchline data driver There are two matchline precharge driver chains per subarray. - driver_c_gate_load = gate_C(W_hit_miss_n, 0, is_dram, false, false);//nmos of the pull down logic - driver_c_wire_load = subarray.C_wl_ram; - driver_r_wire_load = subarray.R_wl_ram; - - ml_to_ram_wl_drv = new Driver( - driver_c_gate_load, - driver_c_wire_load, - driver_r_wire_load, - is_dram); - - - - rd = tr_R_on(Wfanorn, NCH, 1, is_dram); - c_intrinsic = 2* drain_C_(Wfanorn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + drain_C_(Wfanorp, NCH, 1, 1, g_tp.cell_h_def, is_dram); - c_gate_load = gate_C(ml_to_ram_wl_drv->width_n[0] + ml_to_ram_wl_drv->width_p[0], 0, is_dram); - tf = rd * (c_intrinsic + c_gate_load); - this_delay = horowitz (out_time_ramp, tf, 0.5, 0.5, RISE); - out_time_ramp = this_delay / (1-0.5); - delay_matchchline += this_delay; - - out_time_ramp = ml_to_ram_wl_drv->compute_delay(out_time_ramp); - - //c_gate_load energy is computed in ml_to_ram_wl_drv - dynSearchEng += (c_intrinsic) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl; - - - /* peripheral-- hitting logic "CMOS VLSI Design Fig11.51*/ - /*Precharge the hitting logic */ - c_intrinsic = 2*drain_C_(W_hit_miss_p, NCH, 2, 1, g_tp.cell_h_def, is_dram); - Cwire = c_searchline_metal * subarray.num_rows; - Rwire = r_searchline_metal * subarray.num_rows; - c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram)* subarray.num_rows; - - rd = tr_R_on(W_hit_miss_p, PCH, 1, is_dram, false, false); - //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um; - double R_hit_miss = Rwire; - double C_hit_miss = Cwire + c_intrinsic; - delay_hit_miss_reset = log(g_tp.cam.Vbitpre)* (rd * C_hit_miss + R_hit_miss * C_hit_miss / 2); - dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; - - /*hitting logic evaluation */ - c_intrinsic = 2*drain_C_(W_hit_miss_n, NCH, 2, 1, g_tp.cell_h_def, is_dram); - Cwire = c_searchline_metal * subarray.num_rows; - Rwire = r_searchline_metal * subarray.num_rows; - c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram)* subarray.num_rows; - - rd = tr_R_on(W_hit_miss_n, PCH, 1, is_dram, false, false); - tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load); - - delay_hit_miss = horowitz(0, tf, 0.5, 0.5, FALL); - - if (is_fa) - delay_matchchline += MAX(ml_to_ram_wl_drv->delay, delay_hit_miss); - - dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; - - /* TODO: peripheral-- Priority Encoder, usually this is not necessary in processor components*/ - - power_matchline.searchOp.dynamic = dynSearchEng; - - //leakage in one subarray - double Iport = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true);//TODO: how much is the idle time? just by *2? - double Iport_erp = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 2, nmos, false, true); - double Icell = cmos_Isub_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w, 1, inv, false, true)*2; - double Icell_comparator = cmos_Isub_leakage(Wdummyn, Wdummyn, 1, inv, false, true)*2;//approx XOR with Inv - - leak_power_cc_inverters_sram_cell = Icell * g_tp.cam_cell.Vdd; - leak_comparator_cam_cell = Icell_comparator * g_tp.cam_cell.Vdd; - leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.cam_cell.Vdd; - leak_power_RD_port_sram_cell = Iport_erp * g_tp.cam_cell.Vdd; - leak_power_SCHP_port_sram_cell = 0;//search port and r/w port are sperate, therefore no access txs in search ports - - power_matchline.searchOp.leakage += leak_power_cc_inverters_sram_cell + - leak_comparator_cam_cell + - leak_power_acc_tr_RW_or_WR_port_sram_cell + - leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) + - leak_power_RD_port_sram_cell * ERP + - leak_power_SCHP_port_sram_cell*SCHP; + double c_matchline_metal, r_matchline_metal, c_searchline_metal, r_searchline_metal, dynSearchEng; + int Htagbits; + + double driver_c_gate_load; + double driver_c_wire_load; + double driver_r_wire_load; + //double searchline_precharge_time; + + double leak_power_cc_inverters_sram_cell = 0; + double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0; + double leak_power_RD_port_sram_cell = 0; + double leak_power_SCHP_port_sram_cell = 0; + double leak_comparator_cam_cell =0; + + double gate_leak_comparator_cam_cell = 0; + double gate_leak_power_cc_inverters_sram_cell = 0; + double gate_leak_power_RD_port_sram_cell = 0; + double gate_leak_power_SCHP_port_sram_cell = 0; + + c_matchline_metal = cam_cell.get_w() * g_tp.wire_local.C_per_um; + c_searchline_metal = cam_cell.get_h() * g_tp.wire_local.C_per_um; + r_matchline_metal = cam_cell.get_w() * g_tp.wire_local.R_per_um; + r_searchline_metal = cam_cell.get_h() * g_tp.wire_local.R_per_um; + + dynSearchEng = 0.0; + delay_matchchline = 0.0; + double p_to_n_sizing_r = pmos_to_nmos_sz_ratio(is_dram); + bool linear_scaling = false; + + if (linear_scaling) { + Wdecdrivep = 450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process + Wdecdriven = 300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process + Wfadriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process + Wfadrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process + Wfadrive2n = 250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process + Wfadrive2p = 500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process + Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process + Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process + Wfadecdrive2n = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process + Wfadecdrive2p = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process + Wfadecdriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process + Wfadecdrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process + Wfaprechn = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process + Wfainvn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process + Wfainvp = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process + Wfanandn = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process + Wfanandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process + Wdecnandn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process + Wdecnandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process + + Wfaprechp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process + Wdummyn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process + Wdummyinvn = 75 * g_ip->F_sz_um;//this was 60 micron for the 0.8 micron process + Wdummyinvp = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process + Waddrnandn = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process + Waddrnandp = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process + Wfanorn = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process + Wfanorp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process + W_hit_miss_n = Wdummyn; + W_hit_miss_p = g_tp.min_w_nmos_*p_to_n_sizing_r; + //TODO: this number should updated using new layout; from the NAND to output NOR should be computed using logical effort + } else { + Wdecdrivep = 450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process + Wdecdriven = 300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process + Wfadriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process + Wfadrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process + Wfadrive2n = 250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process + Wfadrive2p = 500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process + Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process + Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process + Wfadecdrive2n = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process + Wfadecdrive2p = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process + Wfadecdriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process + Wfadecdrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process + Wfaprechn = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process + Wfainvn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process + Wfainvp = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process + Wfanandn = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process + Wfanandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process + Wdecnandn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process + Wdecnandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process + + Wfaprechp = g_tp.w_pmos_bl_precharge;//this was 10 micron for the 0.8 micron process + Wdummyn = g_tp.cam.cell_nmos_w; + Wdummyinvn = 75 * g_ip->F_sz_um;//this was 60 micron for the 0.8 micron process + Wdummyinvp = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process + Waddrnandn = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process + Waddrnandp = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process + Wfanorn = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process + Wfanorp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process + W_hit_miss_n = Wdummyn; + W_hit_miss_p = g_tp.min_w_nmos_*p_to_n_sizing_r; + } + + Htagbits = (int)(ceil ((double) (subarray.num_cols_fa_cam) / 2.0)); + + /* First stage, searchline is precharged. searchline data driver drives the searchline to open (if miss) the comparators. + search_line_delay, search_line_power, search_line_restore_delay for cycle time computation. + From the driver(am and an) to the comparators in all the rows including the dummy row, + Assuming that comparators in both the normal matching line and the dummy matching line have the same sizing */ + + //Searchline precharge circuitry is same as that of bitline. However, no sharing between search ports and r/w ports + //Searchline precharge routes horizontally + driver_c_gate_load = subarray.num_cols_fa_cam * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false); + driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um; + driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.R_per_um; + + sl_precharge_eq_drv = new Driver( + driver_c_gate_load, + driver_c_wire_load, + driver_r_wire_load, + is_dram); + + //searchline data driver ; subarray.num_rows + 1 is because of the dummy row + //data drv should only have gate_C not 2*gate_C since the two searchlines are differential--same as bitlines + driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wdummyn, 0, is_dram, false, false); + driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal; + driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal; + sl_data_drv = new Driver( + driver_c_gate_load, + driver_c_wire_load, + driver_r_wire_load, + is_dram); + + sl_precharge_eq_drv->compute_delay(0); + double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);//Assuming CAM and SRAM have same Pre_eq_dr + double r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um; + double R_bl = (subarray.num_rows + 1) * r_b_metal; + double C_bl = subarray.C_bl_cam; + delay_cam_sl_restore = sl_precharge_eq_drv->delay + + log(g_tp.cam.Vbitpre) * (R_bl_precharge * C_bl + R_bl * C_bl / 2); + + out_time_ramp = sl_data_drv->compute_delay(inrisetime);//After entering one mat, start to consider the inrisetime from 0(0 is passed from outside) + + //matchline ops delay + delay_matchchline += sl_data_drv->delay; + + /* second stage, from the trasistors in the comparators(both normal row and dummy row) to the NAND gates that combins both half*/ + //matchline delay, matchline power, matchline_reset for cycle time computation, + + ////matchline precharge circuitry routes vertically + //There are two matchline precharge driver chains per subarray. + driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wfaprechp, 0, is_dram); + driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal; + driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal; + + ml_precharge_drv = new Driver( + driver_c_gate_load, + driver_c_wire_load, + driver_r_wire_load, + is_dram); + + ml_precharge_drv->compute_delay(0); + + + rd = tr_R_on(Wdummyn, NCH, 2, is_dram); + c_intrinsic = Htagbits * + (2 * drain_C_(Wdummyn, NCH, 2, 1, g_tp.cell_h_def, + is_dram)//TODO: the cell_h_def should be revisit + + drain_C_(Wfaprechp, PCH, 1, 1, g_tp.cell_h_def, is_dram) / + Htagbits);//since each halve only has one precharge tx per matchline + + Cwire = c_matchline_metal * Htagbits; + Rwire = r_matchline_metal * Htagbits; + c_gate_load = gate_C(Waddrnandn + Waddrnandp, 0, is_dram); + + double R_ml_precharge = tr_R_on(Wfaprechp, PCH, 1, is_dram); + //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um; + double R_ml = Rwire; + double C_ml = Cwire + c_intrinsic; + //TODO: latest CAM has sense amps on matchlines too + delay_cam_ml_reset = ml_precharge_drv->delay + + log(g_tp.cam.Vbitpre) * (R_ml_precharge * C_ml + R_ml * C_ml / 2); + + //matchline ops delay + tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load); + this_delay = horowitz(out_time_ramp, tf, VTHFA2, VTHFA3, FALL); + delay_matchchline += this_delay; + out_time_ramp = this_delay / VTHFA3; + + dynSearchEng += ((c_intrinsic + Cwire + c_gate_load) * + (subarray.num_rows + 1)) //TODO: need to be precise + * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * + 2;//each subarry has two halves + + /* third stage, from the NAND2 gates to the drivers in the dummy row */ + rd = tr_R_on(Waddrnandn, NCH, 2, is_dram); + c_intrinsic = drain_C_(Waddrnandn, NCH, 2, 1, g_tp.cell_h_def, is_dram) + + drain_C_(Waddrnandp, PCH, 1, 1, g_tp.cell_h_def, is_dram) * 2; + c_gate_load = gate_C(Wdummyinvn + Wdummyinvp, 0, is_dram); + tf = rd * (c_intrinsic + c_gate_load); + this_delay = horowitz(out_time_ramp, tf, VTHFA3, VTHFA4, RISE); + out_time_ramp = this_delay / (1 - VTHFA4); + delay_matchchline += this_delay; + + //only the dummy row has the extra inverter between NAND and NOR gates + dynSearchEng += (c_intrinsic * (subarray.num_rows + 1) + c_gate_load * 2) * + g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;// * Ntbl; + + /* fourth stage, from the driver in dummy matchline to the NOR2 gate which drives the wordline of the data portion */ + rd = tr_R_on(Wdummyinvn, NCH, 1, is_dram); + c_intrinsic = drain_C_(Wdummyinvn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + drain_C_(Wdummyinvp, NCH, 1, 1, g_tp.cell_h_def, is_dram); + Cwire = c_matchline_metal * Htagbits + c_searchline_metal * + (subarray.num_rows + 1) / 2; + Rwire = r_matchline_metal * Htagbits + r_searchline_metal * + (subarray.num_rows + 1) / 2; + c_gate_load = gate_C(Wfanorn + Wfanorp, 0, is_dram); + tf = rd * (c_intrinsic + Cwire + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load); + this_delay = horowitz (out_time_ramp, tf, VTHFA4, VTHFA5, FALL); + out_time_ramp = this_delay / VTHFA5; + delay_matchchline += this_delay; + + dynSearchEng += (c_intrinsic + Cwire + subarray.num_rows * c_gate_load) * + g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl; + + /*final statge from the NOR gate to drive the wordline of the data portion */ + + //searchline data driver There are two matchline precharge driver chains per subarray. + driver_c_gate_load = gate_C(W_hit_miss_n, 0, is_dram, false, false);//nmos of the pull down logic + driver_c_wire_load = subarray.C_wl_ram; + driver_r_wire_load = subarray.R_wl_ram; + + ml_to_ram_wl_drv = new Driver( + driver_c_gate_load, + driver_c_wire_load, + driver_r_wire_load, + is_dram); + + + + rd = tr_R_on(Wfanorn, NCH, 1, is_dram); + c_intrinsic = 2 * drain_C_(Wfanorn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + + drain_C_(Wfanorp, NCH, 1, 1, g_tp.cell_h_def, is_dram); + c_gate_load = gate_C(ml_to_ram_wl_drv->width_n[0] + ml_to_ram_wl_drv->width_p[0], 0, is_dram); + tf = rd * (c_intrinsic + c_gate_load); + this_delay = horowitz (out_time_ramp, tf, 0.5, 0.5, RISE); + out_time_ramp = this_delay / (1 - 0.5); + delay_matchchline += this_delay; + + out_time_ramp = ml_to_ram_wl_drv->compute_delay(out_time_ramp); + + //c_gate_load energy is computed in ml_to_ram_wl_drv + dynSearchEng += (c_intrinsic) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl; + + + /* peripheral-- hitting logic "CMOS VLSI Design Fig11.51*/ + /*Precharge the hitting logic */ + c_intrinsic = 2 * + drain_C_(W_hit_miss_p, NCH, 2, 1, g_tp.cell_h_def, is_dram); + Cwire = c_searchline_metal * subarray.num_rows; + Rwire = r_searchline_metal * subarray.num_rows; + c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram) * + subarray.num_rows; + + rd = tr_R_on(W_hit_miss_p, PCH, 1, is_dram, false, false); + //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um; + double R_hit_miss = Rwire; + double C_hit_miss = Cwire + c_intrinsic; + delay_hit_miss_reset = log(g_tp.cam.Vbitpre) * + (rd * C_hit_miss + R_hit_miss * C_hit_miss / 2); + dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; + + /*hitting logic evaluation */ + c_intrinsic = 2 * + drain_C_(W_hit_miss_n, NCH, 2, 1, g_tp.cell_h_def, is_dram); + Cwire = c_searchline_metal * subarray.num_rows; + Rwire = r_searchline_metal * subarray.num_rows; + c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram) * + subarray.num_rows; + + rd = tr_R_on(W_hit_miss_n, PCH, 1, is_dram, false, false); + tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load); + + delay_hit_miss = horowitz(0, tf, 0.5, 0.5, FALL); + + if (is_fa) + delay_matchchline += MAX(ml_to_ram_wl_drv->delay, delay_hit_miss); + + dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; + + /* TODO: peripheral-- Priority Encoder, usually this is not necessary in processor components*/ + + power_matchline.searchOp.dynamic = dynSearchEng; + + //leakage in one subarray + double Iport = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true);//TODO: how much is the idle time? just by *2? + double Iport_erp = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 2, nmos, false, true); + double Icell = cmos_Isub_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w, + 1, inv, false, true) * 2; + //approx XOR with Inv + double Icell_comparator = cmos_Isub_leakage(Wdummyn, Wdummyn, 1, inv, + false, true) * 2; + + leak_power_cc_inverters_sram_cell = Icell * g_tp.cam_cell.Vdd; + leak_comparator_cam_cell = Icell_comparator * g_tp.cam_cell.Vdd; + leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.cam_cell.Vdd; + leak_power_RD_port_sram_cell = Iport_erp * g_tp.cam_cell.Vdd; + leak_power_SCHP_port_sram_cell = 0;//search port and r/w port are sperate, therefore no access txs in search ports + + power_matchline.searchOp.leakage += leak_power_cc_inverters_sram_cell + + leak_comparator_cam_cell + + leak_power_acc_tr_RW_or_WR_port_sram_cell + + leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) + + leak_power_RD_port_sram_cell * ERP + + leak_power_SCHP_port_sram_cell * SCHP; // power_matchline.searchOp.leakage += leak_comparator_cam_cell; - power_matchline.searchOp.leakage *= (subarray.num_rows+1) * subarray.num_cols_fa_cam;//TODO:dumy line precise - power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(0, Wfaprechp, 1, pmos) * g_tp.cam_cell.Vdd; - power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd; - power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(Wfanorn, Wfanorp,2, nor) * g_tp.cam_cell.Vdd; - //In idle states, the hit/miss txs are closed (on) therefore no Isub - power_matchline.searchOp.leakage += 0;// subarray.num_rows * cmos_Isub_leakage(W_hit_miss_n, 0,1, nmos) * g_tp.cam_cell.Vdd+ + power_matchline.searchOp.leakage *= (subarray.num_rows + 1) * + subarray.num_cols_fa_cam;//TODO:dumy line precise + power_matchline.searchOp.leakage += (subarray.num_rows + 1) * + cmos_Isub_leakage(0, Wfaprechp, 1, pmos) * g_tp.cam_cell.Vdd; + power_matchline.searchOp.leakage += (subarray.num_rows + 1) * + cmos_Isub_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd; + power_matchline.searchOp.leakage += (subarray.num_rows + 1) * + cmos_Isub_leakage(Wfanorn, Wfanorp, 2, nor) * g_tp.cam_cell.Vdd; + //In idle states, the hit/miss txs are closed (on) therefore no Isub + power_matchline.searchOp.leakage += 0;// subarray.num_rows * cmos_Isub_leakage(W_hit_miss_n, 0,1, nmos) * g_tp.cam_cell.Vdd+ // + cmos_Isub_leakage(0, W_hit_miss_p,1, pmos) * g_tp.cam_cell.Vdd; - //in idle state, Ig_on only possibly exist in access transistors of read only ports - double Ig_port_erp = cmos_Ig_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true); - double Ig_cell = cmos_Ig_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w, 1, inv, false, true)*2; - double Ig_cell_comparator = cmos_Ig_leakage(Wdummyn, Wdummyn, 1, inv, false, true)*2;// cmos_Ig_leakage(Wdummyn, 0, 2, nmos)*2; - - gate_leak_comparator_cam_cell = Ig_cell_comparator* g_tp.cam_cell.Vdd; - gate_leak_power_cc_inverters_sram_cell = Ig_cell*g_tp.cam_cell.Vdd; - gate_leak_power_RD_port_sram_cell = Ig_port_erp*g_tp.sram_cell.Vdd; - gate_leak_power_SCHP_port_sram_cell = 0; - - //cout<<"power_matchline.searchOp.leakage"<<power_matchline.searchOp.leakage<<endl; - - power_matchline.searchOp.gate_leakage += gate_leak_power_cc_inverters_sram_cell; - power_matchline.searchOp.gate_leakage += gate_leak_comparator_cam_cell; - power_matchline.searchOp.gate_leakage += gate_leak_power_SCHP_port_sram_cell*SCHP + gate_leak_power_RD_port_sram_cell * ERP; - power_matchline.searchOp.gate_leakage *= (subarray.num_rows+1) * subarray.num_cols_fa_cam;//TODO:dumy line precise - power_matchline.searchOp.gate_leakage += (subarray.num_rows+1) * cmos_Ig_leakage(0, Wfaprechp,1, pmos) * g_tp.cam_cell.Vdd; - power_matchline.searchOp.gate_leakage += (subarray.num_rows+1) * cmos_Ig_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd; - power_matchline.searchOp.gate_leakage += (subarray.num_rows+1) * cmos_Ig_leakage(Wfanorn, Wfanorp, 2, nor) * g_tp.cam_cell.Vdd; - power_matchline.searchOp.gate_leakage += subarray.num_rows * cmos_Ig_leakage(W_hit_miss_n, 0,1, nmos) * g_tp.cam_cell.Vdd+ - + cmos_Ig_leakage(0, W_hit_miss_p,1, pmos) * g_tp.cam_cell.Vdd; - - - return out_time_ramp; + //in idle state, Ig_on only possibly exist in access transistors of read only ports + double Ig_port_erp = cmos_Ig_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true); + double Ig_cell = cmos_Ig_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w, + 1, inv, false, true) * 2; + double Ig_cell_comparator = cmos_Ig_leakage(Wdummyn, Wdummyn, 1, inv, + false, true) * 2; + + gate_leak_comparator_cam_cell = Ig_cell_comparator * g_tp.cam_cell.Vdd; + gate_leak_power_cc_inverters_sram_cell = Ig_cell * g_tp.cam_cell.Vdd; + gate_leak_power_RD_port_sram_cell = Ig_port_erp * g_tp.sram_cell.Vdd; + gate_leak_power_SCHP_port_sram_cell = 0; + + //cout<<"power_matchline.searchOp.leakage"<<power_matchline.searchOp.leakage<<endl; + + power_matchline.searchOp.gate_leakage += + gate_leak_power_cc_inverters_sram_cell; + power_matchline.searchOp.gate_leakage += gate_leak_comparator_cam_cell; + power_matchline.searchOp.gate_leakage += + gate_leak_power_SCHP_port_sram_cell * SCHP + + gate_leak_power_RD_port_sram_cell * ERP; + power_matchline.searchOp.gate_leakage *= (subarray.num_rows + 1) * + subarray.num_cols_fa_cam;//TODO:dumy line precise + power_matchline.searchOp.gate_leakage += (subarray.num_rows + 1) * + cmos_Ig_leakage(0, Wfaprechp, 1, pmos) * g_tp.cam_cell.Vdd; + power_matchline.searchOp.gate_leakage += (subarray.num_rows + 1) * + cmos_Ig_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd; + power_matchline.searchOp.gate_leakage += (subarray.num_rows + 1) * + cmos_Ig_leakage(Wfanorn, Wfanorp, 2, nor) * g_tp.cam_cell.Vdd; + power_matchline.searchOp.gate_leakage += subarray.num_rows * + cmos_Ig_leakage(W_hit_miss_n, 0, 1, nmos) * g_tp.cam_cell.Vdd + + + cmos_Ig_leakage(0, W_hit_miss_p, 1, pmos) * g_tp.cam_cell.Vdd; + + + return out_time_ramp; } -double Mat::width_write_driver_or_write_mux() -{ - // calculate resistance of SRAM cell pull-up PMOS transistor - // cam and sram have same cell trasistor properties - double R_sram_cell_pull_up_tr = tr_R_on(g_tp.sram.cell_pmos_w, NCH, 1, is_dram, true); - double R_access_tr = tr_R_on(g_tp.sram.cell_a_w, NCH, 1, is_dram, true); - double target_R_write_driver_and_mux = (2 * R_sram_cell_pull_up_tr - R_access_tr) / 2; - double width_write_driver_nmos = R_to_w(target_R_write_driver_and_mux, NCH, is_dram); +double Mat::width_write_driver_or_write_mux() { + // calculate resistance of SRAM cell pull-up PMOS transistor + // cam and sram have same cell trasistor properties + double R_sram_cell_pull_up_tr = tr_R_on(g_tp.sram.cell_pmos_w, NCH, 1, is_dram, true); + double R_access_tr = tr_R_on(g_tp.sram.cell_a_w, NCH, 1, is_dram, true); + double target_R_write_driver_and_mux = (2 * R_sram_cell_pull_up_tr - R_access_tr) / 2; + double width_write_driver_nmos = R_to_w(target_R_write_driver_and_mux, NCH, is_dram); - return width_write_driver_nmos; + return width_write_driver_nmos; } @@ -1007,134 +1032,164 @@ double Mat::width_write_driver_or_write_mux() double Mat::compute_comparators_height( int tagbits, int number_ways_in_mat, - double subarray_mem_cell_area_width) -{ - double nand2_area = compute_gate_area(NAND, 2, 0, g_tp.w_comp_n, g_tp.cell_h_def); - double cumulative_area = nand2_area * number_ways_in_mat * tagbits / 4; - return cumulative_area / subarray_mem_cell_area_width; + double subarray_mem_cell_area_width) { + double nand2_area = compute_gate_area(NAND, 2, 0, g_tp.w_comp_n, g_tp.cell_h_def); + double cumulative_area = nand2_area * number_ways_in_mat * tagbits / 4; + return cumulative_area / subarray_mem_cell_area_width; } -double Mat::compute_bitline_delay(double inrisetime) -{ - double V_b_pre, v_th_mem_cell, V_wl; - double tstep; - double dynRdEnergy = 0.0, dynWriteEnergy = 0.0; - double R_cell_pull_down=0.0, R_cell_acc =0.0, r_dev=0.0; - int deg_senseamp_muxing = dp.Ndsam_lev_1 * dp.Ndsam_lev_2; - - double R_b_metal = camFlag? cam_cell.h:cell.h * g_tp.wire_local.R_per_um; - double R_bl = subarray.num_rows * R_b_metal; - double C_bl = subarray.C_bl; - - // TODO: no leakage for DRAMs? - double leak_power_cc_inverters_sram_cell = 0; - double gate_leak_power_cc_inverters_sram_cell = 0; - double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0; - double leak_power_RD_port_sram_cell = 0; - double gate_leak_power_RD_port_sram_cell = 0; - - if (is_dram == true) - { - V_b_pre = g_tp.dram.Vbitpre; - v_th_mem_cell = g_tp.dram_acc.Vth; - V_wl = g_tp.vpp; - //The access transistor is not folded. So we just need to specify a threshold value for the - //folding width that is equal to or greater than Wmemcella. - R_cell_acc = tr_R_on(g_tp.dram.cell_a_w, NCH, 1, true, true); - r_dev = g_tp.dram_cell_Vdd / g_tp.dram_cell_I_on + R_bl / 2; - } - else - { //SRAM - V_b_pre = g_tp.sram.Vbitpre; - v_th_mem_cell = g_tp.sram_cell.Vth; - V_wl = g_tp.sram_cell.Vdd; - R_cell_pull_down = tr_R_on(g_tp.sram.cell_nmos_w, NCH, 1, false, true); - R_cell_acc = tr_R_on(g_tp.sram.cell_a_w, NCH, 1, false, true); - - //Leakage current of an SRAM cell - double Iport = cmos_Isub_leakage(g_tp.sram.cell_a_w, 0, 1, nmos,false, true);//TODO: how much is the idle time? just by *2? - double Iport_erp = cmos_Isub_leakage(g_tp.sram.cell_a_w, 0, 2, nmos,false, true); - double Icell = cmos_Isub_leakage(g_tp.sram.cell_nmos_w, g_tp.sram.cell_pmos_w, 1, inv,false, true)*2;//two invs per cell - - leak_power_cc_inverters_sram_cell = Icell * g_tp.sram_cell.Vdd; - leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.sram_cell.Vdd; - leak_power_RD_port_sram_cell = Iport_erp * g_tp.sram_cell.Vdd; +double Mat::compute_bitline_delay(double inrisetime) { + double V_b_pre, v_th_mem_cell, V_wl; + double tstep; + double dynRdEnergy = 0.0, dynWriteEnergy = 0.0; + double R_cell_pull_down = 0.0, R_cell_acc = 0.0, r_dev = 0.0; + int deg_senseamp_muxing = dp.Ndsam_lev_1 * dp.Ndsam_lev_2; + + double R_b_metal = camFlag ? cam_cell.h : cell.h * g_tp.wire_local.R_per_um; + double R_bl = subarray.num_rows * R_b_metal; + double C_bl = subarray.C_bl; + + // TODO: no leakage for DRAMs? + double leak_power_cc_inverters_sram_cell = 0; + double gate_leak_power_cc_inverters_sram_cell = 0; + double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0; + double leak_power_RD_port_sram_cell = 0; + double gate_leak_power_RD_port_sram_cell = 0; + + if (is_dram == true) { + V_b_pre = g_tp.dram.Vbitpre; + v_th_mem_cell = g_tp.dram_acc.Vth; + V_wl = g_tp.vpp; + //The access transistor is not folded. So we just need to specify a + // threshold value for the folding width that is equal to or greater + // than Wmemcella. + R_cell_acc = tr_R_on(g_tp.dram.cell_a_w, NCH, 1, true, true); + r_dev = g_tp.dram_cell_Vdd / g_tp.dram_cell_I_on + R_bl / 2; + } else { //SRAM + V_b_pre = g_tp.sram.Vbitpre; + v_th_mem_cell = g_tp.sram_cell.Vth; + V_wl = g_tp.sram_cell.Vdd; + R_cell_pull_down = tr_R_on(g_tp.sram.cell_nmos_w, NCH, 1, false, true); + R_cell_acc = tr_R_on(g_tp.sram.cell_a_w, NCH, 1, false, true); + + //Leakage current of an SRAM cell + //TODO: how much is the idle time? just by *2? + double Iport = cmos_Isub_leakage(g_tp.sram.cell_a_w, 0, 1, nmos, + false, true); + double Iport_erp = cmos_Isub_leakage(g_tp.sram.cell_a_w, 0, 2, nmos, + false, true); + double Icell = cmos_Isub_leakage(g_tp.sram.cell_nmos_w, + g_tp.sram.cell_pmos_w, 1, inv, false, + true) * 2;//two invs per cell + + leak_power_cc_inverters_sram_cell = Icell * g_tp.sram_cell.Vdd; + leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.sram_cell.Vdd; + leak_power_RD_port_sram_cell = Iport_erp * g_tp.sram_cell.Vdd; + + + //in idle state, Ig_on only possibly exist in access transistors of read only ports + double Ig_port_erp = cmos_Ig_leakage(g_tp.sram.cell_a_w, 0, 1, nmos, + false, true); + double Ig_cell = cmos_Ig_leakage(g_tp.sram.cell_nmos_w, + g_tp.sram.cell_pmos_w, 1, inv, false, + true); + + gate_leak_power_cc_inverters_sram_cell = Ig_cell * g_tp.sram_cell.Vdd; + gate_leak_power_RD_port_sram_cell = Ig_port_erp * g_tp.sram_cell.Vdd; + } - //in idle state, Ig_on only possibly exist in access transistors of read only ports - double Ig_port_erp = cmos_Ig_leakage(g_tp.sram.cell_a_w, 0, 1, nmos,false, true); - double Ig_cell = cmos_Ig_leakage(g_tp.sram.cell_nmos_w, g_tp.sram.cell_pmos_w, 1, inv,false, true); - - gate_leak_power_cc_inverters_sram_cell = Ig_cell*g_tp.sram_cell.Vdd; - gate_leak_power_RD_port_sram_cell = Ig_port_erp*g_tp.sram_cell.Vdd; - } - - - double C_drain_bit_mux = drain_C_(g_tp.w_nmos_b_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w / (2 *(RWP + ERP + SCHP)), is_dram); - double R_bit_mux = tr_R_on(g_tp.w_nmos_b_mux, NCH, 1, is_dram); - double C_drain_sense_amp_iso = drain_C_(g_tp.w_iso, PCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram); - double R_sense_amp_iso = tr_R_on(g_tp.w_iso, PCH, 1, is_dram); - double C_sense_amp_latch = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0, is_dram) + - drain_C_(g_tp.w_sense_n, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) + - drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram); - double C_drain_sense_amp_mux = drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram); - - if (is_dram) - { - double fraction = dp.V_b_sense / ((g_tp.dram_cell_Vdd/2) * g_tp.dram_cell_C /(g_tp.dram_cell_C + C_bl)); - tstep = 2.3 * fraction * r_dev * - (g_tp.dram_cell_C * (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux)) / - (g_tp.dram_cell_C + (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux)); - delay_writeback = tstep; - dynRdEnergy += (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) * - (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/; - dynWriteEnergy += (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch) * - (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/ * num_act_mats_hor_dir*100; - per_bitline_read_energy = (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) * - (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd; - } - else - { - double tau; - - if (deg_bl_muxing > 1) - { - tau = (R_cell_pull_down + R_cell_acc) * - (C_bl + 2*C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) + - R_bl * (C_bl/2 + 2*C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) + - R_bit_mux * (C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) + - R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux); - dynRdEnergy += (C_bl + 2 * C_drain_bit_mux) * 2 * dp.V_b_sense * g_tp.sram_cell.Vdd /* - subarray.num_cols * num_subarrays_per_mat*/; - dynRdEnergy += (2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) * - 2 * dp.V_b_sense * g_tp.sram_cell.Vdd * (1.0/*subarray.num_cols * num_subarrays_per_mat*/ / deg_bl_muxing); - dynWriteEnergy += ((1.0/*subarray.num_cols *num_subarrays_per_mat*/ / deg_bl_muxing) / deg_senseamp_muxing) * - num_act_mats_hor_dir * (C_bl + 2*C_drain_bit_mux) * g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd*2; - //Write Ops are differential for SRAM - } - else - { - tau = (R_cell_pull_down + R_cell_acc) * - (C_bl + C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) + R_bl * C_bl / 2 + - R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux); - dynRdEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) * - 2 * dp.V_b_sense * g_tp.sram_cell.Vdd /* subarray.num_cols * num_subarrays_per_mat*/; - dynWriteEnergy += (((1.0/*subarray.num_cols * num_subarrays_per_mat*/ / deg_bl_muxing) / deg_senseamp_muxing) * - num_act_mats_hor_dir * C_bl) * g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd*2; + double C_drain_bit_mux = drain_C_(g_tp.w_nmos_b_mux, NCH, 1, 0, + camFlag ? cam_cell.w : cell.w / + (2 * (RWP + ERP + SCHP)), is_dram); + double R_bit_mux = tr_R_on(g_tp.w_nmos_b_mux, NCH, 1, is_dram); + double C_drain_sense_amp_iso = drain_C_(g_tp.w_iso, PCH, 1, 0, + camFlag ? cam_cell.w : + cell.w * deg_bl_muxing / + (RWP + ERP + SCHP), is_dram); + double R_sense_amp_iso = tr_R_on(g_tp.w_iso, PCH, 1, is_dram); + double C_sense_amp_latch = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0, + is_dram) + + drain_C_(g_tp.w_sense_n, NCH, 1, 0, camFlag ? cam_cell.w : + cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) + + drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag ? cam_cell.w : + cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram); + double C_drain_sense_amp_mux = drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, + camFlag ? cam_cell.w : + cell.w * deg_bl_muxing / + (RWP + ERP + SCHP), is_dram); + + if (is_dram) { + double fraction = dp.V_b_sense / ((g_tp.dram_cell_Vdd / 2) * + g_tp.dram_cell_C / + (g_tp.dram_cell_C + C_bl)); + tstep = 2.3 * fraction * r_dev * + (g_tp.dram_cell_C * (C_bl + 2 * C_drain_sense_amp_iso + + C_sense_amp_latch + C_drain_sense_amp_mux)) / + (g_tp.dram_cell_C + (C_bl + 2 * C_drain_sense_amp_iso + + C_sense_amp_latch + C_drain_sense_amp_mux)); + delay_writeback = tstep; + dynRdEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch + + C_drain_sense_amp_mux) * + (g_tp.dram_cell_Vdd / 2) * + g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/; + dynWriteEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch) * + (g_tp.dram_cell_Vdd / 2) * + g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/ * + num_act_mats_hor_dir * 100; + per_bitline_read_energy = (C_bl + 2 * C_drain_sense_amp_iso + + C_sense_amp_latch + C_drain_sense_amp_mux) * + (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd; + } else { + double tau; + + if (deg_bl_muxing > 1) { + tau = (R_cell_pull_down + R_cell_acc) * + (C_bl + 2 * C_drain_bit_mux + 2 * C_drain_sense_amp_iso + + C_sense_amp_latch + C_drain_sense_amp_mux) + + R_bl * (C_bl / 2 + 2 * C_drain_bit_mux + 2 * + C_drain_sense_amp_iso + C_sense_amp_latch + + C_drain_sense_amp_mux) + + R_bit_mux * (C_drain_bit_mux + 2 * C_drain_sense_amp_iso + + C_sense_amp_latch + C_drain_sense_amp_mux) + + R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + + C_drain_sense_amp_mux); + dynRdEnergy += (C_bl + 2 * C_drain_bit_mux) * 2 * dp.V_b_sense * + g_tp.sram_cell.Vdd; + dynRdEnergy += (2 * C_drain_sense_amp_iso + C_sense_amp_latch + + C_drain_sense_amp_mux) * + 2 * dp.V_b_sense * g_tp.sram_cell.Vdd * + (1.0/*subarray.num_cols * num_subarrays_per_mat*/ / + deg_bl_muxing); + dynWriteEnergy += ((1.0/*subarray.num_cols *num_subarrays_per_mat*/ / + deg_bl_muxing) / deg_senseamp_muxing) * + num_act_mats_hor_dir * (C_bl + 2 * C_drain_bit_mux) * + g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd * 2; + //Write Ops are differential for SRAM + } else { + tau = (R_cell_pull_down + R_cell_acc) * + (C_bl + C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) + R_bl * C_bl / 2 + + R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux); + dynRdEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) * + 2 * dp.V_b_sense * g_tp.sram_cell.Vdd /* subarray.num_cols * num_subarrays_per_mat*/; + dynWriteEnergy += (((1.0/*subarray.num_cols * num_subarrays_per_mat*/ / + deg_bl_muxing) / deg_senseamp_muxing) * + num_act_mats_hor_dir * C_bl) * + g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd * 2; + + } + tstep = tau * log(V_b_pre / (V_b_pre - dp.V_b_sense)); + power_bitline.readOp.leakage = + leak_power_cc_inverters_sram_cell + + leak_power_acc_tr_RW_or_WR_port_sram_cell + + leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) + + leak_power_RD_port_sram_cell * ERP; + power_bitline.readOp.gate_leakage = gate_leak_power_cc_inverters_sram_cell + + gate_leak_power_RD_port_sram_cell * ERP; } - tstep = tau * log(V_b_pre / (V_b_pre - dp.V_b_sense)); - power_bitline.readOp.leakage = - leak_power_cc_inverters_sram_cell + - leak_power_acc_tr_RW_or_WR_port_sram_cell + - leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) + - leak_power_RD_port_sram_cell * ERP; - power_bitline.readOp.gate_leakage = gate_leak_power_cc_inverters_sram_cell + - gate_leak_power_RD_port_sram_cell * ERP; - - } // cout<<"leak_power_cc_inverters_sram_cell"<<leak_power_cc_inverters_sram_cell<<endl; // cout<<"leak_power_acc_tr_RW_or_WR_port_sram_cell"<<leak_power_acc_tr_RW_or_WR_port_sram_cell<<endl; @@ -1142,607 +1197,684 @@ double Mat::compute_bitline_delay(double inrisetime) // cout<<"leak_power_RD_port_sram_cell"<<leak_power_RD_port_sram_cell<<endl; - /* take input rise time into account */ - double m = V_wl / inrisetime; - if (tstep <= (0.5 * (V_wl - v_th_mem_cell) / m)) - { - delay_bitline = sqrt(2 * tstep * (V_wl - v_th_mem_cell)/ m); - } - else - { - delay_bitline = tstep + (V_wl - v_th_mem_cell) / (2 * m); - } + /* take input rise time into account */ + double m = V_wl / inrisetime; + if (tstep <= (0.5 * (V_wl - v_th_mem_cell) / m)) { + delay_bitline = sqrt(2 * tstep * (V_wl - v_th_mem_cell) / m); + } else { + delay_bitline = tstep + (V_wl - v_th_mem_cell) / (2 * m); + } - bool is_fa = (dp.fully_assoc) ? true : false; + bool is_fa = (dp.fully_assoc) ? true : false; - if (dp.is_tag == false || is_fa == false) - { - power_bitline.readOp.dynamic = dynRdEnergy; - power_bitline.writeOp.dynamic = dynWriteEnergy; - } + if (dp.is_tag == false || is_fa == false) { + power_bitline.readOp.dynamic = dynRdEnergy; + power_bitline.writeOp.dynamic = dynWriteEnergy; + } - double outrisetime = 0; - return outrisetime; + double outrisetime = 0; + return outrisetime; } -double Mat::compute_sa_delay(double inrisetime) -{ - //int num_sa_subarray = subarray.num_cols / deg_bl_muxing; //in a subarray - - //Bitline circuitry leakage. - double Iiso = simplified_pmos_leakage(g_tp.w_iso, is_dram); - double IsenseEn = simplified_nmos_leakage(g_tp.w_sense_en, is_dram); - double IsenseN = simplified_nmos_leakage(g_tp.w_sense_n, is_dram); - double IsenseP = simplified_pmos_leakage(g_tp.w_sense_p, is_dram); - - double lkgIdlePh = IsenseEn;//+ 2*IoBufP; - //double lkgWritePh = Iiso + IsenseEn;// + 2*IoBufP + 2*Ipch; - double lkgReadPh = Iiso + IsenseN + IsenseP;//+ IoBufN + IoBufP + 2*IsPch ; - //double lkgRead = lkgReadPh * num_sa_subarray * 4 * num_act_mats_hor_dir + - // lkgIdlePh * num_sa_subarray * 4 * (num_mats - num_act_mats_hor_dir); - double lkgIdle = lkgIdlePh /*num_sa_subarray * num_subarrays_per_mat*/; - leak_power_sense_amps_closed_page_state = lkgIdlePh * g_tp.peri_global.Vdd /* num_sa_subarray * num_subarrays_per_mat*/; - leak_power_sense_amps_open_page_state = lkgReadPh * g_tp.peri_global.Vdd /* num_sa_subarray * num_subarrays_per_mat*/; - - // sense amplifier has to drive logic in "data out driver" and sense precharge load. - // load seen by sense amp. New delay model for sense amp that is sensitive to both the output time - //constant as well as the magnitude of input differential voltage. - double C_ld = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0, is_dram) + - drain_C_(g_tp.w_sense_n, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) + - drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) + - drain_C_(g_tp.w_iso,PCH,1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) + - drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram); - double tau = C_ld / g_tp.gm_sense_amp_latch; - delay_sa = tau * log(g_tp.peri_global.Vdd / dp.V_b_sense); - power_sa.readOp.dynamic = C_ld * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd /* num_sa_subarray +double Mat::compute_sa_delay(double inrisetime) { + //int num_sa_subarray = subarray.num_cols / deg_bl_muxing; //in a subarray + + //Bitline circuitry leakage. + double Iiso = simplified_pmos_leakage(g_tp.w_iso, is_dram); + double IsenseEn = simplified_nmos_leakage(g_tp.w_sense_en, is_dram); + double IsenseN = simplified_nmos_leakage(g_tp.w_sense_n, is_dram); + double IsenseP = simplified_pmos_leakage(g_tp.w_sense_p, is_dram); + + double lkgIdlePh = IsenseEn;//+ 2*IoBufP; + //double lkgWritePh = Iiso + IsenseEn;// + 2*IoBufP + 2*Ipch; + double lkgReadPh = Iiso + IsenseN + IsenseP;//+ IoBufN + IoBufP + 2*IsPch ; + //double lkgRead = lkgReadPh * num_sa_subarray * 4 * num_act_mats_hor_dir + + // lkgIdlePh * num_sa_subarray * 4 * (num_mats - num_act_mats_hor_dir); + double lkgIdle = lkgIdlePh /*num_sa_subarray * num_subarrays_per_mat*/; + leak_power_sense_amps_closed_page_state = lkgIdlePh * g_tp.peri_global.Vdd /* num_sa_subarray * num_subarrays_per_mat*/; + leak_power_sense_amps_open_page_state = lkgReadPh * g_tp.peri_global.Vdd /* num_sa_subarray * num_subarrays_per_mat*/; + + // sense amplifier has to drive logic in "data out driver" and sense precharge load. + // load seen by sense amp. New delay model for sense amp that is sensitive to both the output time + //constant as well as the magnitude of input differential voltage. + double C_ld = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0, is_dram) + + drain_C_(g_tp.w_sense_n, NCH, 1, 0, + camFlag ? cam_cell.w : cell.w * deg_bl_muxing / + (RWP + ERP + SCHP), is_dram) + + drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag ? + cam_cell.w : cell.w * deg_bl_muxing / (RWP + ERP + SCHP), + is_dram) + + drain_C_(g_tp.w_iso, PCH, 1, 0, camFlag ? + cam_cell.w : cell.w * deg_bl_muxing / (RWP + ERP + SCHP), + is_dram) + + drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag ? + cam_cell.w : cell.w * deg_bl_muxing / (RWP + ERP + SCHP), + is_dram); + double tau = C_ld / g_tp.gm_sense_amp_latch; + delay_sa = tau * log(g_tp.peri_global.Vdd / dp.V_b_sense); + power_sa.readOp.dynamic = C_ld * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd /* num_sa_subarray num_subarrays_per_mat * num_act_mats_hor_dir*/; - power_sa.readOp.leakage = lkgIdle * g_tp.peri_global.Vdd; + power_sa.readOp.leakage = lkgIdle * g_tp.peri_global.Vdd; - double outrisetime = 0; - return outrisetime; + double outrisetime = 0; + return outrisetime; } -double Mat::compute_subarray_out_drv(double inrisetime) -{ - double C_ld, rd, tf, this_delay; - double p_to_n_sz_r = pmos_to_nmos_sz_ratio(is_dram); - - // delay of signal through pass-transistor of first level of sense-amp mux to input of inverter-buffer. - rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram); - C_ld = dp.Ndsam_lev_1 * drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) + - gate_C(g_tp.min_w_nmos_ + p_to_n_sz_r * g_tp.min_w_nmos_, 0.0, is_dram); - tf = rd * C_ld; - this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); - delay_subarray_out_drv += this_delay; - inrisetime = this_delay/(1.0 - 0.5); - power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; - power_subarray_out_drv.readOp.leakage += 0; // for now, let leakage of the pass transistor be 0 - power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos)* g_tp.peri_global.Vdd; - // delay of signal through inverter-buffer to second level of sense-amp mux. - // internal delay of buffer - rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram); - C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) + - drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def, is_dram) + - gate_C(g_tp.min_w_nmos_ + p_to_n_sz_r * g_tp.min_w_nmos_, 0.0, is_dram); - tf = rd * C_ld; - this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); - delay_subarray_out_drv += this_delay; - inrisetime = this_delay/(1.0 - 0.5); - power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; - power_subarray_out_drv.readOp.leakage += cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv, is_dram)* g_tp.peri_global.Vdd; - power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv)* g_tp.peri_global.Vdd; - - // inverter driving drain of pass transistor of second level of sense-amp mux. - rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram); - C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) + - drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def, is_dram) + - drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP + SCHP), is_dram); - tf = rd * C_ld; - this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); - delay_subarray_out_drv += this_delay; - inrisetime = this_delay/(1.0 - 0.5); - power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; - power_subarray_out_drv.readOp.leakage += cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv)* g_tp.peri_global.Vdd; - power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv)* g_tp.peri_global.Vdd; - - - // delay of signal through pass-transistor to input of subarray output driver. - rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram); - C_ld = dp.Ndsam_lev_2 * drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP + SCHP), is_dram) + - //gate_C(subarray_out_wire->repeater_size * g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), 0.0, is_dram); - gate_C(subarray_out_wire->repeater_size *(subarray_out_wire->wire_length/subarray_out_wire->repeater_spacing) * g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), 0.0, is_dram); - tf = rd * C_ld; - this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); - delay_subarray_out_drv += this_delay; - inrisetime = this_delay/(1.0 - 0.5); - power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; - power_subarray_out_drv.readOp.leakage += 0; // for now, let leakage of the pass transistor be 0 - power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos)* g_tp.peri_global.Vdd; - - - return inrisetime; +double Mat::compute_subarray_out_drv(double inrisetime) { + double C_ld, rd, tf, this_delay; + double p_to_n_sz_r = pmos_to_nmos_sz_ratio(is_dram); + + // delay of signal through pass-transistor of first level of sense-amp mux to input of inverter-buffer. + rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram); + C_ld = dp.Ndsam_lev_1 * drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, + camFlag ? cam_cell.w : cell.w * + deg_bl_muxing / (RWP + ERP + SCHP), + is_dram) + + gate_C(g_tp.min_w_nmos_ + p_to_n_sz_r * g_tp.min_w_nmos_, 0.0, is_dram); + tf = rd * C_ld; + this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); + delay_subarray_out_drv += this_delay; + inrisetime = this_delay / (1.0 - 0.5); + power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; + power_subarray_out_drv.readOp.leakage += 0; // for now, let leakage of the pass transistor be 0 + power_subarray_out_drv.readOp.gate_leakage += + cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos) * g_tp.peri_global.Vdd; + // delay of signal through inverter-buffer to second level of sense-amp mux. + // internal delay of buffer + rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram); + C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) + + drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def, is_dram) + + gate_C(g_tp.min_w_nmos_ + p_to_n_sz_r * g_tp.min_w_nmos_, 0.0, is_dram); + tf = rd * C_ld; + this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); + delay_subarray_out_drv += this_delay; + inrisetime = this_delay / (1.0 - 0.5); + power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; + power_subarray_out_drv.readOp.leakage += + cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, + inv, is_dram) * g_tp.peri_global.Vdd; + power_subarray_out_drv.readOp.gate_leakage += + cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, + inv) * g_tp.peri_global.Vdd; + + // inverter driving drain of pass transistor of second level of sense-amp mux. + rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram); + C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) + + drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def, + is_dram) + + drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag ? + cam_cell.w : cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / + (RWP + ERP + SCHP), is_dram); + tf = rd * C_ld; + this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); + delay_subarray_out_drv += this_delay; + inrisetime = this_delay / (1.0 - 0.5); + power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; + power_subarray_out_drv.readOp.leakage += + cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, + inv) * g_tp.peri_global.Vdd; + power_subarray_out_drv.readOp.gate_leakage += + cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, + inv) * g_tp.peri_global.Vdd; + + + // delay of signal through pass-transistor to input of subarray output driver. + rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram); + C_ld = dp.Ndsam_lev_2 * + drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag ? cam_cell.w : + cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP + SCHP), + is_dram) + + //gate_C(subarray_out_wire->repeater_size * g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), 0.0, is_dram); + gate_C(subarray_out_wire->repeater_size * + (subarray_out_wire->wire_length / + subarray_out_wire->repeater_spacing) * g_tp.min_w_nmos_ * + (1 + p_to_n_sz_r), 0.0, is_dram); + tf = rd * C_ld; + this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); + delay_subarray_out_drv += this_delay; + inrisetime = this_delay / (1.0 - 0.5); + power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; + power_subarray_out_drv.readOp.leakage += 0; // for now, let leakage of the pass transistor be 0 + power_subarray_out_drv.readOp.gate_leakage += + cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos) * g_tp.peri_global.Vdd; + + + return inrisetime; } -double Mat::compute_comparator_delay(double inrisetime) -{ - int A = g_ip->tag_assoc; - - int tagbits_ = dp.tagbits / 4; // Assuming there are 4 quarter comparators. input tagbits is already - // a multiple of 4. - - /* First Inverter */ - double Ceq = gate_C(g_tp.w_comp_inv_n2+g_tp.w_comp_inv_p2, 0, is_dram) + - drain_C_(g_tp.w_comp_inv_p1, PCH, 1, 1, g_tp.cell_h_def, is_dram) + - drain_C_(g_tp.w_comp_inv_n1, NCH, 1, 1, g_tp.cell_h_def, is_dram); - double Req = tr_R_on(g_tp.w_comp_inv_p1, PCH, 1, is_dram); - double tf = Req*Ceq; - double st1del = horowitz(inrisetime,tf,VTHCOMPINV,VTHCOMPINV,FALL); - double nextinputtime = st1del/VTHCOMPINV; - power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A; - - //For each degree of associativity - //there are 4 such quarter comparators - double lkgCurrent = cmos_Isub_leakage(g_tp.w_comp_inv_n1, g_tp.w_comp_inv_p1, 1, inv, is_dram)* 4 * A; - double gatelkgCurrent = cmos_Ig_leakage(g_tp.w_comp_inv_n1, g_tp.w_comp_inv_p1, 1, inv, is_dram)* 4 * A; - /* Second Inverter */ - Ceq = gate_C(g_tp.w_comp_inv_n3+g_tp.w_comp_inv_p3, 0, is_dram) + - drain_C_(g_tp.w_comp_inv_p2, PCH, 1, 1, g_tp.cell_h_def, is_dram) + - drain_C_(g_tp.w_comp_inv_n2, NCH, 1, 1, g_tp.cell_h_def, is_dram); - Req = tr_R_on(g_tp.w_comp_inv_n2, NCH, 1, is_dram); - tf = Req*Ceq; - double st2del = horowitz(nextinputtime,tf,VTHCOMPINV,VTHCOMPINV,RISE); - nextinputtime = st2del/(1.0-VTHCOMPINV); - power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A; - lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1, inv, is_dram)* 4 * A; - gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1, inv, is_dram)* 4 * A; - - /* Third Inverter */ - Ceq = gate_C(g_tp.w_eval_inv_n+g_tp.w_eval_inv_p, 0, is_dram) + - drain_C_(g_tp.w_comp_inv_p3, PCH, 1, 1, g_tp.cell_h_def, is_dram) + - drain_C_(g_tp.w_comp_inv_n3, NCH, 1, 1, g_tp.cell_h_def, is_dram); - Req = tr_R_on(g_tp.w_comp_inv_p3, PCH, 1, is_dram); - tf = Req*Ceq; - double st3del = horowitz(nextinputtime,tf,VTHCOMPINV,VTHEVALINV,FALL); - nextinputtime = st3del/(VTHEVALINV); - power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A; - lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1, inv, is_dram)* 4 * A; - gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1, inv, is_dram)* 4 * A; - - /* Final Inverter (virtual ground driver) discharging compare part */ - double r1 = tr_R_on(g_tp.w_comp_n,NCH,2, is_dram); - double r2 = tr_R_on(g_tp.w_eval_inv_n,NCH,1, is_dram); /* was switch */ - double c2 = (tagbits_)*(drain_C_(g_tp.w_comp_n,NCH,1, 1, g_tp.cell_h_def, is_dram) + - drain_C_(g_tp.w_comp_n,NCH,2, 1, g_tp.cell_h_def, is_dram)) + - drain_C_(g_tp.w_eval_inv_p,PCH,1, 1, g_tp.cell_h_def, is_dram) + - drain_C_(g_tp.w_eval_inv_n,NCH,1, 1, g_tp.cell_h_def, is_dram); - double c1 = (tagbits_)*(drain_C_(g_tp.w_comp_n,NCH,1, 1, g_tp.cell_h_def, is_dram) + - drain_C_(g_tp.w_comp_n,NCH,2, 1, g_tp.cell_h_def, is_dram)) + - drain_C_(g_tp.w_comp_p,PCH,1, 1, g_tp.cell_h_def, is_dram) + - gate_C(WmuxdrvNANDn+WmuxdrvNANDp,0, is_dram); - power_comparator.readOp.dynamic += 0.5 * c2 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A; - power_comparator.readOp.dynamic += c1 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * (A - 1); - lkgCurrent += cmos_Isub_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1, inv, is_dram)* 4 * A; - lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv, is_dram)* 4 * A; // stack factor of 0.2 - - gatelkgCurrent += cmos_Ig_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1, inv, is_dram)* 4 * A; - gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv, is_dram)* 4 * A;//for gate leakage this equals to a inverter - - /* time to go to threshold of mux driver */ - double tstep = (r2*c2+(r1+r2)*c1)*log(1.0/VTHMUXNAND); - /* take into account non-zero input rise time */ - double m = g_tp.peri_global.Vdd/nextinputtime; - double Tcomparatorni; - - if((tstep) <= (0.5*(g_tp.peri_global.Vdd-g_tp.peri_global.Vth)/m)) - { - double a = m; - double b = 2*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth); - double c = -2*(tstep)*(g_tp.peri_global.Vdd-g_tp.peri_global.Vth)+1/m*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth)*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth); - Tcomparatorni = (-b+sqrt(b*b-4*a*c))/(2*a); - } - else - { - Tcomparatorni = (tstep) + (g_tp.peri_global.Vdd+g_tp.peri_global.Vth)/(2*m) - (g_tp.peri_global.Vdd*VTHEVALINV)/m; - } - delay_comparator = Tcomparatorni+st1del+st2del+st3del; - power_comparator.readOp.leakage = lkgCurrent * g_tp.peri_global.Vdd; - power_comparator.readOp.gate_leakage = gatelkgCurrent * g_tp.peri_global.Vdd; - - return Tcomparatorni / (1.0 - VTHMUXNAND);; +double Mat::compute_comparator_delay(double inrisetime) { + int A = g_ip->tag_assoc; + + int tagbits_ = dp.tagbits / 4; // Assuming there are 4 quarter comparators. input tagbits is already + // a multiple of 4. + + /* First Inverter */ + double Ceq = gate_C(g_tp.w_comp_inv_n2 + g_tp.w_comp_inv_p2, 0, is_dram) + + drain_C_(g_tp.w_comp_inv_p1, PCH, 1, 1, g_tp.cell_h_def, is_dram) + + drain_C_(g_tp.w_comp_inv_n1, NCH, 1, 1, g_tp.cell_h_def, is_dram); + double Req = tr_R_on(g_tp.w_comp_inv_p1, PCH, 1, is_dram); + double tf = Req * Ceq; + double st1del = horowitz(inrisetime, tf, VTHCOMPINV, VTHCOMPINV, FALL); + double nextinputtime = st1del / VTHCOMPINV; + power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A; + + //For each degree of associativity + //there are 4 such quarter comparators + double lkgCurrent = cmos_Isub_leakage(g_tp.w_comp_inv_n1, + g_tp.w_comp_inv_p1, 1, inv, + is_dram) * 4 * A; + double gatelkgCurrent = cmos_Ig_leakage(g_tp.w_comp_inv_n1, + g_tp.w_comp_inv_p1, 1, inv, + is_dram) * 4 * A; + /* Second Inverter */ + Ceq = gate_C(g_tp.w_comp_inv_n3 + g_tp.w_comp_inv_p3, 0, is_dram) + + drain_C_(g_tp.w_comp_inv_p2, PCH, 1, 1, g_tp.cell_h_def, is_dram) + + drain_C_(g_tp.w_comp_inv_n2, NCH, 1, 1, g_tp.cell_h_def, is_dram); + Req = tr_R_on(g_tp.w_comp_inv_n2, NCH, 1, is_dram); + tf = Req * Ceq; + double st2del = horowitz(nextinputtime, tf, VTHCOMPINV, VTHCOMPINV, RISE); + nextinputtime = st2del / (1.0 - VTHCOMPINV); + power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A; + lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1, + inv, is_dram) * 4 * A; + gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1, + inv, is_dram) * 4 * A; + + /* Third Inverter */ + Ceq = gate_C(g_tp.w_eval_inv_n + g_tp.w_eval_inv_p, 0, is_dram) + + drain_C_(g_tp.w_comp_inv_p3, PCH, 1, 1, g_tp.cell_h_def, is_dram) + + drain_C_(g_tp.w_comp_inv_n3, NCH, 1, 1, g_tp.cell_h_def, is_dram); + Req = tr_R_on(g_tp.w_comp_inv_p3, PCH, 1, is_dram); + tf = Req * Ceq; + double st3del = horowitz(nextinputtime, tf, VTHCOMPINV, VTHEVALINV, FALL); + nextinputtime = st3del / (VTHEVALINV); + power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A; + lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1, + inv, is_dram) * 4 * A; + gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, + 1, inv, is_dram) * 4 * A; + + /* Final Inverter (virtual ground driver) discharging compare part */ + double r1 = tr_R_on(g_tp.w_comp_n, NCH, 2, is_dram); + double r2 = tr_R_on(g_tp.w_eval_inv_n, NCH, 1, is_dram); /* was switch */ + double c2 = (tagbits_) * (drain_C_(g_tp.w_comp_n, NCH, 1, 1, + g_tp.cell_h_def, is_dram) + + drain_C_(g_tp.w_comp_n, NCH, 2, 1, + g_tp.cell_h_def, is_dram)) + + drain_C_(g_tp.w_eval_inv_p, PCH, 1, 1, g_tp.cell_h_def, is_dram) + + drain_C_(g_tp.w_eval_inv_n, NCH, 1, 1, g_tp.cell_h_def, is_dram); + double c1 = (tagbits_) * (drain_C_(g_tp.w_comp_n, NCH, 1, 1, + g_tp.cell_h_def, is_dram) + + drain_C_(g_tp.w_comp_n, NCH, 2, 1, + g_tp.cell_h_def, is_dram)) + + drain_C_(g_tp.w_comp_p, PCH, 1, 1, g_tp.cell_h_def, is_dram) + + gate_C(WmuxdrvNANDn + WmuxdrvNANDp, 0, is_dram); + power_comparator.readOp.dynamic += 0.5 * c2 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A; + power_comparator.readOp.dynamic += c1 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * (A - 1); + lkgCurrent += cmos_Isub_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1, + inv, is_dram) * 4 * A; + lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv, + is_dram) * 4 * A; // stack factor of 0.2 + + gatelkgCurrent += cmos_Ig_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1, + inv, is_dram) * 4 * A; + //for gate leakage this equals to a inverter + gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv, + is_dram) * 4 * A; + + /* time to go to threshold of mux driver */ + double tstep = (r2 * c2 + (r1 + r2) * c1) * log(1.0 / VTHMUXNAND); + /* take into account non-zero input rise time */ + double m = g_tp.peri_global.Vdd / nextinputtime; + double Tcomparatorni; + + if ((tstep) <= (0.5*(g_tp.peri_global.Vdd - g_tp.peri_global.Vth) / m)) { + double a = m; + double b = 2 * ((g_tp.peri_global.Vdd * VTHEVALINV) - + g_tp.peri_global.Vth); + double c = -2 * (tstep) * (g_tp.peri_global.Vdd - + g_tp.peri_global.Vth) + 1 / m * + ((g_tp.peri_global.Vdd * VTHEVALINV) - g_tp.peri_global.Vth) * + ((g_tp.peri_global.Vdd * VTHEVALINV) - g_tp.peri_global.Vth); + Tcomparatorni = (-b + sqrt(b * b - 4 * a * c)) / (2 * a); + } else { + Tcomparatorni = (tstep) + (g_tp.peri_global.Vdd + + g_tp.peri_global.Vth) / (2 * m) - + (g_tp.peri_global.Vdd * VTHEVALINV) / m; + } + delay_comparator = Tcomparatorni + st1del + st2del + st3del; + power_comparator.readOp.leakage = lkgCurrent * g_tp.peri_global.Vdd; + power_comparator.readOp.gate_leakage = gatelkgCurrent * g_tp.peri_global.Vdd; + + return Tcomparatorni / (1.0 - VTHMUXNAND);; } -void Mat::compute_power_energy() -{ - //for cam and FA, power.readOp is the plain read power, power.searchOp is the associative search related power +void Mat::compute_power_energy() { + //for cam and FA, power.readOp is the plain read power, power.searchOp is the associative search related power //when search all subarrays and all mats are fully active - //when plain read/write only one subarray in a single mat is active. + //when plain read/write only one subarray in a single mat is active. // add energy consumed in predecoder drivers. This unit is shared by all subarrays in a mat. - power.readOp.dynamic += r_predec->power.readOp.dynamic + - b_mux_predec->power.readOp.dynamic + - sa_mux_lev_1_predec->power.readOp.dynamic + - sa_mux_lev_2_predec->power.readOp.dynamic; - - // add energy consumed in decoders - power_row_decoders.readOp.dynamic = row_dec->power.readOp.dynamic; - if (!(is_fa||pure_cam)) - power_row_decoders.readOp.dynamic *= num_subarrays_per_mat; - - // add energy consumed in bitline prechagers, SAs, and bitlines - if (!(is_fa||pure_cam)) - { - // add energy consumed in bitline prechagers - power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic; - power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat; - - //Add sense amps energy - num_sa_subarray = subarray.num_cols / deg_bl_muxing; - power_sa.readOp.dynamic *= num_sa_subarray*num_subarrays_per_mat ; - - // add energy consumed in bitlines - //cout<<"bitline power"<<power_bitline.readOp.dynamic<<endl; - power_bitline.readOp.dynamic *= num_subarrays_per_mat*subarray.num_cols; - power_bitline.writeOp.dynamic *= num_subarrays_per_mat*subarray.num_cols; - //cout<<"bitline power"<<power_bitline.readOp.dynamic<<"subarray"<<num_subarrays_per_mat<<"cols"<<subarray.num_cols<<endl; - //Add subarray output energy - power_subarray_out_drv.readOp.dynamic = - (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat; - - power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic + - power_sa.readOp.dynamic + - power_bitline.readOp.dynamic + - power_subarray_out_drv.readOp.dynamic; - - power.readOp.dynamic += power_row_decoders.readOp.dynamic + - bit_mux_dec->power.readOp.dynamic + - sa_mux_lev_1_dec->power.readOp.dynamic + - sa_mux_lev_2_dec->power.readOp.dynamic + - power_comparator.readOp.dynamic; - } - - else if (is_fa) - { - //for plain read/write only one subarray in a mat is active - // add energy consumed in bitline prechagers - power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic - + cam_bl_precharge_eq_drv->power.readOp.dynamic; - power_bl_precharge_eq_drv.searchOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic; - - //Add sense amps energy - num_sa_subarray = (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram)/ deg_bl_muxing; - num_sa_subarray_search = subarray.num_cols_fa_ram/ deg_bl_muxing; - power_sa.searchOp.dynamic = power_sa.readOp.dynamic*num_sa_subarray_search; - power_sa.readOp.dynamic *= num_sa_subarray; - - - // add energy consumed in bitlines - power_bitline.searchOp.dynamic = power_bitline.readOp.dynamic; - power_bitline.readOp.dynamic *= (subarray.num_cols_fa_cam+subarray.num_cols_fa_ram); - power_bitline.writeOp.dynamic *= (subarray.num_cols_fa_cam+subarray.num_cols_fa_ram); - power_bitline.searchOp.dynamic *= subarray.num_cols_fa_ram; - - //Add subarray output energy - power_subarray_out_drv.searchOp.dynamic = - (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat; - power_subarray_out_drv.readOp.dynamic = - (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat; - - - power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic + - power_sa.readOp.dynamic + - power_bitline.readOp.dynamic + - power_subarray_out_drv.readOp.dynamic; - - power.readOp.dynamic += power_row_decoders.readOp.dynamic + - bit_mux_dec->power.readOp.dynamic + - sa_mux_lev_1_dec->power.readOp.dynamic + - sa_mux_lev_2_dec->power.readOp.dynamic + - power_comparator.readOp.dynamic; - - //add energy consumed inside cam - power_matchline.searchOp.dynamic *= num_subarrays_per_mat; - power_searchline_precharge = sl_precharge_eq_drv->power; - power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat; - power_searchline = sl_data_drv->power; - power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic*subarray.num_cols_fa_cam* num_subarrays_per_mat;; - power_matchline_precharge = ml_precharge_drv->power; - power_matchline_precharge.searchOp.dynamic = power_matchline_precharge.readOp.dynamic* num_subarrays_per_mat; - power_ml_to_ram_wl_drv= ml_to_ram_wl_drv->power; - power_ml_to_ram_wl_drv.searchOp.dynamic= ml_to_ram_wl_drv->power.readOp.dynamic; - - power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic; - power_cam_all_active.searchOp.dynamic +=power_searchline_precharge.searchOp.dynamic; - power_cam_all_active.searchOp.dynamic +=power_searchline.searchOp.dynamic; - power_cam_all_active.searchOp.dynamic +=power_matchline_precharge.searchOp.dynamic; - - power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic; - //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic; - - } - else - { - // add energy consumed in bitline prechagers - power_bl_precharge_eq_drv.readOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic; - //power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat; - //power_bl_precharge_eq_drv.searchOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic; - //power_bl_precharge_eq_drv.searchOp.dynamic *= num_subarrays_per_mat; - - //Add sense amps energy - num_sa_subarray = subarray.num_cols_fa_cam/ deg_bl_muxing; - power_sa.readOp.dynamic *= num_sa_subarray;//*num_subarrays_per_mat; - power_sa.searchOp.dynamic = 0; - - power_bitline.readOp.dynamic *= subarray.num_cols_fa_cam; - power_bitline.searchOp.dynamic = 0; - power_bitline.writeOp.dynamic *= subarray.num_cols_fa_cam; - - power_subarray_out_drv.searchOp.dynamic = - (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat; - power_subarray_out_drv.readOp.dynamic = - (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat; - - power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic + - power_sa.readOp.dynamic + - power_bitline.readOp.dynamic + - power_subarray_out_drv.readOp.dynamic; - - power.readOp.dynamic += power_row_decoders.readOp.dynamic + - bit_mux_dec->power.readOp.dynamic + - sa_mux_lev_1_dec->power.readOp.dynamic + - sa_mux_lev_2_dec->power.readOp.dynamic + - power_comparator.readOp.dynamic; - - - ////add energy consumed inside cam - power_matchline.searchOp.dynamic *= num_subarrays_per_mat; - power_searchline_precharge = sl_precharge_eq_drv->power; - power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat; - power_searchline = sl_data_drv->power; - power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic*subarray.num_cols_fa_cam* num_subarrays_per_mat;; - power_matchline_precharge = ml_precharge_drv->power; - power_matchline_precharge.searchOp.dynamic = power_matchline_precharge.readOp.dynamic* num_subarrays_per_mat; - power_ml_to_ram_wl_drv= ml_to_ram_wl_drv->power; - power_ml_to_ram_wl_drv.searchOp.dynamic= ml_to_ram_wl_drv->power.readOp.dynamic; - - power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic; - power_cam_all_active.searchOp.dynamic +=power_searchline_precharge.searchOp.dynamic; - power_cam_all_active.searchOp.dynamic +=power_searchline.searchOp.dynamic; - power_cam_all_active.searchOp.dynamic +=power_matchline_precharge.searchOp.dynamic; - - power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic; - //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic; - - } - - - - // calculate leakage power - if (!(is_fa || pure_cam)) - { - int number_output_drivers_subarray = num_sa_subarray / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2); - - power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat; - power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; - power_sa.readOp.leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP); - - //num_sa_subarray = subarray.num_cols / deg_bl_muxing; - power_subarray_out_drv.readOp.leakage = - (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) * - number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP); - - power.readOp.leakage += power_bitline.readOp.leakage + - power_bl_precharge_eq_drv.readOp.leakage + - power_sa.readOp.leakage + - power_subarray_out_drv.readOp.leakage; - //cout<<"leakage"<<power.readOp.leakage<<endl; - - power_comparator.readOp.leakage *= num_do_b_mat * (RWP + ERP); - power.readOp.leakage += power_comparator.readOp.leakage; - - //cout<<"leakage1"<<power.readOp.leakage<<endl; - - // leakage power - power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat; - power_bit_mux_decoders.readOp.leakage = bit_mux_dec->power.readOp.leakage * deg_bl_muxing; - power_sa_mux_lev_1_decoders.readOp.leakage = sa_mux_lev_1_dec->power.readOp.leakage * dp.Ndsam_lev_1; - power_sa_mux_lev_2_decoders.readOp.leakage = sa_mux_lev_2_dec->power.readOp.leakage * dp.Ndsam_lev_2; - - power.readOp.leakage += r_predec->power.readOp.leakage + - b_mux_predec->power.readOp.leakage + - sa_mux_lev_1_predec->power.readOp.leakage + - sa_mux_lev_2_predec->power.readOp.leakage + - power_row_decoders.readOp.leakage + - power_bit_mux_decoders.readOp.leakage + - power_sa_mux_lev_1_decoders.readOp.leakage + - power_sa_mux_lev_2_decoders.readOp.leakage; - //cout<<"leakage2"<<power.readOp.leakage<<endl; - - //++++Below is gate leakage - power_bitline.readOp.gate_leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat; - power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat; - power_sa.readOp.gate_leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP); - - //num_sa_subarray = subarray.num_cols / deg_bl_muxing; - power_subarray_out_drv.readOp.gate_leakage = - (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) * - number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP); - - power.readOp.gate_leakage += power_bitline.readOp.gate_leakage + - power_bl_precharge_eq_drv.readOp.gate_leakage + - power_sa.readOp.gate_leakage + - power_subarray_out_drv.readOp.gate_leakage; - //cout<<"leakage"<<power.readOp.leakage<<endl; - - power_comparator.readOp.gate_leakage *= num_do_b_mat * (RWP + ERP); - power.readOp.gate_leakage += power_comparator.readOp.gate_leakage; - - //cout<<"leakage1"<<power.readOp.gate_leakage<<endl; - - // gate_leakage power - power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat; - power_bit_mux_decoders.readOp.gate_leakage = bit_mux_dec->power.readOp.gate_leakage * deg_bl_muxing; - power_sa_mux_lev_1_decoders.readOp.gate_leakage = sa_mux_lev_1_dec->power.readOp.gate_leakage * dp.Ndsam_lev_1; - power_sa_mux_lev_2_decoders.readOp.gate_leakage = sa_mux_lev_2_dec->power.readOp.gate_leakage * dp.Ndsam_lev_2; - - power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage + - b_mux_predec->power.readOp.gate_leakage + - sa_mux_lev_1_predec->power.readOp.gate_leakage + - sa_mux_lev_2_predec->power.readOp.gate_leakage + - power_row_decoders.readOp.gate_leakage + - power_bit_mux_decoders.readOp.gate_leakage + - power_sa_mux_lev_1_decoders.readOp.gate_leakage + - power_sa_mux_lev_2_decoders.readOp.gate_leakage; - } - else if (is_fa) - { - int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2); - - power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat; - power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; - power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; - power_sa.readOp.leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP); - - //cout<<"leakage3"<<power.readOp.leakage<<endl; - - - power_subarray_out_drv.readOp.leakage = - (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) * - number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP); - - power.readOp.leakage += power_bitline.readOp.leakage + - power_bl_precharge_eq_drv.readOp.leakage + - power_bl_precharge_eq_drv.searchOp.leakage + - power_sa.readOp.leakage + - power_subarray_out_drv.readOp.leakage; - - //cout<<"leakage4"<<power.readOp.leakage<<endl; - - // leakage power - power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat; - power.readOp.leakage += r_predec->power.readOp.leakage + - power_row_decoders.readOp.leakage; - - //cout<<"leakage5"<<power.readOp.leakage<<endl; - - //inside cam - power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage; - power_cam_all_active.searchOp.leakage +=sl_precharge_eq_drv->power.readOp.leakage; - power_cam_all_active.searchOp.leakage +=sl_data_drv->power.readOp.leakage*subarray.num_cols_fa_cam; - power_cam_all_active.searchOp.leakage +=ml_precharge_drv->power.readOp.dynamic; - power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat; - - power.readOp.leakage += power_cam_all_active.searchOp.leakage; - -// cout<<"leakage6"<<power.readOp.leakage<<endl; - - //+++Below is gate leakage - power_bitline.readOp.gate_leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat; - power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat; - power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat; - power_sa.readOp.gate_leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP); - - //cout<<"leakage3"<<power.readOp.gate_leakage<<endl; - - - power_subarray_out_drv.readOp.gate_leakage = - (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) * - number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP); - - power.readOp.gate_leakage += power_bitline.readOp.gate_leakage + - power_bl_precharge_eq_drv.readOp.gate_leakage + - power_bl_precharge_eq_drv.searchOp.gate_leakage + - power_sa.readOp.gate_leakage + - power_subarray_out_drv.readOp.gate_leakage; - - //cout<<"leakage4"<<power.readOp.gate_leakage<<endl; - - // gate_leakage power - power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat; - power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage + - power_row_decoders.readOp.gate_leakage; + power.readOp.dynamic += r_predec->power.readOp.dynamic + + b_mux_predec->power.readOp.dynamic + + sa_mux_lev_1_predec->power.readOp.dynamic + + sa_mux_lev_2_predec->power.readOp.dynamic; + + // add energy consumed in decoders + power_row_decoders.readOp.dynamic = row_dec->power.readOp.dynamic; + if (!(is_fa || pure_cam)) + power_row_decoders.readOp.dynamic *= num_subarrays_per_mat; + + // add energy consumed in bitline prechagers, SAs, and bitlines + if (!(is_fa || pure_cam)) { + // add energy consumed in bitline prechagers + power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic; + power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat; + + //Add sense amps energy + num_sa_subarray = subarray.num_cols / deg_bl_muxing; + power_sa.readOp.dynamic *= num_sa_subarray * num_subarrays_per_mat ; + + // add energy consumed in bitlines + //cout<<"bitline power"<<power_bitline.readOp.dynamic<<endl; + power_bitline.readOp.dynamic *= num_subarrays_per_mat * + subarray.num_cols; + power_bitline.writeOp.dynamic *= num_subarrays_per_mat * + subarray.num_cols; + //cout<<"bitline power"<<power_bitline.readOp.dynamic<<"subarray"<<num_subarrays_per_mat<<"cols"<<subarray.num_cols<<endl; + //Add subarray output energy + power_subarray_out_drv.readOp.dynamic = + (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat; + + power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic + + power_sa.readOp.dynamic + + power_bitline.readOp.dynamic + + power_subarray_out_drv.readOp.dynamic; + + power.readOp.dynamic += power_row_decoders.readOp.dynamic + + bit_mux_dec->power.readOp.dynamic + + sa_mux_lev_1_dec->power.readOp.dynamic + + sa_mux_lev_2_dec->power.readOp.dynamic + + power_comparator.readOp.dynamic; + } - //cout<<"leakage5"<<power.readOp.gate_leakage<<endl; + else if (is_fa) { + //for plain read/write only one subarray in a mat is active + // add energy consumed in bitline prechagers + power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic + + cam_bl_precharge_eq_drv->power.readOp.dynamic; + power_bl_precharge_eq_drv.searchOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic; + + //Add sense amps energy + num_sa_subarray = (subarray.num_cols_fa_cam + + subarray.num_cols_fa_ram) / deg_bl_muxing; + num_sa_subarray_search = subarray.num_cols_fa_ram / deg_bl_muxing; + power_sa.searchOp.dynamic = power_sa.readOp.dynamic * + num_sa_subarray_search; + power_sa.readOp.dynamic *= num_sa_subarray; + + + // add energy consumed in bitlines + power_bitline.searchOp.dynamic = power_bitline.readOp.dynamic; + power_bitline.readOp.dynamic *= (subarray.num_cols_fa_cam + + subarray.num_cols_fa_ram); + power_bitline.writeOp.dynamic *= (subarray.num_cols_fa_cam + + subarray.num_cols_fa_ram); + power_bitline.searchOp.dynamic *= subarray.num_cols_fa_ram; + + //Add subarray output energy + power_subarray_out_drv.searchOp.dynamic = + (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat; + power_subarray_out_drv.readOp.dynamic = + (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat; + + + power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic + + power_sa.readOp.dynamic + + power_bitline.readOp.dynamic + + power_subarray_out_drv.readOp.dynamic; + + power.readOp.dynamic += power_row_decoders.readOp.dynamic + + bit_mux_dec->power.readOp.dynamic + + sa_mux_lev_1_dec->power.readOp.dynamic + + sa_mux_lev_2_dec->power.readOp.dynamic + + power_comparator.readOp.dynamic; + + //add energy consumed inside cam + power_matchline.searchOp.dynamic *= num_subarrays_per_mat; + power_searchline_precharge = sl_precharge_eq_drv->power; + power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat; + power_searchline = sl_data_drv->power; + power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic * + subarray.num_cols_fa_cam * num_subarrays_per_mat;; + power_matchline_precharge = ml_precharge_drv->power; + power_matchline_precharge.searchOp.dynamic = + power_matchline_precharge.readOp.dynamic * num_subarrays_per_mat; + power_ml_to_ram_wl_drv = ml_to_ram_wl_drv->power; + power_ml_to_ram_wl_drv.searchOp.dynamic = + ml_to_ram_wl_drv->power.readOp.dynamic; + + power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic; + power_cam_all_active.searchOp.dynamic += + power_searchline_precharge.searchOp.dynamic; + power_cam_all_active.searchOp.dynamic += + power_searchline.searchOp.dynamic; + power_cam_all_active.searchOp.dynamic += + power_matchline_precharge.searchOp.dynamic; + + power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic; + //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic; + + } else { + // add energy consumed in bitline prechagers + power_bl_precharge_eq_drv.readOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic; + //power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat; + //power_bl_precharge_eq_drv.searchOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic; + //power_bl_precharge_eq_drv.searchOp.dynamic *= num_subarrays_per_mat; + + //Add sense amps energy + num_sa_subarray = subarray.num_cols_fa_cam / deg_bl_muxing; + power_sa.readOp.dynamic *= num_sa_subarray;//*num_subarrays_per_mat; + power_sa.searchOp.dynamic = 0; + + power_bitline.readOp.dynamic *= subarray.num_cols_fa_cam; + power_bitline.searchOp.dynamic = 0; + power_bitline.writeOp.dynamic *= subarray.num_cols_fa_cam; + + power_subarray_out_drv.searchOp.dynamic = + (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat; + power_subarray_out_drv.readOp.dynamic = + (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat; + + power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic + + power_sa.readOp.dynamic + + power_bitline.readOp.dynamic + + power_subarray_out_drv.readOp.dynamic; + + power.readOp.dynamic += power_row_decoders.readOp.dynamic + + bit_mux_dec->power.readOp.dynamic + + sa_mux_lev_1_dec->power.readOp.dynamic + + sa_mux_lev_2_dec->power.readOp.dynamic + + power_comparator.readOp.dynamic; + + + ////add energy consumed inside cam + power_matchline.searchOp.dynamic *= num_subarrays_per_mat; + power_searchline_precharge = sl_precharge_eq_drv->power; + power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat; + power_searchline = sl_data_drv->power; + power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic * + subarray.num_cols_fa_cam * num_subarrays_per_mat;; + power_matchline_precharge = ml_precharge_drv->power; + power_matchline_precharge.searchOp.dynamic = + power_matchline_precharge.readOp.dynamic * num_subarrays_per_mat; + power_ml_to_ram_wl_drv = ml_to_ram_wl_drv->power; + power_ml_to_ram_wl_drv.searchOp.dynamic = + ml_to_ram_wl_drv->power.readOp.dynamic; + + power_cam_all_active.searchOp.dynamic = + power_matchline.searchOp.dynamic; + power_cam_all_active.searchOp.dynamic += + power_searchline_precharge.searchOp.dynamic; + power_cam_all_active.searchOp.dynamic += + power_searchline.searchOp.dynamic; + power_cam_all_active.searchOp.dynamic += + power_matchline_precharge.searchOp.dynamic; + + power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic; + //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic; - //inside cam - power_cam_all_active.searchOp.gate_leakage = power_matchline.searchOp.gate_leakage; - power_cam_all_active.searchOp.gate_leakage +=sl_precharge_eq_drv->power.readOp.gate_leakage; - power_cam_all_active.searchOp.gate_leakage +=sl_data_drv->power.readOp.gate_leakage*subarray.num_cols_fa_cam; - power_cam_all_active.searchOp.gate_leakage +=ml_precharge_drv->power.readOp.dynamic; - power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat; + } - power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage; - } - else - { - int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2); - //power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat; - //power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; - power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; - power_sa.readOp.leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP); + // calculate leakage power + if (!(is_fa || pure_cam)) { + int number_output_drivers_subarray = num_sa_subarray / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2); + power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat; + power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; + power_sa.readOp.leakage *= num_sa_subarray * num_subarrays_per_mat * + (RWP + ERP); + + //num_sa_subarray = subarray.num_cols / deg_bl_muxing; + power_subarray_out_drv.readOp.leakage = + (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) * + number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP); + + power.readOp.leakage += power_bitline.readOp.leakage + + power_bl_precharge_eq_drv.readOp.leakage + + power_sa.readOp.leakage + + power_subarray_out_drv.readOp.leakage; + //cout<<"leakage"<<power.readOp.leakage<<endl; + + power_comparator.readOp.leakage *= num_do_b_mat * (RWP + ERP); + power.readOp.leakage += power_comparator.readOp.leakage; + + //cout<<"leakage1"<<power.readOp.leakage<<endl; + + // leakage power + power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat; + power_bit_mux_decoders.readOp.leakage = bit_mux_dec->power.readOp.leakage * deg_bl_muxing; + power_sa_mux_lev_1_decoders.readOp.leakage = sa_mux_lev_1_dec->power.readOp.leakage * dp.Ndsam_lev_1; + power_sa_mux_lev_2_decoders.readOp.leakage = sa_mux_lev_2_dec->power.readOp.leakage * dp.Ndsam_lev_2; + + power.readOp.leakage += r_predec->power.readOp.leakage + + b_mux_predec->power.readOp.leakage + + sa_mux_lev_1_predec->power.readOp.leakage + + sa_mux_lev_2_predec->power.readOp.leakage + + power_row_decoders.readOp.leakage + + power_bit_mux_decoders.readOp.leakage + + power_sa_mux_lev_1_decoders.readOp.leakage + + power_sa_mux_lev_2_decoders.readOp.leakage; + //cout<<"leakage2"<<power.readOp.leakage<<endl; + + //++++Below is gate leakage + power_bitline.readOp.gate_leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat; + power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat; + power_sa.readOp.gate_leakage *= num_sa_subarray * + num_subarrays_per_mat * (RWP + ERP); + + //num_sa_subarray = subarray.num_cols / deg_bl_muxing; + power_subarray_out_drv.readOp.gate_leakage = + (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) * + number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP); + + power.readOp.gate_leakage += power_bitline.readOp.gate_leakage + + power_bl_precharge_eq_drv.readOp.gate_leakage + + power_sa.readOp.gate_leakage + + power_subarray_out_drv.readOp.gate_leakage; + //cout<<"leakage"<<power.readOp.leakage<<endl; + + power_comparator.readOp.gate_leakage *= num_do_b_mat * (RWP + ERP); + power.readOp.gate_leakage += power_comparator.readOp.gate_leakage; + + //cout<<"leakage1"<<power.readOp.gate_leakage<<endl; + + // gate_leakage power + power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat; + power_bit_mux_decoders.readOp.gate_leakage = bit_mux_dec->power.readOp.gate_leakage * deg_bl_muxing; + power_sa_mux_lev_1_decoders.readOp.gate_leakage = sa_mux_lev_1_dec->power.readOp.gate_leakage * dp.Ndsam_lev_1; + power_sa_mux_lev_2_decoders.readOp.gate_leakage = sa_mux_lev_2_dec->power.readOp.gate_leakage * dp.Ndsam_lev_2; + + power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage + + b_mux_predec->power.readOp.gate_leakage + + sa_mux_lev_1_predec->power.readOp.gate_leakage + + sa_mux_lev_2_predec->power.readOp.gate_leakage + + power_row_decoders.readOp.gate_leakage + + power_bit_mux_decoders.readOp.gate_leakage + + power_sa_mux_lev_1_decoders.readOp.gate_leakage + + power_sa_mux_lev_2_decoders.readOp.gate_leakage; + } else if (is_fa) { + int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2); - power_subarray_out_drv.readOp.leakage = - (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) * - number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP); + power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat; + power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; + power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; + power_sa.readOp.leakage *= num_sa_subarray * num_subarrays_per_mat * + (RWP + ERP + SCHP); - power.readOp.leakage += //power_bitline.readOp.leakage + - //power_bl_precharge_eq_drv.readOp.leakage + - power_bl_precharge_eq_drv.searchOp.leakage + - power_sa.readOp.leakage + - power_subarray_out_drv.readOp.leakage; + //cout<<"leakage3"<<power.readOp.leakage<<endl; - // leakage power - power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat*(RWP + ERP + EWP); - power.readOp.leakage += r_predec->power.readOp.leakage + - power_row_decoders.readOp.leakage; - //inside cam - power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage; - power_cam_all_active.searchOp.leakage +=sl_precharge_eq_drv->power.readOp.leakage; - power_cam_all_active.searchOp.leakage +=sl_data_drv->power.readOp.leakage*subarray.num_cols_fa_cam; - power_cam_all_active.searchOp.leakage +=ml_precharge_drv->power.readOp.dynamic; - power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat; + power_subarray_out_drv.readOp.leakage = + (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) * + number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP); - power.readOp.leakage += power_cam_all_active.searchOp.leakage; + power.readOp.leakage += power_bitline.readOp.leakage + + power_bl_precharge_eq_drv.readOp.leakage + + power_bl_precharge_eq_drv.searchOp.leakage + + power_sa.readOp.leakage + + power_subarray_out_drv.readOp.leakage; - //+++Below is gate leakage - power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat; - power_sa.readOp.gate_leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP); + //cout<<"leakage4"<<power.readOp.leakage<<endl; + // leakage power + power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat; + power.readOp.leakage += r_predec->power.readOp.leakage + + power_row_decoders.readOp.leakage; - power_subarray_out_drv.readOp.gate_leakage = - (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) * - number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP); + //cout<<"leakage5"<<power.readOp.leakage<<endl; - power.readOp.gate_leakage += //power_bitline.readOp.gate_leakage + - //power_bl_precharge_eq_drv.readOp.gate_leakage + - power_bl_precharge_eq_drv.searchOp.gate_leakage + - power_sa.readOp.gate_leakage + - power_subarray_out_drv.readOp.gate_leakage; + //inside cam + power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage; + power_cam_all_active.searchOp.leakage += + sl_precharge_eq_drv->power.readOp.leakage; + power_cam_all_active.searchOp.leakage += + sl_data_drv->power.readOp.leakage * subarray.num_cols_fa_cam; + power_cam_all_active.searchOp.leakage += + ml_precharge_drv->power.readOp.dynamic; + power_cam_all_active.searchOp.leakage *= + num_subarrays_per_mat; - // gate_leakage power - power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat*(RWP + ERP + EWP); - power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage + - power_row_decoders.readOp.gate_leakage; + power.readOp.leakage += power_cam_all_active.searchOp.leakage; - //inside cam - power_cam_all_active.searchOp.gate_leakage = power_matchline.searchOp.gate_leakage; - power_cam_all_active.searchOp.gate_leakage +=sl_precharge_eq_drv->power.readOp.gate_leakage; - power_cam_all_active.searchOp.gate_leakage +=sl_data_drv->power.readOp.gate_leakage*subarray.num_cols_fa_cam; - power_cam_all_active.searchOp.gate_leakage +=ml_precharge_drv->power.readOp.dynamic; - power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat; +// cout<<"leakage6"<<power.readOp.leakage<<endl; - power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage; - } + //+++Below is gate leakage + power_bitline.readOp.gate_leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat; + power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat; + power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat; + power_sa.readOp.gate_leakage *= num_sa_subarray * + num_subarrays_per_mat * (RWP + ERP + SCHP); + + //cout<<"leakage3"<<power.readOp.gate_leakage<<endl; + + + power_subarray_out_drv.readOp.gate_leakage = + (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) * + number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP); + + power.readOp.gate_leakage += power_bitline.readOp.gate_leakage + + power_bl_precharge_eq_drv.readOp.gate_leakage + + power_bl_precharge_eq_drv.searchOp.gate_leakage + + power_sa.readOp.gate_leakage + + power_subarray_out_drv.readOp.gate_leakage; + + //cout<<"leakage4"<<power.readOp.gate_leakage<<endl; + + // gate_leakage power + power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat; + power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage + + power_row_decoders.readOp.gate_leakage; + + //cout<<"leakage5"<<power.readOp.gate_leakage<<endl; + + //inside cam + power_cam_all_active.searchOp.gate_leakage = + power_matchline.searchOp.gate_leakage; + power_cam_all_active.searchOp.gate_leakage += + sl_precharge_eq_drv->power.readOp.gate_leakage; + power_cam_all_active.searchOp.gate_leakage += + sl_data_drv->power.readOp.gate_leakage * subarray.num_cols_fa_cam; + power_cam_all_active.searchOp.gate_leakage += + ml_precharge_drv->power.readOp.dynamic; + power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat; + + power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage; + + } else { + int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2); + + //power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat; + //power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; + power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; + power_sa.readOp.leakage *= num_sa_subarray * num_subarrays_per_mat * + (RWP + ERP + SCHP); + + + power_subarray_out_drv.readOp.leakage = + (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) * + number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP); + + power.readOp.leakage += //power_bitline.readOp.leakage + + //power_bl_precharge_eq_drv.readOp.leakage + + power_bl_precharge_eq_drv.searchOp.leakage + + power_sa.readOp.leakage + + power_subarray_out_drv.readOp.leakage; + + // leakage power + power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * + subarray.num_rows * num_subarrays_per_mat * (RWP + ERP + EWP); + power.readOp.leakage += r_predec->power.readOp.leakage + + power_row_decoders.readOp.leakage; + + //inside cam + power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage; + power_cam_all_active.searchOp.leakage += + sl_precharge_eq_drv->power.readOp.leakage; + power_cam_all_active.searchOp.leakage += + sl_data_drv->power.readOp.leakage * subarray.num_cols_fa_cam; + power_cam_all_active.searchOp.leakage += + ml_precharge_drv->power.readOp.dynamic; + power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat; + + power.readOp.leakage += power_cam_all_active.searchOp.leakage; + + //+++Below is gate leakage + power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat; + power_sa.readOp.gate_leakage *= num_sa_subarray * + num_subarrays_per_mat * (RWP + ERP + SCHP); + + + power_subarray_out_drv.readOp.gate_leakage = + (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) * + number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP); + + power.readOp.gate_leakage += //power_bitline.readOp.gate_leakage + + //power_bl_precharge_eq_drv.readOp.gate_leakage + + power_bl_precharge_eq_drv.searchOp.gate_leakage + + power_sa.readOp.gate_leakage + + power_subarray_out_drv.readOp.gate_leakage; + + // gate_leakage power + power_row_decoders.readOp.gate_leakage = + row_dec->power.readOp.gate_leakage * subarray.num_rows * + num_subarrays_per_mat * (RWP + ERP + EWP); + power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage + + power_row_decoders.readOp.gate_leakage; + + //inside cam + power_cam_all_active.searchOp.gate_leakage = + power_matchline.searchOp.gate_leakage; + power_cam_all_active.searchOp.gate_leakage += + sl_precharge_eq_drv->power.readOp.gate_leakage; + power_cam_all_active.searchOp.gate_leakage += + sl_data_drv->power.readOp.gate_leakage * subarray.num_cols_fa_cam; + power_cam_all_active.searchOp.gate_leakage += + ml_precharge_drv->power.readOp.dynamic; + power_cam_all_active.searchOp.gate_leakage *= + num_subarrays_per_mat; + + power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage; + } } |