summaryrefslogtreecommitdiff
path: root/ext/mcpat/cacti/mat.cc
diff options
context:
space:
mode:
authorYasuko Eckert <yasuko.eckert@amd.com>2014-06-03 13:32:59 -0700
committerYasuko Eckert <yasuko.eckert@amd.com>2014-06-03 13:32:59 -0700
commit0deef376d96bfe0a3a2496714ac22471d9ee818a (patch)
tree43d383a5bc4315863240dd61f7a4077ce2ac86e7 /ext/mcpat/cacti/mat.cc
parent1104199115a6ff5ed04f92ba6391f18728765014 (diff)
downloadgem5-0deef376d96bfe0a3a2496714ac22471d9ee818a.tar.xz
ext: McPAT interface changes and fixes
This patch includes software engineering changes and some generic bug fixes Joel Hestness and Yasuko Eckert made to McPAT 0.8. There are still known issues/concerns we did not have a chance to address in this patch. High-level changes in this patch include: 1) Making XML parsing modular and hierarchical: - Shift parsing responsibility into the components - Read XML in a (mostly) context-free recursive manner so that McPAT input files can contain arbitrary component hierarchies 2) Making power, energy, and area calculations a hierarchical and recursive process - Components track their subcomponents and recursively call compute functions in stages - Make C++ object hierarchy reflect inheritance of classes of components with similar structures - Simplify computeArea() and computeEnergy() functions to eliminate successive calls to calculate separate TDP vs. runtime energy - Remove Processor component (now unnecessary) and introduce a more abstract System component 3) Standardizing McPAT output across all components - Use a single, common data structure for storing and printing McPAT output - Recursively call print functions through component hierarchy 4) For caches, allow splitting data array and tag array reads and writes for better accuracy 5) Improving the usability of CACTI by printing more helpful warning and error messages 6) Minor: Impose more rigorous code style for clarity (more work still to be done) Overall, these changes greatly reduce the amount of replicated code, and they improve McPAT runtime and decrease memory footprint.
Diffstat (limited to 'ext/mcpat/cacti/mat.cc')
-rw-r--r--[-rwxr-xr-x]ext/mcpat/cacti/mat.cc3282
1 files changed, 1707 insertions, 1575 deletions
diff --git a/ext/mcpat/cacti/mat.cc b/ext/mcpat/cacti/mat.cc
index ef98107c7..447996053 100755..100644
--- a/ext/mcpat/cacti/mat.cc
+++ b/ext/mcpat/cacti/mat.cc
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -36,371 +37,369 @@
#include "mat.h"
Mat::Mat(const DynamicParameter & dyn_p)
- :dp(dyn_p),
- power_subarray_out_drv(),
- delay_fa_tag(0), delay_cam(0),
- delay_before_decoder(0), delay_bitline(0),
- delay_wl_reset(0), delay_bl_restore(0),
- delay_searchline(0), delay_matchchline(0),
- delay_cam_sl_restore(0), delay_cam_ml_reset(0),
- delay_fa_ram_wl(0),delay_hit_miss_reset(0),
- delay_hit_miss(0),
- subarray(dp, dp.fully_assoc),
- power_bitline(), per_bitline_read_energy(0),
- deg_bl_muxing(dp.deg_bl_muxing),
- num_act_mats_hor_dir(dyn_p.num_act_mats_hor_dir),
- delay_writeback(0),
- cell(subarray.cell), cam_cell(subarray.cam_cell),
- is_dram(dyn_p.is_dram),
- pure_cam(dyn_p.pure_cam),
- num_mats(dp.num_mats),
- power_sa(), delay_sa(0),
- leak_power_sense_amps_closed_page_state(0),
- leak_power_sense_amps_open_page_state(0),
- delay_subarray_out_drv(0),
- delay_comparator(0), power_comparator(),
- num_do_b_mat(dyn_p.num_do_b_mat), num_so_b_mat(dyn_p.num_so_b_mat),
- num_subarrays_per_mat(dp.num_subarrays/dp.num_mats),
- num_subarrays_per_row(dp.Ndwl/dp.num_mats_h_dir)
-{
- assert(num_subarrays_per_mat <= 4);
- assert(num_subarrays_per_row <= 2);
- is_fa = (dp.fully_assoc) ? true : false;
- camFlag = (is_fa || pure_cam);//although cam_cell.w = cell.w for fa, we still differentiate them.
-
- if (is_fa || pure_cam)
- num_subarrays_per_row = num_subarrays_per_mat>2?num_subarrays_per_mat/2:num_subarrays_per_mat;
-
- if (dp.use_inp_params == 1) {
- RWP = dp.num_rw_ports;
- ERP = dp.num_rd_ports;
- EWP = dp.num_wr_ports;
- SCHP = dp.num_search_ports;
- }
- else {
- RWP = g_ip->num_rw_ports;
- ERP = g_ip->num_rd_ports;
- EWP = g_ip->num_wr_ports;
- SCHP = g_ip->num_search_ports;
-
- }
-
- double number_sa_subarray;
-
- if (!is_fa && !pure_cam)
- {
- number_sa_subarray = subarray.num_cols / deg_bl_muxing;
- }
- else if (is_fa && !pure_cam)
- {
- number_sa_subarray = (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram) / deg_bl_muxing;
- }
-
- else
- {
- number_sa_subarray = (subarray.num_cols_fa_cam) / deg_bl_muxing;
- }
-
- int num_dec_signals = subarray.num_rows;
- double C_ld_bit_mux_dec_out = 0;
- double C_ld_sa_mux_lev_1_dec_out = 0;
- double C_ld_sa_mux_lev_2_dec_out = 0;
- double R_wire_wl_drv_out;
-
- if (!is_fa && !pure_cam)
- {
- R_wire_wl_drv_out = subarray.num_cols * cell.w * g_tp.wire_local.R_per_um;
+ : dp(dyn_p),
+ power_subarray_out_drv(),
+ delay_fa_tag(0), delay_cam(0),
+ delay_before_decoder(0), delay_bitline(0),
+ delay_wl_reset(0), delay_bl_restore(0),
+ delay_searchline(0), delay_matchchline(0),
+ delay_cam_sl_restore(0), delay_cam_ml_reset(0),
+ delay_fa_ram_wl(0), delay_hit_miss_reset(0),
+ delay_hit_miss(0),
+ subarray(dp, dp.fully_assoc),
+ power_bitline(), per_bitline_read_energy(0),
+ deg_bl_muxing(dp.deg_bl_muxing),
+ num_act_mats_hor_dir(dyn_p.num_act_mats_hor_dir),
+ delay_writeback(0),
+ cell(subarray.cell), cam_cell(subarray.cam_cell),
+ is_dram(dyn_p.is_dram),
+ pure_cam(dyn_p.pure_cam),
+ num_mats(dp.num_mats),
+ power_sa(), delay_sa(0),
+ leak_power_sense_amps_closed_page_state(0),
+ leak_power_sense_amps_open_page_state(0),
+ delay_subarray_out_drv(0),
+ delay_comparator(0), power_comparator(),
+ num_do_b_mat(dyn_p.num_do_b_mat), num_so_b_mat(dyn_p.num_so_b_mat),
+ num_subarrays_per_mat(dp.num_subarrays / dp.num_mats),
+ num_subarrays_per_row(dp.Ndwl / dp.num_mats_h_dir) {
+ assert(num_subarrays_per_mat <= 4);
+ assert(num_subarrays_per_row <= 2);
+ is_fa = (dp.fully_assoc) ? true : false;
+ camFlag = (is_fa || pure_cam);//although cam_cell.w = cell.w for fa, we still differentiate them.
+
+ if (is_fa || pure_cam) {
+ num_subarrays_per_row = num_subarrays_per_mat > 2 ?
+ num_subarrays_per_mat / 2 : num_subarrays_per_mat;
}
- else if (is_fa && !pure_cam)
- {
- R_wire_wl_drv_out = (subarray.num_cols_fa_cam * cam_cell.w + subarray.num_cols_fa_ram * cell.w) * g_tp.wire_local.R_per_um ;
+
+ if (dp.use_inp_params == 1) {
+ RWP = dp.num_rw_ports;
+ ERP = dp.num_rd_ports;
+ EWP = dp.num_wr_ports;
+ SCHP = dp.num_search_ports;
+ } else {
+ RWP = g_ip->num_rw_ports;
+ ERP = g_ip->num_rd_ports;
+ EWP = g_ip->num_wr_ports;
+ SCHP = g_ip->num_search_ports;
+
+ }
+
+ double number_sa_subarray;
+
+ if (!is_fa && !pure_cam) {
+ number_sa_subarray = subarray.num_cols / deg_bl_muxing;
+ } else if (is_fa && !pure_cam) {
+ number_sa_subarray = (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram) / deg_bl_muxing;
+ }
+
+ else {
+ number_sa_subarray = (subarray.num_cols_fa_cam) / deg_bl_muxing;
}
- else
- {
+
+ int num_dec_signals = subarray.num_rows;
+ double C_ld_bit_mux_dec_out = 0;
+ double C_ld_sa_mux_lev_1_dec_out = 0;
+ double C_ld_sa_mux_lev_2_dec_out = 0;
+ double R_wire_wl_drv_out;
+
+ if (!is_fa && !pure_cam) {
+ R_wire_wl_drv_out = subarray.num_cols * cell.w * g_tp.wire_local.R_per_um;
+ } else if (is_fa && !pure_cam) {
+ R_wire_wl_drv_out = (subarray.num_cols_fa_cam * cam_cell.w + subarray.num_cols_fa_ram * cell.w) * g_tp.wire_local.R_per_um ;
+ } else {
R_wire_wl_drv_out = (subarray.num_cols_fa_cam * cam_cell.w ) * g_tp.wire_local.R_per_um;
}
- double R_wire_bit_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;//TODO:revisit for FA
- double R_wire_sa_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;
-
- if (deg_bl_muxing > 1)
- {
- C_ld_bit_mux_dec_out =
- (2 * num_subarrays_per_mat * subarray.num_cols / deg_bl_muxing)*gate_C(g_tp.w_nmos_b_mux, 0, is_dram) + // 2 transistor per cell
- num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w();
- }
-
- if (dp.Ndsam_lev_1 > 1)
- {
- C_ld_sa_mux_lev_1_dec_out =
- (num_subarrays_per_mat * number_sa_subarray / dp.Ndsam_lev_1)*gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) +
- num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w();
- }
- if (dp.Ndsam_lev_2 > 1)
- {
- C_ld_sa_mux_lev_2_dec_out =
- (num_subarrays_per_mat * number_sa_subarray / (dp.Ndsam_lev_1*dp.Ndsam_lev_2))*gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) +
- num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w();
- }
-
- if (num_subarrays_per_row >= 2)
- {
- // wire heads for both right and left side of a mat, so half the resistance
- R_wire_bit_mux_dec_out /= 2.0;
- R_wire_sa_mux_dec_out /= 2.0;
- }
-
-
- row_dec = new Decoder(
- num_dec_signals,
- false,
- subarray.C_wl,
- R_wire_wl_drv_out,
- false/*is_fa*/,
- is_dram,
- true,
- camFlag? cam_cell:cell);
+ double R_wire_bit_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;//TODO:revisit for FA
+ double R_wire_sa_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;
+
+ if (deg_bl_muxing > 1) {
+ C_ld_bit_mux_dec_out =
+ (2 * num_subarrays_per_mat * subarray.num_cols / deg_bl_muxing) *
+ gate_C(g_tp.w_nmos_b_mux, 0, is_dram) + // 2 transistor per cell
+ num_subarrays_per_row * subarray.num_cols *
+ g_tp.wire_inside_mat.C_per_um * cell.get_w();
+ }
+
+ if (dp.Ndsam_lev_1 > 1) {
+ C_ld_sa_mux_lev_1_dec_out =
+ (num_subarrays_per_mat * number_sa_subarray / dp.Ndsam_lev_1) *
+ gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) +
+ num_subarrays_per_row * subarray.num_cols *
+ g_tp.wire_inside_mat.C_per_um * cell.get_w();
+ }
+ if (dp.Ndsam_lev_2 > 1) {
+ C_ld_sa_mux_lev_2_dec_out =
+ (num_subarrays_per_mat * number_sa_subarray /
+ (dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) *
+ gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) +
+ num_subarrays_per_row * subarray.num_cols *
+ g_tp.wire_inside_mat.C_per_um * cell.get_w();
+ }
+
+ if (num_subarrays_per_row >= 2) {
+ // wire heads for both right and left side of a mat, so half the resistance
+ R_wire_bit_mux_dec_out /= 2.0;
+ R_wire_sa_mux_dec_out /= 2.0;
+ }
+
+
+ row_dec = new Decoder(
+ num_dec_signals,
+ false,
+ subarray.C_wl,
+ R_wire_wl_drv_out,
+ false/*is_fa*/,
+ is_dram,
+ true,
+ camFlag ? cam_cell : cell);
// if (is_fa && (!dp.is_tag))
// {
// row_dec->exist = true;
// }
- bit_mux_dec = new Decoder(
- deg_bl_muxing,// This number is 1 for FA or CAM
- false,
- C_ld_bit_mux_dec_out,
- R_wire_bit_mux_dec_out,
- false/*is_fa*/,
- is_dram,
- false,
- camFlag? cam_cell:cell);
- sa_mux_lev_1_dec = new Decoder(
- dp.deg_senseamp_muxing_non_associativity, // This number is 1 for FA or CAM
- dp.number_way_select_signals_mat ? true : false,//only sa_mux_lev_1_dec needs way select signal
- C_ld_sa_mux_lev_1_dec_out,
- R_wire_sa_mux_dec_out,
- false/*is_fa*/,
- is_dram,
- false,
- camFlag? cam_cell:cell);
- sa_mux_lev_2_dec = new Decoder(
- dp.Ndsam_lev_2, // This number is 1 for FA or CAM
- false,
- C_ld_sa_mux_lev_2_dec_out,
- R_wire_sa_mux_dec_out,
- false/*is_fa*/,
- is_dram,
- false,
- camFlag? cam_cell:cell);
-
- double C_wire_predec_blk_out;
- double R_wire_predec_blk_out;
-
- if (!is_fa && !pure_cam)
- {
+ bit_mux_dec = new Decoder(
+ deg_bl_muxing,// This number is 1 for FA or CAM
+ false,
+ C_ld_bit_mux_dec_out,
+ R_wire_bit_mux_dec_out,
+ false/*is_fa*/,
+ is_dram,
+ false,
+ camFlag ? cam_cell : cell);
+ sa_mux_lev_1_dec = new Decoder(
+ dp.deg_senseamp_muxing_non_associativity, // This number is 1 for FA or CAM
+ dp.number_way_select_signals_mat ? true : false,//only sa_mux_lev_1_dec needs way select signal
+ C_ld_sa_mux_lev_1_dec_out,
+ R_wire_sa_mux_dec_out,
+ false/*is_fa*/,
+ is_dram,
+ false,
+ camFlag ? cam_cell : cell);
+ sa_mux_lev_2_dec = new Decoder(
+ dp.Ndsam_lev_2, // This number is 1 for FA or CAM
+ false,
+ C_ld_sa_mux_lev_2_dec_out,
+ R_wire_sa_mux_dec_out,
+ false/*is_fa*/,
+ is_dram,
+ false,
+ camFlag ? cam_cell : cell);
+
+ double C_wire_predec_blk_out;
+ double R_wire_predec_blk_out;
+
+ if (!is_fa && !pure_cam) {
+
+ C_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cell.h;
+ R_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cell.h;
+
+ } else { //for pre-decode block's load is same for both FA and CAM
+ C_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cam_cell.h;
+ R_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cam_cell.h;
+ }
- C_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cell.h;
- R_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cell.h;
- }
- else //for pre-decode block's load is same for both FA and CAM
- {
- C_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cam_cell.h;
- R_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cam_cell.h;
- }
-
-
- if (is_fa||pure_cam)
- num_dec_signals += _log2(num_subarrays_per_mat);
-
- PredecBlk * r_predec_blk1 = new PredecBlk(
- num_dec_signals,
- row_dec,
- C_wire_predec_blk_out,
- R_wire_predec_blk_out,
- num_subarrays_per_mat,
- is_dram,
- true);
- PredecBlk * r_predec_blk2 = new PredecBlk(
- num_dec_signals,
- row_dec,
- C_wire_predec_blk_out,
- R_wire_predec_blk_out,
- num_subarrays_per_mat,
- is_dram,
- false);
- PredecBlk * b_mux_predec_blk1 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, true);
- PredecBlk * b_mux_predec_blk2 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, false);
- PredecBlk * sa_mux_lev_1_predec_blk1 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, true);
- PredecBlk * sa_mux_lev_1_predec_blk2 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, false);
- PredecBlk * sa_mux_lev_2_predec_blk1 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, true);
- PredecBlk * sa_mux_lev_2_predec_blk2 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, false);
- dummy_way_sel_predec_blk1 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, true);
- dummy_way_sel_predec_blk2 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, false);
-
- PredecBlkDrv * r_predec_blk_drv1 = new PredecBlkDrv(0, r_predec_blk1, is_dram);
- PredecBlkDrv * r_predec_blk_drv2 = new PredecBlkDrv(0, r_predec_blk2, is_dram);
- PredecBlkDrv * b_mux_predec_blk_drv1 = new PredecBlkDrv(0, b_mux_predec_blk1, is_dram);
- PredecBlkDrv * b_mux_predec_blk_drv2 = new PredecBlkDrv(0, b_mux_predec_blk2, is_dram);
- PredecBlkDrv * sa_mux_lev_1_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk1, is_dram);
- PredecBlkDrv * sa_mux_lev_1_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk2, is_dram);
- PredecBlkDrv * sa_mux_lev_2_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk1, is_dram);
- PredecBlkDrv * sa_mux_lev_2_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk2, is_dram);
- way_sel_drv1 = new PredecBlkDrv(dyn_p.number_way_select_signals_mat, dummy_way_sel_predec_blk1, is_dram);
- dummy_way_sel_predec_blk_drv2 = new PredecBlkDrv(1, dummy_way_sel_predec_blk2, is_dram);
-
- r_predec = new Predec(r_predec_blk_drv1, r_predec_blk_drv2);
- b_mux_predec = new Predec(b_mux_predec_blk_drv1, b_mux_predec_blk_drv2);
- sa_mux_lev_1_predec = new Predec(sa_mux_lev_1_predec_blk_drv1, sa_mux_lev_1_predec_blk_drv2);
- sa_mux_lev_2_predec = new Predec(sa_mux_lev_2_predec_blk_drv1, sa_mux_lev_2_predec_blk_drv2);
-
- subarray_out_wire = new Wire(g_ip->wt, subarray.area.h);//Bug should be subarray.area.w Owen and Sheng
-
- double driver_c_gate_load;
- double driver_c_wire_load;
- double driver_r_wire_load;
-
- if (is_fa || pure_cam)
-
- { //Although CAM and RAM use different bl pre-charge driver, assuming the precharge p size is the same
- driver_c_gate_load = (subarray.num_cols_fa_cam )* gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
- driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um;
- driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.R_per_um;
- cam_bl_precharge_eq_drv = new Driver(
- driver_c_gate_load,
- driver_c_wire_load,
- driver_r_wire_load,
- is_dram);
-
- if (!pure_cam)
- {
- //This is only used for fully asso not pure CAM
- driver_c_gate_load = (subarray.num_cols_fa_ram )* gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
- driver_c_wire_load = subarray.num_cols_fa_ram * cell.w * g_tp.wire_outside_mat.C_per_um;
- driver_r_wire_load = subarray.num_cols_fa_ram * cell.w * g_tp.wire_outside_mat.R_per_um;
- bl_precharge_eq_drv = new Driver(
- driver_c_gate_load,
- driver_c_wire_load,
- driver_r_wire_load,
- is_dram);
- }
- }
-
- else
- {
- driver_c_gate_load = subarray.num_cols * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
- driver_c_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.C_per_um;
- driver_r_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.R_per_um;
- bl_precharge_eq_drv = new Driver(
- driver_c_gate_load,
- driver_c_wire_load,
- driver_r_wire_load,
- is_dram);
- }
- double area_row_decoder = row_dec->area.get_area() * subarray.num_rows * (RWP + ERP + EWP);
- double w_row_decoder = area_row_decoder / subarray.area.get_h();
-
- double h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux =
- compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h();
-
- double h_subarray_out_drv = subarray_out_wire->area.get_area() *
- (subarray.num_cols / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / subarray.area.get_w();
-
-
- h_subarray_out_drv *= (RWP + ERP + SCHP);
-
- double h_comparators = 0.0;
- double w_row_predecode_output_wires = 0.0;
- double h_bit_mux_dec_out_wires = 0.0;
- double h_senseamp_mux_dec_out_wires = 0.0;
-
- if ((!is_fa)&&(dp.is_tag))
- {
- //tagbits = (4 * num_cols_subarray / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / num_do_b_mat;
- h_comparators = compute_comparators_height(dp.tagbits, dyn_p.num_do_b_mat, subarray.area.get_w());
- h_comparators *= (RWP + ERP);
- }
+ if (is_fa || pure_cam)
+ num_dec_signals += _log2(num_subarrays_per_mat);
+
+ PredecBlk * r_predec_blk1 = new PredecBlk(
+ num_dec_signals,
+ row_dec,
+ C_wire_predec_blk_out,
+ R_wire_predec_blk_out,
+ num_subarrays_per_mat,
+ is_dram,
+ true);
+ PredecBlk * r_predec_blk2 = new PredecBlk(
+ num_dec_signals,
+ row_dec,
+ C_wire_predec_blk_out,
+ R_wire_predec_blk_out,
+ num_subarrays_per_mat,
+ is_dram,
+ false);
+ PredecBlk * b_mux_predec_blk1 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, true);
+ PredecBlk * b_mux_predec_blk2 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, false);
+ PredecBlk * sa_mux_lev_1_predec_blk1 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, true);
+ PredecBlk * sa_mux_lev_1_predec_blk2 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, false);
+ PredecBlk * sa_mux_lev_2_predec_blk1 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, true);
+ PredecBlk * sa_mux_lev_2_predec_blk2 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, false);
+ dummy_way_sel_predec_blk1 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, true);
+ dummy_way_sel_predec_blk2 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, false);
+
+ PredecBlkDrv * r_predec_blk_drv1 = new PredecBlkDrv(0, r_predec_blk1, is_dram);
+ PredecBlkDrv * r_predec_blk_drv2 = new PredecBlkDrv(0, r_predec_blk2, is_dram);
+ PredecBlkDrv * b_mux_predec_blk_drv1 = new PredecBlkDrv(0, b_mux_predec_blk1, is_dram);
+ PredecBlkDrv * b_mux_predec_blk_drv2 = new PredecBlkDrv(0, b_mux_predec_blk2, is_dram);
+ PredecBlkDrv * sa_mux_lev_1_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk1, is_dram);
+ PredecBlkDrv * sa_mux_lev_1_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk2, is_dram);
+ PredecBlkDrv * sa_mux_lev_2_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk1, is_dram);
+ PredecBlkDrv * sa_mux_lev_2_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk2, is_dram);
+ way_sel_drv1 = new PredecBlkDrv(dyn_p.number_way_select_signals_mat, dummy_way_sel_predec_blk1, is_dram);
+ dummy_way_sel_predec_blk_drv2 = new PredecBlkDrv(1, dummy_way_sel_predec_blk2, is_dram);
+
+ r_predec = new Predec(r_predec_blk_drv1, r_predec_blk_drv2);
+ b_mux_predec = new Predec(b_mux_predec_blk_drv1, b_mux_predec_blk_drv2);
+ sa_mux_lev_1_predec = new Predec(sa_mux_lev_1_predec_blk_drv1, sa_mux_lev_1_predec_blk_drv2);
+ sa_mux_lev_2_predec = new Predec(sa_mux_lev_2_predec_blk_drv1, sa_mux_lev_2_predec_blk_drv2);
+
+ subarray_out_wire = new Wire(g_ip->wt, subarray.area.h);//Bug should be subarray.area.w Owen and Sheng
+
+ double driver_c_gate_load;
+ double driver_c_wire_load;
+ double driver_r_wire_load;
+
+ if (is_fa || pure_cam)
+
+ { //Although CAM and RAM use different bl pre-charge driver, assuming the precharge p size is the same
+ driver_c_gate_load = (subarray.num_cols_fa_cam ) *
+ gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0,
+ is_dram, false, false);
+ driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w *
+ g_tp.wire_outside_mat.C_per_um;
+ driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w *
+ g_tp.wire_outside_mat.R_per_um;
+ cam_bl_precharge_eq_drv = new Driver(
+ driver_c_gate_load,
+ driver_c_wire_load,
+ driver_r_wire_load,
+ is_dram);
+
+ if (!pure_cam) {
+ //This is only used for fully asso not pure CAM
+ driver_c_gate_load = (subarray.num_cols_fa_ram ) *
+ gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0,
+ is_dram, false, false);
+ driver_c_wire_load = subarray.num_cols_fa_ram * cell.w *
+ g_tp.wire_outside_mat.C_per_um;
+ driver_r_wire_load = subarray.num_cols_fa_ram * cell.w *
+ g_tp.wire_outside_mat.R_per_um;
+ bl_precharge_eq_drv = new Driver(
+ driver_c_gate_load,
+ driver_c_wire_load,
+ driver_r_wire_load,
+ is_dram);
+ }
+ }
+
+ else {
+ driver_c_gate_load = subarray.num_cols * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
+ driver_c_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.C_per_um;
+ driver_r_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.R_per_um;
+ bl_precharge_eq_drv = new Driver(
+ driver_c_gate_load,
+ driver_c_wire_load,
+ driver_r_wire_load,
+ is_dram);
+ }
+ double area_row_decoder = row_dec->area.get_area() * subarray.num_rows * (RWP + ERP + EWP);
+ double w_row_decoder = area_row_decoder / subarray.area.get_h();
+
+ double h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux =
+ compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h();
+
+ double h_subarray_out_drv = subarray_out_wire->area.get_area() *
+ (subarray.num_cols / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / subarray.area.get_w();
+
+
+ h_subarray_out_drv *= (RWP + ERP + SCHP);
+
+ double h_comparators = 0.0;
+ double w_row_predecode_output_wires = 0.0;
+ double h_bit_mux_dec_out_wires = 0.0;
+ double h_senseamp_mux_dec_out_wires = 0.0;
+
+ if ((!is_fa) && (dp.is_tag)) {
+ //tagbits = (4 * num_cols_subarray / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / num_do_b_mat;
+ h_comparators = compute_comparators_height(dp.tagbits, dyn_p.num_do_b_mat, subarray.area.get_w());
+ h_comparators *= (RWP + ERP);
+ }
int branch_effort_predec_blk1_out = (1 << r_predec_blk2->number_input_addr_bits);
int branch_effort_predec_blk2_out = (1 << r_predec_blk1->number_input_addr_bits);
w_row_predecode_output_wires = (branch_effort_predec_blk1_out + branch_effort_predec_blk2_out) *
- g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP);
-
-
- double h_non_cell_area = (num_subarrays_per_mat / num_subarrays_per_row) *
- (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux +
- h_subarray_out_drv + h_comparators);
-
- double w_non_cell_area = MAX(w_row_predecode_output_wires, num_subarrays_per_row * w_row_decoder);
-
- if (deg_bl_muxing > 1)
- {
- h_bit_mux_dec_out_wires = deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP);
- }
- if (dp.Ndsam_lev_1 > 1)
- {
- h_senseamp_mux_dec_out_wires = dp.Ndsam_lev_1 * g_tp.wire_inside_mat.pitch * (RWP + ERP);
- }
- if (dp.Ndsam_lev_2 > 1)
- {
- h_senseamp_mux_dec_out_wires += dp.Ndsam_lev_2 * g_tp.wire_inside_mat.pitch * (RWP + ERP);
- }
-
- double h_addr_datain_wires;
- if (!g_ip->ver_htree_wires_over_array)
- {
- h_addr_datain_wires = (dp.number_addr_bits_mat + dp.number_way_select_signals_mat +
- (dp.num_di_b_mat + dp.num_do_b_mat)/num_subarrays_per_row) *
- g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP);
+ g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP);
+
+
+ double h_non_cell_area = (num_subarrays_per_mat / num_subarrays_per_row) *
+ (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux +
+ h_subarray_out_drv + h_comparators);
+
+ double w_non_cell_area = MAX(w_row_predecode_output_wires, num_subarrays_per_row * w_row_decoder);
+
+ if (deg_bl_muxing > 1) {
+ h_bit_mux_dec_out_wires = deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP);
+ }
+ if (dp.Ndsam_lev_1 > 1) {
+ h_senseamp_mux_dec_out_wires = dp.Ndsam_lev_1 * g_tp.wire_inside_mat.pitch * (RWP + ERP);
+ }
+ if (dp.Ndsam_lev_2 > 1) {
+ h_senseamp_mux_dec_out_wires += dp.Ndsam_lev_2 * g_tp.wire_inside_mat.pitch * (RWP + ERP);
+ }
+
+ double h_addr_datain_wires;
+ if (!g_ip->ver_htree_wires_over_array) {
+ h_addr_datain_wires = (dp.number_addr_bits_mat +
+ dp.number_way_select_signals_mat +
+ (dp.num_di_b_mat + dp.num_do_b_mat) /
+ num_subarrays_per_row) *
+ g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP);
+
+ if (is_fa || pure_cam) {
+ h_addr_datain_wires =
+ (dp.number_addr_bits_mat +
+ dp.number_way_select_signals_mat + //TODO: revisit
+ (dp.num_di_b_mat + dp.num_do_b_mat ) / num_subarrays_per_row) *
+ g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP) +
+ (dp.num_si_b_mat + dp.num_so_b_mat ) / num_subarrays_per_row *
+ g_tp.wire_inside_mat.pitch * SCHP;
+ }
+ //h_non_cell_area = 2 * h_bit_mux_sense_amp_precharge_sa_mux +
+ //MAX(h_addr_datain_wires, 2 * h_subarray_out_drv);
+ h_non_cell_area = (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux + h_comparators +
+ h_subarray_out_drv) * (num_subarrays_per_mat / num_subarrays_per_row) +
+ h_addr_datain_wires +
+ h_bit_mux_dec_out_wires +
+ h_senseamp_mux_dec_out_wires;
- if (is_fa || pure_cam)
- {
- h_addr_datain_wires = (dp.number_addr_bits_mat + dp.number_way_select_signals_mat + //TODO: revisit
- (dp.num_di_b_mat+ dp.num_do_b_mat )/num_subarrays_per_row) *
- g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP) +
- (dp.num_si_b_mat + dp.num_so_b_mat )/num_subarrays_per_row * g_tp.wire_inside_mat.pitch * SCHP;
}
- //h_non_cell_area = 2 * h_bit_mux_sense_amp_precharge_sa_mux +
- //MAX(h_addr_datain_wires, 2 * h_subarray_out_drv);
- h_non_cell_area = (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux + h_comparators +
- h_subarray_out_drv) * (num_subarrays_per_mat / num_subarrays_per_row) +
- h_addr_datain_wires +
- h_bit_mux_dec_out_wires +
- h_senseamp_mux_dec_out_wires;
-
- }
-
- // double area_rectangle_center_mat = h_non_cell_area * w_non_cell_area;
- double area_mat_center_circuitry = (r_predec_blk_drv1->area.get_area() +
- b_mux_predec_blk_drv1->area.get_area() +
- sa_mux_lev_1_predec_blk_drv1->area.get_area() +
- sa_mux_lev_2_predec_blk_drv1->area.get_area() +
- way_sel_drv1->area.get_area() +
- r_predec_blk_drv2->area.get_area() +
- b_mux_predec_blk_drv2->area.get_area() +
- sa_mux_lev_1_predec_blk_drv2->area.get_area() +
- sa_mux_lev_2_predec_blk_drv2->area.get_area() +
- r_predec_blk1->area.get_area() +
- b_mux_predec_blk1->area.get_area() +
- sa_mux_lev_1_predec_blk1->area.get_area() +
- sa_mux_lev_2_predec_blk1->area.get_area() +
- r_predec_blk2->area.get_area() +
- b_mux_predec_blk2->area.get_area() +
- sa_mux_lev_1_predec_blk2->area.get_area() +
- sa_mux_lev_2_predec_blk2->area.get_area() +
- bit_mux_dec->area.get_area() +
- sa_mux_lev_1_dec->area.get_area() +
- sa_mux_lev_2_dec->area.get_area()) * (RWP + ERP + EWP);
-
- double area_efficiency_mat;
+
+ // double area_rectangle_center_mat = h_non_cell_area * w_non_cell_area;
+ double area_mat_center_circuitry = (r_predec_blk_drv1->area.get_area() +
+ b_mux_predec_blk_drv1->area.get_area() +
+ sa_mux_lev_1_predec_blk_drv1->area.get_area() +
+ sa_mux_lev_2_predec_blk_drv1->area.get_area() +
+ way_sel_drv1->area.get_area() +
+ r_predec_blk_drv2->area.get_area() +
+ b_mux_predec_blk_drv2->area.get_area() +
+ sa_mux_lev_1_predec_blk_drv2->area.get_area() +
+ sa_mux_lev_2_predec_blk_drv2->area.get_area() +
+ r_predec_blk1->area.get_area() +
+ b_mux_predec_blk1->area.get_area() +
+ sa_mux_lev_1_predec_blk1->area.get_area() +
+ sa_mux_lev_2_predec_blk1->area.get_area() +
+ r_predec_blk2->area.get_area() +
+ b_mux_predec_blk2->area.get_area() +
+ sa_mux_lev_1_predec_blk2->area.get_area() +
+ sa_mux_lev_2_predec_blk2->area.get_area() +
+ bit_mux_dec->area.get_area() +
+ sa_mux_lev_1_dec->area.get_area() +
+ sa_mux_lev_2_dec->area.get_area()) * (RWP + ERP + EWP);
+
+ double area_efficiency_mat;
// if (!is_fa)
// {
- assert(num_subarrays_per_mat/num_subarrays_per_row>0);
- area.h = (num_subarrays_per_mat/num_subarrays_per_row)* subarray.area.h + h_non_cell_area;
+ assert(num_subarrays_per_mat / num_subarrays_per_row > 0);
+ area.h = (num_subarrays_per_mat / num_subarrays_per_row) *
+ subarray.area.h + h_non_cell_area;
area.w = num_subarrays_per_row * subarray.area.get_w() + w_non_cell_area;
- area.w = (area.h*area.w + area_mat_center_circuitry) / area.h;
- area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_mat * 100.0 / area.get_area();
+ area.w = (area.h * area.w + area_mat_center_circuitry) / area.h;
+ area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_mat *
+ 100.0 / area.get_area();
// cout<<"h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux"<<h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux<<endl;
// cout<<"h_comparators"<<h_comparators<<endl;
@@ -413,8 +412,8 @@ Mat::Mat(const DynamicParameter & dyn_p)
// cout<<"w_non_cell_area"<<w_non_cell_area<<endl;
// cout<<"area_mat_center_circuitry"<<area_mat_center_circuitry<<endl;
- assert(area.h>0);
- assert(area.w>0);
+ assert(area.h > 0);
+ assert(area.w > 0);
// }
// else
// {
@@ -423,583 +422,609 @@ Mat::Mat(const DynamicParameter & dyn_p)
// area.w = (area.h*area.w + area_mat_center_circuitry) / area.h;
// area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_row * 100.0 / area.get_area();
// }
- }
-
-
-
-Mat::~Mat()
-{
- delete row_dec;
- delete bit_mux_dec;
- delete sa_mux_lev_1_dec;
- delete sa_mux_lev_2_dec;
-
- delete r_predec->blk1;
- delete r_predec->blk2;
- delete b_mux_predec->blk1;
- delete b_mux_predec->blk2;
- delete sa_mux_lev_1_predec->blk1;
- delete sa_mux_lev_1_predec->blk2;
- delete sa_mux_lev_2_predec->blk1;
- delete sa_mux_lev_2_predec->blk2;
- delete dummy_way_sel_predec_blk1;
- delete dummy_way_sel_predec_blk2;
-
- delete r_predec->drv1;
- delete r_predec->drv2;
- delete b_mux_predec->drv1;
- delete b_mux_predec->drv2;
- delete sa_mux_lev_1_predec->drv1;
- delete sa_mux_lev_1_predec->drv2;
- delete sa_mux_lev_2_predec->drv1;
- delete sa_mux_lev_2_predec->drv2;
- delete way_sel_drv1;
- delete dummy_way_sel_predec_blk_drv2;
-
- delete r_predec;
- delete b_mux_predec;
- delete sa_mux_lev_1_predec;
- delete sa_mux_lev_2_predec;
-
- delete subarray_out_wire;
- if (!pure_cam)
- delete bl_precharge_eq_drv;
-
- if (is_fa || pure_cam)
- {
- delete sl_precharge_eq_drv ;
- delete sl_data_drv ;
- delete cam_bl_precharge_eq_drv;
- delete ml_precharge_drv;
- delete ml_to_ram_wl_drv;
- }
}
-double Mat::compute_delays(double inrisetime)
-{
- int k;
- double rd, C_intrinsic, C_ld, tf, R_bl_precharge,r_b_metal, R_bl, C_bl;
- double outrisetime_search, outrisetime, row_dec_outrisetime;
- // delay calculation for tags of fully associative cache
- if (is_fa || pure_cam)
- {
- //Compute search access time
- outrisetime_search = compute_cam_delay(inrisetime);
- if (is_fa)
- {
- bl_precharge_eq_drv->compute_delay(0);
- k = ml_to_ram_wl_drv->number_gates - 1;
- rd = tr_R_on(ml_to_ram_wl_drv->width_n[k], NCH, 1, is_dram, false, true);
- C_intrinsic = drain_C_(ml_to_ram_wl_drv->width_n[k], PCH, 1, 1, 4*cell.h, is_dram, false, true) +
- drain_C_(ml_to_ram_wl_drv->width_n[k], NCH, 1, 1, 4*cell.h, is_dram, false, true);
- C_ld = ml_to_ram_wl_drv->c_gate_load+ ml_to_ram_wl_drv->c_wire_load;
- tf = rd * (C_intrinsic + C_ld) + ml_to_ram_wl_drv->r_wire_load * C_ld / 2;
- delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE);
-
- R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);
- r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;//dummy rows in sram are filled in
- R_bl = subarray.num_rows * r_b_metal;
- C_bl = subarray.C_bl;
- delay_bl_restore = bl_precharge_eq_drv->delay +
- log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) / (g_tp.sram.Vbitpre - dp.V_b_sense))*
- (R_bl_precharge * C_bl + R_bl * C_bl / 2);
-
-
- outrisetime_search = compute_bitline_delay(outrisetime_search);
- outrisetime_search = compute_sa_delay(outrisetime_search);
- }
- outrisetime_search = compute_subarray_out_drv(outrisetime_search);
- subarray_out_wire->set_in_rise_time(outrisetime_search);
- outrisetime_search = subarray_out_wire->signal_rise_time();
- delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay;
-
-
- //TODO: this is just for compute plain read/write energy for fa and cam, plain read/write access timing need to be revisited.
- outrisetime = r_predec->compute_delays(inrisetime);
- row_dec_outrisetime = row_dec->compute_delays(outrisetime);
-
- outrisetime = b_mux_predec->compute_delays(inrisetime);
- bit_mux_dec->compute_delays(outrisetime);
-
- outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime);
- sa_mux_lev_1_dec->compute_delays(outrisetime);
-
- outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime);
- sa_mux_lev_2_dec->compute_delays(outrisetime);
-
- if (pure_cam)
- {
- outrisetime = compute_bitline_delay(row_dec_outrisetime);
- outrisetime = compute_sa_delay(outrisetime);
- }
- return outrisetime_search;
+Mat::~Mat() {  // Destructor: releases the decoder, predecoder, driver and wire objects this Mat holds by raw pointer.
+ delete row_dec;
+ delete bit_mux_dec;
+ delete sa_mux_lev_1_dec;
+ delete sa_mux_lev_2_dec;
+
+ delete r_predec->blk1;  // predecode blocks are reached through the predecoder pointers, so they are freed before the predecoders themselves
+ delete r_predec->blk2;
+ delete b_mux_predec->blk1;
+ delete b_mux_predec->blk2;
+ delete sa_mux_lev_1_predec->blk1;
+ delete sa_mux_lev_1_predec->blk2;
+ delete sa_mux_lev_2_predec->blk1;
+ delete sa_mux_lev_2_predec->blk2;
+ delete dummy_way_sel_predec_blk1;
+ delete dummy_way_sel_predec_blk2;
+
+ delete r_predec->drv1;  // likewise the predecode block drivers, still accessed via the predecoder objects
+ delete r_predec->drv2;
+ delete b_mux_predec->drv1;
+ delete b_mux_predec->drv2;
+ delete sa_mux_lev_1_predec->drv1;
+ delete sa_mux_lev_1_predec->drv2;
+ delete sa_mux_lev_2_predec->drv1;
+ delete sa_mux_lev_2_predec->drv2;
+ delete way_sel_drv1;
+ delete dummy_way_sel_predec_blk_drv2;
+
+ delete r_predec;  // the predecoder objects go last, after their blk/drv members above
+ delete b_mux_predec;
+ delete sa_mux_lev_1_predec;
+ delete sa_mux_lev_2_predec;
+
+ delete subarray_out_wire;
+ if (!pure_cam)  // NOTE(review): presumably bl_precharge_eq_drv is never allocated for a pure CAM - confirm in the constructor
+ delete bl_precharge_eq_drv;
+
+ if (is_fa || pure_cam) {  // NOTE(review): the sl_*/ml_* drivers appear to be allocated in compute_cam_delay(); confirm they are initialized if it never runs
+ delete sl_precharge_eq_drv ;
+ delete sl_data_drv ;
+ delete cam_bl_precharge_eq_drv;
+ delete ml_precharge_drv;
+ delete ml_to_ram_wl_drv;
+ }
+}
+
+
+
+double Mat::compute_delays(double inrisetime) {  // Runs the per-stage delay computations for this mat; stores delay_wl_reset, delay_bl_restore and delay_subarray_out_drv_htree; returns the rise time at the subarray output wire.
+ int k;
+ double rd, C_intrinsic, C_ld, tf, R_bl_precharge, r_b_metal, R_bl, C_bl;
+ double outrisetime_search, outrisetime, row_dec_outrisetime;
+ // delay calculation for tags of fully associative cache
+ if (is_fa || pure_cam) {  // search path: this branch returns at its end and never reaches the common code below
+ //Compute search access time
+ outrisetime_search = compute_cam_delay(inrisetime);
+ if (is_fa) {
+ bl_precharge_eq_drv->compute_delay(0);
+ k = ml_to_ram_wl_drv->number_gates - 1;  // index number_gates - 1 into the driver's width array
+ rd = tr_R_on(ml_to_ram_wl_drv->width_n[k], NCH, 1, is_dram, false, true);
+ C_intrinsic = drain_C_(ml_to_ram_wl_drv->width_n[k], PCH, 1, 1, 4 *  // NOTE(review): PCH term uses width_n[k]; the non-CAM branch uses w_dec_p[k] for its PCH term - confirm intended
+ cell.h, is_dram, false, true) +
+ drain_C_(ml_to_ram_wl_drv->width_n[k], NCH, 1, 1, 4 * cell.h,
+ is_dram, false, true);
+ C_ld = ml_to_ram_wl_drv->c_gate_load +
+ ml_to_ram_wl_drv->c_wire_load;
+ tf = rd * (C_intrinsic + C_ld) + ml_to_ram_wl_drv->r_wire_load * C_ld / 2;
+ delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE);
+
+ R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);
+ r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;//dummy rows in sram are filled in
+ R_bl = subarray.num_rows * r_b_metal;
+ C_bl = subarray.C_bl;
+ delay_bl_restore = bl_precharge_eq_drv->delay +  // driver delay plus an RC term scaled by the log of the precharge/sense voltage ratio
+ log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) /
+ (g_tp.sram.Vbitpre - dp.V_b_sense)) *
+ (R_bl_precharge * C_bl + R_bl * C_bl / 2);
+
+
+ outrisetime_search = compute_bitline_delay(outrisetime_search);
+ outrisetime_search = compute_sa_delay(outrisetime_search);
+ }
+ outrisetime_search = compute_subarray_out_drv(outrisetime_search);
+ subarray_out_wire->set_in_rise_time(outrisetime_search);
+ outrisetime_search = subarray_out_wire->signal_rise_time();
+ delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay;
+
+
+ //TODO: this is only here to compute plain read/write energy for fa and cam; plain read/write access timing needs to be revisited.
+ outrisetime = r_predec->compute_delays(inrisetime);
+ row_dec_outrisetime = row_dec->compute_delays(outrisetime);
+
+ outrisetime = b_mux_predec->compute_delays(inrisetime);
+ bit_mux_dec->compute_delays(outrisetime);
+
+ outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime);
+ sa_mux_lev_1_dec->compute_delays(outrisetime);
+
+ outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime);
+ sa_mux_lev_2_dec->compute_delays(outrisetime);
+
+ if (pure_cam) {  // the outrisetime values assigned here are not returned; presumably the calls matter for what they record internally
+ outrisetime = compute_bitline_delay(row_dec_outrisetime);
+ outrisetime = compute_sa_delay(outrisetime);
+ }
+ return outrisetime_search;  // rise time of the search path at the subarray output wire
+ } else {  // regular array: compute wordline reset and bitline restore here, then continue with the common path after this block
+ bl_precharge_eq_drv->compute_delay(0);
+ if (row_dec->exist == true) {
+ int k = row_dec->num_gates - 1;  // this and the locals below shadow the function-scope declarations at the top
+ double rd = tr_R_on(row_dec->w_dec_n[k], NCH, 1, is_dram, false, true);
+ // TODO: this 4*cell.h number must be revisited
+ double C_intrinsic = drain_C_(row_dec->w_dec_p[k], PCH, 1, 1, 4 *
+ cell.h, is_dram, false, true) +
+ drain_C_(row_dec->w_dec_n[k], NCH, 1, 1, 4 * cell.h, is_dram,
+ false, true);
+ double C_ld = row_dec->C_ld_dec_out;
+ double tf = rd * (C_intrinsic + C_ld) + row_dec->R_wire_dec_out * C_ld / 2;
+ delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE);
+ }
+ double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);
+ double r_b_metal = cell.h * g_tp.wire_local.R_per_um;
+ double R_bl = subarray.num_rows * r_b_metal;
+ double C_bl = subarray.C_bl;
+
+ if (is_dram) {  // DRAM uses a fixed 2.3 multiplier on the RC term - presumably ~ln(10); TODO confirm
+ delay_bl_restore = bl_precharge_eq_drv->delay + 2.3 * (R_bl_precharge * C_bl + R_bl * C_bl / 2);
+ } else {
+ delay_bl_restore = bl_precharge_eq_drv->delay +
+ log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) /
+ (g_tp.sram.Vbitpre - dp.V_b_sense)) *
+ (R_bl_precharge * C_bl + R_bl * C_bl / 2);
+ }
+ }
+
+
+
+ outrisetime = r_predec->compute_delays(inrisetime);  // common path (only reached from the else branch): each predecoder's output rise time feeds its decoder
+ row_dec_outrisetime = row_dec->compute_delays(outrisetime);
+
+ outrisetime = b_mux_predec->compute_delays(inrisetime);
+ bit_mux_dec->compute_delays(outrisetime);
+
+ outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime);
+ sa_mux_lev_1_dec->compute_delays(outrisetime);
+
+ outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime);
+ sa_mux_lev_2_dec->compute_delays(outrisetime);
+
+ outrisetime = compute_bitline_delay(row_dec_outrisetime);  // row decoder output drives the bitline stage, then sense amp, output driver and output wire in turn
+ outrisetime = compute_sa_delay(outrisetime);
+ outrisetime = compute_subarray_out_drv(outrisetime);
+ subarray_out_wire->set_in_rise_time(outrisetime);
+ outrisetime = subarray_out_wire->signal_rise_time();
+
+ delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay;
+
+ if (dp.is_tag == true && dp.fully_assoc == false) {  // comparator delay is computed only for tag arrays that are not fully associative
+ compute_comparator_delay(0);
}
- else
- {
- bl_precharge_eq_drv->compute_delay(0);
- if (row_dec->exist == true)
- {
- int k = row_dec->num_gates - 1;
- double rd = tr_R_on(row_dec->w_dec_n[k], NCH, 1, is_dram, false, true);
- // TODO: this 4*cell.h number must be revisited
- double C_intrinsic = drain_C_(row_dec->w_dec_p[k], PCH, 1, 1, 4*cell.h, is_dram, false, true) +
- drain_C_(row_dec->w_dec_n[k], NCH, 1, 1, 4*cell.h, is_dram, false, true);
- double C_ld = row_dec->C_ld_dec_out;
- double tf = rd * (C_intrinsic + C_ld) + row_dec->R_wire_dec_out * C_ld / 2;
- delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE);
- }
- double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);
- double r_b_metal = cell.h * g_tp.wire_local.R_per_um;
- double R_bl = subarray.num_rows * r_b_metal;
- double C_bl = subarray.C_bl;
-
- if (is_dram)
- {
- delay_bl_restore = bl_precharge_eq_drv->delay + 2.3 * (R_bl_precharge * C_bl + R_bl * C_bl / 2);
- }
- else
- {
- delay_bl_restore = bl_precharge_eq_drv->delay +
- log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) / (g_tp.sram.Vbitpre - dp.V_b_sense))*
- (R_bl_precharge * C_bl + R_bl * C_bl / 2);
- }
- }
-
-
-
- outrisetime = r_predec->compute_delays(inrisetime);
- row_dec_outrisetime = row_dec->compute_delays(outrisetime);
-
- outrisetime = b_mux_predec->compute_delays(inrisetime);
- bit_mux_dec->compute_delays(outrisetime);
-
- outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime);
- sa_mux_lev_1_dec->compute_delays(outrisetime);
-
- outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime);
- sa_mux_lev_2_dec->compute_delays(outrisetime);
-
- outrisetime = compute_bitline_delay(row_dec_outrisetime);
- outrisetime = compute_sa_delay(outrisetime);
- outrisetime = compute_subarray_out_drv(outrisetime);
- subarray_out_wire->set_in_rise_time(outrisetime);
- outrisetime = subarray_out_wire->signal_rise_time();
-
- delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay;
-
- if (dp.is_tag == true && dp.fully_assoc == false)
- {
- compute_comparator_delay(0);
- }
-
- if (row_dec->exist == false)
- {
- delay_wl_reset = MAX(r_predec->blk1->delay, r_predec->blk2->delay);
+
+ if (row_dec->exist == false) {  // without a row decoder, wordline reset delay is taken as the larger of the two row predecode block delays
+ delay_wl_reset = MAX(r_predec->blk1->delay, r_predec->blk2->delay);
}
- return outrisetime;
+ return outrisetime;  // rise time at the subarray output wire for the regular read path
}
-double Mat::compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h()
-{
-
- double height = compute_tr_width_after_folding(g_tp.w_pmos_bl_precharge, camFlag? cam_cell.w:cell.w / (2 *(RWP + ERP + SCHP))) +
- compute_tr_width_after_folding(g_tp.w_pmos_bl_eq, camFlag? cam_cell.w:cell.w / (RWP + ERP + SCHP)); // precharge circuitry
-
- if (deg_bl_muxing > 1)
- {
- height += compute_tr_width_after_folding(g_tp.w_nmos_b_mux, cell.w / (2 *(RWP + ERP))); // col mux tr height
- // height += deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP); // bit mux dec out wires height
- }
-
- height += height_sense_amplifier(/*camFlag? sram_cell.w:*/cell.w * deg_bl_muxing / (RWP + ERP)); // sense_amp_height
-
- if (dp.Ndsam_lev_1 > 1)
- {
- height += compute_tr_width_after_folding(
- g_tp.w_nmos_sa_mux, cell.w * dp.Ndsam_lev_1 / (RWP + ERP)); // sense_amp_mux_height
- //height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP);
- }
-
- if (dp.Ndsam_lev_2 > 1)
- {
- height += compute_tr_width_after_folding(
- g_tp.w_nmos_sa_mux, cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP)); // sense_amp_mux_height
- //height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP);
-
- // add height of inverter-buffers between the two levels (pass-transistors) of sense-amp mux
- height += 2 * compute_tr_width_after_folding(
- pmos_to_nmos_sz_ratio(is_dram) * g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP));
- height += 2 * compute_tr_width_after_folding(g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP));
- }
-
- // TODO: this should be uncommented...
- /*if (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2 > 1)
- {
- //height_write_mux_decode_output_wires = deg_bl_muxing * Ndsam * g_tp.wire_inside_mat.pitch * (RWP + EWP);
- double width_write_driver_write_mux = width_write_driver_or_write_mux();
- double height_write_driver_write_mux = compute_tr_width_after_folding(2 * width_write_driver_write_mux,
- cell.w *
- // deg_bl_muxing *
- dp.Ndsam_lev_1 * dp.Ndsam_lev_2 / (RWP + EWP));
- height += height_write_driver_write_mux;
- }*/
-
- return height;
+double Mat::compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h() {  // Returns the summed height of the precharge/equalize, column-mux, sense-amp and sense-amp-mux circuitry; the write driver/mux term is currently commented out below.
+
+ double height =
+ compute_tr_width_after_folding(g_tp.w_pmos_bl_precharge,
+ camFlag ? cam_cell.w :  // NOTE(review): ?: binds looser than /, so only the cell.w arm is divided by the port-count term - confirm intended
+ cell.w / (2 * (RWP + ERP + SCHP))) +
+ // precharge circuitry
+ compute_tr_width_after_folding(g_tp.w_pmos_bl_eq,
+ camFlag ? cam_cell.w :  // same precedence caveat as the precharge term above
+ cell.w / (RWP + ERP + SCHP));
+
+ if (deg_bl_muxing > 1) {  // the column mux height is added only when the bitline muxing degree exceeds 1
+ // col mux tr height
+ height +=
+ compute_tr_width_after_folding(g_tp.w_nmos_b_mux,
+ cell.w / (2 * (RWP + ERP)));
+ // height += deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP); // bit mux dec out wires height
+ }
+
+ height += height_sense_amplifier(/*camFlag? sram_cell.w:*/cell.w * deg_bl_muxing / (RWP + ERP)); // sense_amp_height
+
+ if (dp.Ndsam_lev_1 > 1) {  // level-1 sense-amp mux is counted only when its degree exceeds 1
+ height += compute_tr_width_after_folding(
+ g_tp.w_nmos_sa_mux, cell.w * dp.Ndsam_lev_1 / (RWP + ERP)); // sense_amp_mux_height
+ //height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP);
+ }
+
+ if (dp.Ndsam_lev_2 > 1) {  // level-2 mux adds its pass transistors plus the inverter buffers between the two mux levels
+ height += compute_tr_width_after_folding(
+ g_tp.w_nmos_sa_mux, cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP)); // sense_amp_mux_height
+ //height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP);
+
+ // add height of inverter-buffers between the two levels (pass-transistors) of sense-amp mux
+ height += 2 * compute_tr_width_after_folding(
+ pmos_to_nmos_sz_ratio(is_dram) * g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP));
+ height += 2 * compute_tr_width_after_folding(g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP));
+ }
+
+ // TODO: this should be uncommented...
+ /*if (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2 > 1)
+ {
+ //height_write_mux_decode_output_wires = deg_bl_muxing * Ndsam * g_tp.wire_inside_mat.pitch * (RWP + EWP);
+ double width_write_driver_write_mux = width_write_driver_or_write_mux();
+ double height_write_driver_write_mux = compute_tr_width_after_folding(2 * width_write_driver_write_mux,
+ cell.w *
+ // deg_bl_muxing *
+ dp.Ndsam_lev_1 * dp.Ndsam_lev_2 / (RWP + EWP));
+ height += height_write_driver_write_mux;
+ }*/
+
+ return height;
}
-double Mat::compute_cam_delay(double inrisetime)
-{
+double Mat::compute_cam_delay(double inrisetime) {
- double out_time_ramp, this_delay;
- double Rwire, tf, c_intrinsic, rd, Cwire, c_gate_load;
+ double out_time_ramp, this_delay;
+ double Rwire, tf, c_intrinsic, rd, Cwire, c_gate_load;
- double Wdecdrivep, Wdecdriven, Wfadriven, Wfadrivep, Wfadrive2n, Wfadrive2p, Wfadecdrive1n, Wfadecdrive1p,
+ double Wdecdrivep, Wdecdriven, Wfadriven, Wfadrivep, Wfadrive2n, Wfadrive2p, Wfadecdrive1n, Wfadecdrive1p,
Wfadecdrive2n, Wfadecdrive2p, Wfadecdriven, Wfadecdrivep, Wfaprechn, Wfaprechp,
Wdummyn, Wdummyinvn, Wdummyinvp, Wfainvn, Wfainvp, Waddrnandn, Waddrnandp,
Wfanandn, Wfanandp, Wfanorn, Wfanorp, Wdecnandn, Wdecnandp, W_hit_miss_n, W_hit_miss_p;
- double c_matchline_metal, r_matchline_metal, c_searchline_metal, r_searchline_metal, dynSearchEng;
- int Htagbits;
-
- double driver_c_gate_load;
- double driver_c_wire_load;
- double driver_r_wire_load;
- //double searchline_precharge_time;
-
- double leak_power_cc_inverters_sram_cell = 0;
- double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0;
- double leak_power_RD_port_sram_cell = 0;
- double leak_power_SCHP_port_sram_cell = 0;
- double leak_comparator_cam_cell =0;
-
- double gate_leak_comparator_cam_cell = 0;
- double gate_leak_power_cc_inverters_sram_cell = 0;
- double gate_leak_power_RD_port_sram_cell = 0;
- double gate_leak_power_SCHP_port_sram_cell = 0;
-
- c_matchline_metal = cam_cell.get_w() * g_tp.wire_local.C_per_um;
- c_searchline_metal = cam_cell.get_h() * g_tp.wire_local.C_per_um;
- r_matchline_metal = cam_cell.get_w() * g_tp.wire_local.R_per_um;
- r_searchline_metal = cam_cell.get_h() * g_tp.wire_local.R_per_um;
-
- dynSearchEng = 0.0;
- delay_matchchline = 0.0;
- double p_to_n_sizing_r = pmos_to_nmos_sz_ratio(is_dram);
- bool linear_scaling = false;
-
- if (linear_scaling)
- {
- Wdecdrivep = 450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process
- Wdecdriven = 300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process
- Wfadriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
- Wfadrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
- Wfadrive2n = 250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process
- Wfadrive2p = 500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process
- Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
- Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- Wfadecdrive2n = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
- Wfadecdrive2p = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process
- Wfadecdriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
- Wfadecdrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
- Wfaprechn = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process
- Wfainvn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- Wfainvp = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
- Wfanandn = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
- Wfanandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
- Wdecnandn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- Wdecnandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
-
- Wfaprechp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- Wdummyn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- Wdummyinvn = 75 * g_ip->F_sz_um;//this was 60 micron for the 0.8 micron process
- Wdummyinvp = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process
- Waddrnandn = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
- Waddrnandp = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
- Wfanorn = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
- Wfanorp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- W_hit_miss_n = Wdummyn;
- W_hit_miss_p = g_tp.min_w_nmos_*p_to_n_sizing_r;
- //TODO: this number should updated using new layout; from the NAND to output NOR should be computed using logical effort
- }
- else
- {
- Wdecdrivep = 450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process
- Wdecdriven = 300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process
- Wfadriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
- Wfadrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
- Wfadrive2n = 250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process
- Wfadrive2p = 500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process
- Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
- Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- Wfadecdrive2n = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
- Wfadecdrive2p = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process
- Wfadecdriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
- Wfadecdrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
- Wfaprechn = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process
- Wfainvn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- Wfainvp = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
- Wfanandn = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
- Wfanandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
- Wdecnandn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- Wdecnandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
-
- Wfaprechp = g_tp.w_pmos_bl_precharge;//this was 10 micron for the 0.8 micron process
- Wdummyn = g_tp.cam.cell_nmos_w;
- Wdummyinvn = 75 * g_ip->F_sz_um;//this was 60 micron for the 0.8 micron process
- Wdummyinvp = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process
- Waddrnandn = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
- Waddrnandp = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
- Wfanorn = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
- Wfanorp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- W_hit_miss_n = Wdummyn;
- W_hit_miss_p = g_tp.min_w_nmos_*p_to_n_sizing_r;
- }
-
- Htagbits = (int)(ceil ((double) (subarray.num_cols_fa_cam) / 2.0));
-
- /* First stage, searchline is precharged. searchline data driver drives the searchline to open (if miss) the comparators.
- search_line_delay, search_line_power, search_line_restore_delay for cycle time computation.
- From the driver(am and an) to the comparators in all the rows including the dummy row,
- Assuming that comparators in both the normal matching line and the dummy matching line have the same sizing */
-
- //Searchline precharge circuitry is same as that of bitline. However, no sharing between search ports and r/w ports
- //Searchline precharge routes horizontally
- driver_c_gate_load = subarray.num_cols_fa_cam * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
- driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um;
- driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.R_per_um;
-
- sl_precharge_eq_drv = new Driver(
- driver_c_gate_load,
- driver_c_wire_load,
- driver_r_wire_load,
- is_dram);
-
- //searchline data driver ; subarray.num_rows + 1 is because of the dummy row
- //data drv should only have gate_C not 2*gate_C since the two searchlines are differential--same as bitlines
- driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wdummyn, 0, is_dram, false, false);
- driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal;
- driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal;
- sl_data_drv = new Driver(
- driver_c_gate_load,
- driver_c_wire_load,
- driver_r_wire_load,
- is_dram);
-
- sl_precharge_eq_drv->compute_delay(0);
- double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);//Assuming CAM and SRAM have same Pre_eq_dr
- double r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;
- double R_bl = (subarray.num_rows + 1) * r_b_metal;
- double C_bl = subarray.C_bl_cam;
- delay_cam_sl_restore = sl_precharge_eq_drv->delay
- + log(g_tp.cam.Vbitpre)* (R_bl_precharge * C_bl + R_bl * C_bl / 2);
-
- out_time_ramp = sl_data_drv->compute_delay(inrisetime);//After entering one mat, start to consider the inrisetime from 0(0 is passed from outside)
-
- //matchline ops delay
- delay_matchchline += sl_data_drv->delay;
-
- /* second stage, from the trasistors in the comparators(both normal row and dummy row) to the NAND gates that combins both half*/
- //matchline delay, matchline power, matchline_reset for cycle time computation,
-
- ////matchline precharge circuitry routes vertically
- //There are two matchline precharge driver chains per subarray.
- driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wfaprechp, 0, is_dram);
- driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal;
- driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal;
-
- ml_precharge_drv = new Driver(
- driver_c_gate_load,
- driver_c_wire_load,
- driver_r_wire_load,
- is_dram);
-
- ml_precharge_drv->compute_delay(0);
-
-
- rd = tr_R_on(Wdummyn, NCH, 2, is_dram);
- c_intrinsic = Htagbits*(2*drain_C_(Wdummyn, NCH, 2, 1, g_tp.cell_h_def, is_dram)//TODO: the cell_h_def should be revisit
- + drain_C_(Wfaprechp, PCH, 1, 1, g_tp.cell_h_def, is_dram)/Htagbits);//since each halve only has one precharge tx per matchline
-
- Cwire = c_matchline_metal * Htagbits;
- Rwire = r_matchline_metal * Htagbits;
- c_gate_load = gate_C(Waddrnandn + Waddrnandp, 0, is_dram);
-
- double R_ml_precharge = tr_R_on(Wfaprechp, PCH, 1, is_dram);
- //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um;
- double R_ml = Rwire;
- double C_ml = Cwire + c_intrinsic;
- delay_cam_ml_reset = ml_precharge_drv->delay
- + log(g_tp.cam.Vbitpre)* (R_ml_precharge * C_ml + R_ml * C_ml / 2);//TODO: latest CAM has sense amps on matchlines too
-
- //matchline ops delay
- tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
- this_delay = horowitz(out_time_ramp, tf, VTHFA2, VTHFA3, FALL);
- delay_matchchline += this_delay;
- out_time_ramp = this_delay / VTHFA3;
-
- dynSearchEng += ((c_intrinsic + Cwire + c_gate_load)*(subarray.num_rows +1)) //+ 2*drain_C_(Wdummyn, NCH, 2, 1, g_tp.cell_h_def, is_dram))//TODO: need to be precise
- * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd *2;//* Ntbl;//each subarry has two halves
-
- /* third stage, from the NAND2 gates to the drivers in the dummy row */
- rd = tr_R_on(Waddrnandn, NCH, 2, is_dram);
- c_intrinsic = drain_C_(Waddrnandn, NCH, 2, 1, g_tp.cell_h_def, is_dram) +
- drain_C_(Waddrnandp, PCH, 1, 1, g_tp.cell_h_def, is_dram)*2;
- c_gate_load = gate_C(Wdummyinvn + Wdummyinvp, 0, is_dram);
- tf = rd * (c_intrinsic + c_gate_load);
- this_delay = horowitz(out_time_ramp, tf, VTHFA3, VTHFA4, RISE);
- out_time_ramp = this_delay / (1 - VTHFA4);
- delay_matchchline += this_delay;
-
- //only the dummy row has the extra inverter between NAND and NOR gates
- dynSearchEng += (c_intrinsic* (subarray.num_rows+1)+ c_gate_load*2) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;// * Ntbl;
-
- /* fourth stage, from the driver in dummy matchline to the NOR2 gate which drives the wordline of the data portion */
- rd = tr_R_on(Wdummyinvn, NCH, 1, is_dram);
- c_intrinsic = drain_C_(Wdummyinvn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + drain_C_(Wdummyinvp, NCH, 1, 1, g_tp.cell_h_def, is_dram);
- Cwire = c_matchline_metal * Htagbits + c_searchline_metal * (subarray.num_rows+1)/2;
- Rwire = r_matchline_metal * Htagbits + r_searchline_metal * (subarray.num_rows+1)/2;
- c_gate_load = gate_C(Wfanorn + Wfanorp, 0, is_dram);
- tf = rd * (c_intrinsic + Cwire + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
- this_delay = horowitz (out_time_ramp, tf, VTHFA4, VTHFA5, FALL);
- out_time_ramp = this_delay / VTHFA5;
- delay_matchchline += this_delay;
-
- dynSearchEng += (c_intrinsic + Cwire + subarray.num_rows*c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl;
-
- /*final statge from the NOR gate to drive the wordline of the data portion */
-
- //searchline data driver There are two matchline precharge driver chains per subarray.
- driver_c_gate_load = gate_C(W_hit_miss_n, 0, is_dram, false, false);//nmos of the pull down logic
- driver_c_wire_load = subarray.C_wl_ram;
- driver_r_wire_load = subarray.R_wl_ram;
-
- ml_to_ram_wl_drv = new Driver(
- driver_c_gate_load,
- driver_c_wire_load,
- driver_r_wire_load,
- is_dram);
-
-
-
- rd = tr_R_on(Wfanorn, NCH, 1, is_dram);
- c_intrinsic = 2* drain_C_(Wfanorn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + drain_C_(Wfanorp, NCH, 1, 1, g_tp.cell_h_def, is_dram);
- c_gate_load = gate_C(ml_to_ram_wl_drv->width_n[0] + ml_to_ram_wl_drv->width_p[0], 0, is_dram);
- tf = rd * (c_intrinsic + c_gate_load);
- this_delay = horowitz (out_time_ramp, tf, 0.5, 0.5, RISE);
- out_time_ramp = this_delay / (1-0.5);
- delay_matchchline += this_delay;
-
- out_time_ramp = ml_to_ram_wl_drv->compute_delay(out_time_ramp);
-
- //c_gate_load energy is computed in ml_to_ram_wl_drv
- dynSearchEng += (c_intrinsic) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl;
-
-
- /* peripheral-- hitting logic "CMOS VLSI Design Fig11.51*/
- /*Precharge the hitting logic */
- c_intrinsic = 2*drain_C_(W_hit_miss_p, NCH, 2, 1, g_tp.cell_h_def, is_dram);
- Cwire = c_searchline_metal * subarray.num_rows;
- Rwire = r_searchline_metal * subarray.num_rows;
- c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram)* subarray.num_rows;
-
- rd = tr_R_on(W_hit_miss_p, PCH, 1, is_dram, false, false);
- //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um;
- double R_hit_miss = Rwire;
- double C_hit_miss = Cwire + c_intrinsic;
- delay_hit_miss_reset = log(g_tp.cam.Vbitpre)* (rd * C_hit_miss + R_hit_miss * C_hit_miss / 2);
- dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
-
- /*hitting logic evaluation */
- c_intrinsic = 2*drain_C_(W_hit_miss_n, NCH, 2, 1, g_tp.cell_h_def, is_dram);
- Cwire = c_searchline_metal * subarray.num_rows;
- Rwire = r_searchline_metal * subarray.num_rows;
- c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram)* subarray.num_rows;
-
- rd = tr_R_on(W_hit_miss_n, PCH, 1, is_dram, false, false);
- tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
-
- delay_hit_miss = horowitz(0, tf, 0.5, 0.5, FALL);
-
- if (is_fa)
- delay_matchchline += MAX(ml_to_ram_wl_drv->delay, delay_hit_miss);
-
- dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
-
- /* TODO: peripheral-- Priority Encoder, usually this is not necessary in processor components*/
-
- power_matchline.searchOp.dynamic = dynSearchEng;
-
- //leakage in one subarray
- double Iport = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true);//TODO: how much is the idle time? just by *2?
- double Iport_erp = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 2, nmos, false, true);
- double Icell = cmos_Isub_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w, 1, inv, false, true)*2;
- double Icell_comparator = cmos_Isub_leakage(Wdummyn, Wdummyn, 1, inv, false, true)*2;//approx XOR with Inv
-
- leak_power_cc_inverters_sram_cell = Icell * g_tp.cam_cell.Vdd;
- leak_comparator_cam_cell = Icell_comparator * g_tp.cam_cell.Vdd;
- leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.cam_cell.Vdd;
- leak_power_RD_port_sram_cell = Iport_erp * g_tp.cam_cell.Vdd;
- leak_power_SCHP_port_sram_cell = 0;//search port and r/w port are sperate, therefore no access txs in search ports
-
- power_matchline.searchOp.leakage += leak_power_cc_inverters_sram_cell +
- leak_comparator_cam_cell +
- leak_power_acc_tr_RW_or_WR_port_sram_cell +
- leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) +
- leak_power_RD_port_sram_cell * ERP +
- leak_power_SCHP_port_sram_cell*SCHP;
+ double c_matchline_metal, r_matchline_metal, c_searchline_metal, r_searchline_metal, dynSearchEng;
+ int Htagbits;
+
+ double driver_c_gate_load;
+ double driver_c_wire_load;
+ double driver_r_wire_load;
+ //double searchline_precharge_time;
+
+ double leak_power_cc_inverters_sram_cell = 0;
+ double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0;
+ double leak_power_RD_port_sram_cell = 0;
+ double leak_power_SCHP_port_sram_cell = 0;
+ double leak_comparator_cam_cell =0;
+
+ double gate_leak_comparator_cam_cell = 0;
+ double gate_leak_power_cc_inverters_sram_cell = 0;
+ double gate_leak_power_RD_port_sram_cell = 0;
+ double gate_leak_power_SCHP_port_sram_cell = 0;
+
+ c_matchline_metal = cam_cell.get_w() * g_tp.wire_local.C_per_um;
+ c_searchline_metal = cam_cell.get_h() * g_tp.wire_local.C_per_um;
+ r_matchline_metal = cam_cell.get_w() * g_tp.wire_local.R_per_um;
+ r_searchline_metal = cam_cell.get_h() * g_tp.wire_local.R_per_um;
+
+ dynSearchEng = 0.0;
+ delay_matchchline = 0.0;
+ double p_to_n_sizing_r = pmos_to_nmos_sz_ratio(is_dram);
+ bool linear_scaling = false;
+
+ if (linear_scaling) {
+ Wdecdrivep = 450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process
+ Wdecdriven = 300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process
+ Wfadriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
+ Wfadrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
+ Wfadrive2n = 250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process
+ Wfadrive2p = 500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process
+ Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
+ Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
+ Wfadecdrive2n = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
+ Wfadecdrive2p = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process
+ Wfadecdriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
+ Wfadecdrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
+ Wfaprechn = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process
+ Wfainvn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
+ Wfainvp = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
+ Wfanandn = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
+ Wfanandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
+ Wdecnandn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
+ Wdecnandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
+
+ Wfaprechp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
+ Wdummyn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
+ Wdummyinvn = 75 * g_ip->F_sz_um;//this was 60 micron for the 0.8 micron process
+ Wdummyinvp = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process
+ Waddrnandn = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
+ Waddrnandp = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
+ Wfanorn = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
+ Wfanorp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
+ W_hit_miss_n = Wdummyn;
+ W_hit_miss_p = g_tp.min_w_nmos_*p_to_n_sizing_r;
+ //TODO: this number should updated using new layout; from the NAND to output NOR should be computed using logical effort
+ } else {
+ Wdecdrivep = 450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process
+ Wdecdriven = 300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process
+ Wfadriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
+ Wfadrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
+ Wfadrive2n = 250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process
+ Wfadrive2p = 500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process
+ Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
+ Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
+ Wfadecdrive2n = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
+ Wfadecdrive2p = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process
+ Wfadecdriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
+ Wfadecdrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
+ Wfaprechn = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process
+ Wfainvn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
+ Wfainvp = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
+ Wfanandn = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
+ Wfanandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
+ Wdecnandn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
+ Wdecnandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
+
+ Wfaprechp = g_tp.w_pmos_bl_precharge;//this was 10 micron for the 0.8 micron process
+ Wdummyn = g_tp.cam.cell_nmos_w;
+ Wdummyinvn = 75 * g_ip->F_sz_um;//this was 60 micron for the 0.8 micron process
+ Wdummyinvp = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process
+ Waddrnandn = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
+ Waddrnandp = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
+ Wfanorn = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
+ Wfanorp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
+ W_hit_miss_n = Wdummyn;
+ W_hit_miss_p = g_tp.min_w_nmos_*p_to_n_sizing_r;
+ }
+
+ Htagbits = (int)(ceil ((double) (subarray.num_cols_fa_cam) / 2.0));
+
+ /* First stage, searchline is precharged. searchline data driver drives the searchline to open (if miss) the comparators.
+ search_line_delay, search_line_power, search_line_restore_delay for cycle time computation.
+ From the driver(am and an) to the comparators in all the rows including the dummy row,
+ Assuming that comparators in both the normal matching line and the dummy matching line have the same sizing */
+
+ //Searchline precharge circuitry is same as that of bitline. However, no sharing between search ports and r/w ports
+ //Searchline precharge routes horizontally
+ driver_c_gate_load = subarray.num_cols_fa_cam * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
+ driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um;
+ driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.R_per_um;
+
+ sl_precharge_eq_drv = new Driver(
+ driver_c_gate_load,
+ driver_c_wire_load,
+ driver_r_wire_load,
+ is_dram);
+
+ //searchline data driver ; subarray.num_rows + 1 is because of the dummy row
+ //data drv should only have gate_C not 2*gate_C since the two searchlines are differential--same as bitlines
+ driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wdummyn, 0, is_dram, false, false);
+ driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal;
+ driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal;
+ sl_data_drv = new Driver(
+ driver_c_gate_load,
+ driver_c_wire_load,
+ driver_r_wire_load,
+ is_dram);
+
+ sl_precharge_eq_drv->compute_delay(0);
+ double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);//Assuming CAM and SRAM have same Pre_eq_dr
+ double r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;
+ double R_bl = (subarray.num_rows + 1) * r_b_metal;
+ double C_bl = subarray.C_bl_cam;
+ delay_cam_sl_restore = sl_precharge_eq_drv->delay
+ + log(g_tp.cam.Vbitpre) * (R_bl_precharge * C_bl + R_bl * C_bl / 2);
+
+ out_time_ramp = sl_data_drv->compute_delay(inrisetime);//After entering one mat, start to consider the inrisetime from 0(0 is passed from outside)
+
+ //matchline ops delay
+ delay_matchchline += sl_data_drv->delay;
+
+ /* second stage, from the trasistors in the comparators(both normal row and dummy row) to the NAND gates that combins both half*/
+ //matchline delay, matchline power, matchline_reset for cycle time computation,
+
+ ////matchline precharge circuitry routes vertically
+ //There are two matchline precharge driver chains per subarray.
+ driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wfaprechp, 0, is_dram);
+ driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal;
+ driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal;
+
+ ml_precharge_drv = new Driver(
+ driver_c_gate_load,
+ driver_c_wire_load,
+ driver_r_wire_load,
+ is_dram);
+
+ ml_precharge_drv->compute_delay(0);
+
+
+ rd = tr_R_on(Wdummyn, NCH, 2, is_dram);
+ c_intrinsic = Htagbits *
+ (2 * drain_C_(Wdummyn, NCH, 2, 1, g_tp.cell_h_def,
+ is_dram)//TODO: the cell_h_def should be revisit
+ + drain_C_(Wfaprechp, PCH, 1, 1, g_tp.cell_h_def, is_dram) /
+ Htagbits);//since each halve only has one precharge tx per matchline
+
+ Cwire = c_matchline_metal * Htagbits;
+ Rwire = r_matchline_metal * Htagbits;
+ c_gate_load = gate_C(Waddrnandn + Waddrnandp, 0, is_dram);
+
+ double R_ml_precharge = tr_R_on(Wfaprechp, PCH, 1, is_dram);
+ //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um;
+ double R_ml = Rwire;
+ double C_ml = Cwire + c_intrinsic;
+ //TODO: latest CAM has sense amps on matchlines too
+ delay_cam_ml_reset = ml_precharge_drv->delay
+ + log(g_tp.cam.Vbitpre) * (R_ml_precharge * C_ml + R_ml * C_ml / 2);
+
+ //matchline ops delay
+ tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
+ this_delay = horowitz(out_time_ramp, tf, VTHFA2, VTHFA3, FALL);
+ delay_matchchline += this_delay;
+ out_time_ramp = this_delay / VTHFA3;
+
+ dynSearchEng += ((c_intrinsic + Cwire + c_gate_load) *
+ (subarray.num_rows + 1)) //TODO: need to be precise
+ * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd *
+ 2;//each subarry has two halves
+
+ /* third stage, from the NAND2 gates to the drivers in the dummy row */
+ rd = tr_R_on(Waddrnandn, NCH, 2, is_dram);
+ c_intrinsic = drain_C_(Waddrnandn, NCH, 2, 1, g_tp.cell_h_def, is_dram) +
+ drain_C_(Waddrnandp, PCH, 1, 1, g_tp.cell_h_def, is_dram) * 2;
+ c_gate_load = gate_C(Wdummyinvn + Wdummyinvp, 0, is_dram);
+ tf = rd * (c_intrinsic + c_gate_load);
+ this_delay = horowitz(out_time_ramp, tf, VTHFA3, VTHFA4, RISE);
+ out_time_ramp = this_delay / (1 - VTHFA4);
+ delay_matchchline += this_delay;
+
+ //only the dummy row has the extra inverter between NAND and NOR gates
+ dynSearchEng += (c_intrinsic * (subarray.num_rows + 1) + c_gate_load * 2) *
+ g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;// * Ntbl;
+
+ /* fourth stage, from the driver in dummy matchline to the NOR2 gate which drives the wordline of the data portion */
+ rd = tr_R_on(Wdummyinvn, NCH, 1, is_dram);
+ c_intrinsic = drain_C_(Wdummyinvn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + drain_C_(Wdummyinvp, NCH, 1, 1, g_tp.cell_h_def, is_dram);
+ Cwire = c_matchline_metal * Htagbits + c_searchline_metal *
+ (subarray.num_rows + 1) / 2;
+ Rwire = r_matchline_metal * Htagbits + r_searchline_metal *
+ (subarray.num_rows + 1) / 2;
+ c_gate_load = gate_C(Wfanorn + Wfanorp, 0, is_dram);
+ tf = rd * (c_intrinsic + Cwire + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
+ this_delay = horowitz (out_time_ramp, tf, VTHFA4, VTHFA5, FALL);
+ out_time_ramp = this_delay / VTHFA5;
+ delay_matchchline += this_delay;
+
+ dynSearchEng += (c_intrinsic + Cwire + subarray.num_rows * c_gate_load) *
+ g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl;
+
+ /*final statge from the NOR gate to drive the wordline of the data portion */
+
+ //searchline data driver There are two matchline precharge driver chains per subarray.
+ driver_c_gate_load = gate_C(W_hit_miss_n, 0, is_dram, false, false);//nmos of the pull down logic
+ driver_c_wire_load = subarray.C_wl_ram;
+ driver_r_wire_load = subarray.R_wl_ram;
+
+ ml_to_ram_wl_drv = new Driver(
+ driver_c_gate_load,
+ driver_c_wire_load,
+ driver_r_wire_load,
+ is_dram);
+
+
+
+ rd = tr_R_on(Wfanorn, NCH, 1, is_dram);
+ c_intrinsic = 2 * drain_C_(Wfanorn, NCH, 1, 1, g_tp.cell_h_def, is_dram) +
+ drain_C_(Wfanorp, NCH, 1, 1, g_tp.cell_h_def, is_dram);
+ c_gate_load = gate_C(ml_to_ram_wl_drv->width_n[0] + ml_to_ram_wl_drv->width_p[0], 0, is_dram);
+ tf = rd * (c_intrinsic + c_gate_load);
+ this_delay = horowitz (out_time_ramp, tf, 0.5, 0.5, RISE);
+ out_time_ramp = this_delay / (1 - 0.5);
+ delay_matchchline += this_delay;
+
+ out_time_ramp = ml_to_ram_wl_drv->compute_delay(out_time_ramp);
+
+ //c_gate_load energy is computed in ml_to_ram_wl_drv
+ dynSearchEng += (c_intrinsic) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl;
+
+
+ /* peripheral-- hitting logic "CMOS VLSI Design Fig11.51*/
+ /*Precharge the hitting logic */
+ c_intrinsic = 2 *
+ drain_C_(W_hit_miss_p, NCH, 2, 1, g_tp.cell_h_def, is_dram);
+ Cwire = c_searchline_metal * subarray.num_rows;
+ Rwire = r_searchline_metal * subarray.num_rows;
+ c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram) *
+ subarray.num_rows;
+
+ rd = tr_R_on(W_hit_miss_p, PCH, 1, is_dram, false, false);
+ //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um;
+ double R_hit_miss = Rwire;
+ double C_hit_miss = Cwire + c_intrinsic;
+ delay_hit_miss_reset = log(g_tp.cam.Vbitpre) *
+ (rd * C_hit_miss + R_hit_miss * C_hit_miss / 2);
+ dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
+
+ /*hitting logic evaluation */
+ c_intrinsic = 2 *
+ drain_C_(W_hit_miss_n, NCH, 2, 1, g_tp.cell_h_def, is_dram);
+ Cwire = c_searchline_metal * subarray.num_rows;
+ Rwire = r_searchline_metal * subarray.num_rows;
+ c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram) *
+ subarray.num_rows;
+
+ rd = tr_R_on(W_hit_miss_n, PCH, 1, is_dram, false, false);
+ tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
+
+ delay_hit_miss = horowitz(0, tf, 0.5, 0.5, FALL);
+
+ if (is_fa)
+ delay_matchchline += MAX(ml_to_ram_wl_drv->delay, delay_hit_miss);
+
+ dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
+
+ /* TODO: peripheral-- Priority Encoder, usually this is not necessary in processor components*/
+
+ power_matchline.searchOp.dynamic = dynSearchEng;
+
+ //leakage in one subarray
+ double Iport = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true);//TODO: how much is the idle time? just by *2?
+ double Iport_erp = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 2, nmos, false, true);
+ double Icell = cmos_Isub_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w,
+ 1, inv, false, true) * 2;
+ //approx XOR with Inv
+ double Icell_comparator = cmos_Isub_leakage(Wdummyn, Wdummyn, 1, inv,
+ false, true) * 2;
+
+ leak_power_cc_inverters_sram_cell = Icell * g_tp.cam_cell.Vdd;
+ leak_comparator_cam_cell = Icell_comparator * g_tp.cam_cell.Vdd;
+ leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.cam_cell.Vdd;
+ leak_power_RD_port_sram_cell = Iport_erp * g_tp.cam_cell.Vdd;
+ leak_power_SCHP_port_sram_cell = 0;//search port and r/w port are sperate, therefore no access txs in search ports
+
+ power_matchline.searchOp.leakage += leak_power_cc_inverters_sram_cell +
+ leak_comparator_cam_cell +
+ leak_power_acc_tr_RW_or_WR_port_sram_cell +
+ leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) +
+ leak_power_RD_port_sram_cell * ERP +
+ leak_power_SCHP_port_sram_cell * SCHP;
// power_matchline.searchOp.leakage += leak_comparator_cam_cell;
- power_matchline.searchOp.leakage *= (subarray.num_rows+1) * subarray.num_cols_fa_cam;//TODO:dumy line precise
- power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(0, Wfaprechp, 1, pmos) * g_tp.cam_cell.Vdd;
- power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd;
- power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(Wfanorn, Wfanorp,2, nor) * g_tp.cam_cell.Vdd;
- //In idle states, the hit/miss txs are closed (on) therefore no Isub
- power_matchline.searchOp.leakage += 0;// subarray.num_rows * cmos_Isub_leakage(W_hit_miss_n, 0,1, nmos) * g_tp.cam_cell.Vdd+
+ power_matchline.searchOp.leakage *= (subarray.num_rows + 1) *
+ subarray.num_cols_fa_cam;//TODO:dumy line precise
+ power_matchline.searchOp.leakage += (subarray.num_rows + 1) *
+ cmos_Isub_leakage(0, Wfaprechp, 1, pmos) * g_tp.cam_cell.Vdd;
+ power_matchline.searchOp.leakage += (subarray.num_rows + 1) *
+ cmos_Isub_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd;
+ power_matchline.searchOp.leakage += (subarray.num_rows + 1) *
+ cmos_Isub_leakage(Wfanorn, Wfanorp, 2, nor) * g_tp.cam_cell.Vdd;
+ //In idle states, the hit/miss txs are closed (on) therefore no Isub
+ power_matchline.searchOp.leakage += 0;// subarray.num_rows * cmos_Isub_leakage(W_hit_miss_n, 0,1, nmos) * g_tp.cam_cell.Vdd+
// + cmos_Isub_leakage(0, W_hit_miss_p,1, pmos) * g_tp.cam_cell.Vdd;
- //in idle state, Ig_on only possibly exist in access transistors of read only ports
- double Ig_port_erp = cmos_Ig_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true);
- double Ig_cell = cmos_Ig_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w, 1, inv, false, true)*2;
- double Ig_cell_comparator = cmos_Ig_leakage(Wdummyn, Wdummyn, 1, inv, false, true)*2;// cmos_Ig_leakage(Wdummyn, 0, 2, nmos)*2;
-
- gate_leak_comparator_cam_cell = Ig_cell_comparator* g_tp.cam_cell.Vdd;
- gate_leak_power_cc_inverters_sram_cell = Ig_cell*g_tp.cam_cell.Vdd;
- gate_leak_power_RD_port_sram_cell = Ig_port_erp*g_tp.sram_cell.Vdd;
- gate_leak_power_SCHP_port_sram_cell = 0;
-
- //cout<<"power_matchline.searchOp.leakage"<<power_matchline.searchOp.leakage<<endl;
-
- power_matchline.searchOp.gate_leakage += gate_leak_power_cc_inverters_sram_cell;
- power_matchline.searchOp.gate_leakage += gate_leak_comparator_cam_cell;
- power_matchline.searchOp.gate_leakage += gate_leak_power_SCHP_port_sram_cell*SCHP + gate_leak_power_RD_port_sram_cell * ERP;
- power_matchline.searchOp.gate_leakage *= (subarray.num_rows+1) * subarray.num_cols_fa_cam;//TODO:dumy line precise
- power_matchline.searchOp.gate_leakage += (subarray.num_rows+1) * cmos_Ig_leakage(0, Wfaprechp,1, pmos) * g_tp.cam_cell.Vdd;
- power_matchline.searchOp.gate_leakage += (subarray.num_rows+1) * cmos_Ig_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd;
- power_matchline.searchOp.gate_leakage += (subarray.num_rows+1) * cmos_Ig_leakage(Wfanorn, Wfanorp, 2, nor) * g_tp.cam_cell.Vdd;
- power_matchline.searchOp.gate_leakage += subarray.num_rows * cmos_Ig_leakage(W_hit_miss_n, 0,1, nmos) * g_tp.cam_cell.Vdd+
- + cmos_Ig_leakage(0, W_hit_miss_p,1, pmos) * g_tp.cam_cell.Vdd;
-
-
- return out_time_ramp;
+ //in idle state, Ig_on only possibly exist in access transistors of read only ports
+ double Ig_port_erp = cmos_Ig_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true);
+ double Ig_cell = cmos_Ig_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w,
+ 1, inv, false, true) * 2;
+ double Ig_cell_comparator = cmos_Ig_leakage(Wdummyn, Wdummyn, 1, inv,
+ false, true) * 2;
+
+ gate_leak_comparator_cam_cell = Ig_cell_comparator * g_tp.cam_cell.Vdd;
+ gate_leak_power_cc_inverters_sram_cell = Ig_cell * g_tp.cam_cell.Vdd;
+ gate_leak_power_RD_port_sram_cell = Ig_port_erp * g_tp.sram_cell.Vdd;
+ gate_leak_power_SCHP_port_sram_cell = 0;
+
+ //cout<<"power_matchline.searchOp.leakage"<<power_matchline.searchOp.leakage<<endl;
+
+ power_matchline.searchOp.gate_leakage +=
+ gate_leak_power_cc_inverters_sram_cell;
+ power_matchline.searchOp.gate_leakage += gate_leak_comparator_cam_cell;
+ power_matchline.searchOp.gate_leakage +=
+ gate_leak_power_SCHP_port_sram_cell * SCHP +
+ gate_leak_power_RD_port_sram_cell * ERP;
+ power_matchline.searchOp.gate_leakage *= (subarray.num_rows + 1) *
+ subarray.num_cols_fa_cam;//TODO:dumy line precise
+ power_matchline.searchOp.gate_leakage += (subarray.num_rows + 1) *
+ cmos_Ig_leakage(0, Wfaprechp, 1, pmos) * g_tp.cam_cell.Vdd;
+ power_matchline.searchOp.gate_leakage += (subarray.num_rows + 1) *
+ cmos_Ig_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd;
+ power_matchline.searchOp.gate_leakage += (subarray.num_rows + 1) *
+ cmos_Ig_leakage(Wfanorn, Wfanorp, 2, nor) * g_tp.cam_cell.Vdd;
+ power_matchline.searchOp.gate_leakage += subarray.num_rows *
+ cmos_Ig_leakage(W_hit_miss_n, 0, 1, nmos) * g_tp.cam_cell.Vdd +
+ + cmos_Ig_leakage(0, W_hit_miss_p, 1, pmos) * g_tp.cam_cell.Vdd;
+
+
+ return out_time_ramp;
}
-double Mat::width_write_driver_or_write_mux()
-{
- // calculate resistance of SRAM cell pull-up PMOS transistor
- // cam and sram have same cell trasistor properties
- double R_sram_cell_pull_up_tr = tr_R_on(g_tp.sram.cell_pmos_w, NCH, 1, is_dram, true);
- double R_access_tr = tr_R_on(g_tp.sram.cell_a_w, NCH, 1, is_dram, true);
- double target_R_write_driver_and_mux = (2 * R_sram_cell_pull_up_tr - R_access_tr) / 2;
- double width_write_driver_nmos = R_to_w(target_R_write_driver_and_mux, NCH, is_dram);
+double Mat::width_write_driver_or_write_mux() {
+ // calculate resistance of SRAM cell pull-up PMOS transistor
+ // cam and sram have same cell trasistor properties
+ double R_sram_cell_pull_up_tr = tr_R_on(g_tp.sram.cell_pmos_w, NCH, 1, is_dram, true);
+ double R_access_tr = tr_R_on(g_tp.sram.cell_a_w, NCH, 1, is_dram, true);
+ double target_R_write_driver_and_mux = (2 * R_sram_cell_pull_up_tr - R_access_tr) / 2;
+ double width_write_driver_nmos = R_to_w(target_R_write_driver_and_mux, NCH, is_dram);
- return width_write_driver_nmos;
+ return width_write_driver_nmos;
}
@@ -1007,134 +1032,164 @@ double Mat::width_write_driver_or_write_mux()
double Mat::compute_comparators_height(
int tagbits,
int number_ways_in_mat,
- double subarray_mem_cell_area_width)
-{
- double nand2_area = compute_gate_area(NAND, 2, 0, g_tp.w_comp_n, g_tp.cell_h_def);
- double cumulative_area = nand2_area * number_ways_in_mat * tagbits / 4;
- return cumulative_area / subarray_mem_cell_area_width;
+ double subarray_mem_cell_area_width) {
+ double nand2_area = compute_gate_area(NAND, 2, 0, g_tp.w_comp_n, g_tp.cell_h_def);
+ double cumulative_area = nand2_area * number_ways_in_mat * tagbits / 4;
+ return cumulative_area / subarray_mem_cell_area_width;
}
-double Mat::compute_bitline_delay(double inrisetime)
-{
- double V_b_pre, v_th_mem_cell, V_wl;
- double tstep;
- double dynRdEnergy = 0.0, dynWriteEnergy = 0.0;
- double R_cell_pull_down=0.0, R_cell_acc =0.0, r_dev=0.0;
- int deg_senseamp_muxing = dp.Ndsam_lev_1 * dp.Ndsam_lev_2;
-
- double R_b_metal = camFlag? cam_cell.h:cell.h * g_tp.wire_local.R_per_um;
- double R_bl = subarray.num_rows * R_b_metal;
- double C_bl = subarray.C_bl;
-
- // TODO: no leakage for DRAMs?
- double leak_power_cc_inverters_sram_cell = 0;
- double gate_leak_power_cc_inverters_sram_cell = 0;
- double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0;
- double leak_power_RD_port_sram_cell = 0;
- double gate_leak_power_RD_port_sram_cell = 0;
-
- if (is_dram == true)
- {
- V_b_pre = g_tp.dram.Vbitpre;
- v_th_mem_cell = g_tp.dram_acc.Vth;
- V_wl = g_tp.vpp;
- //The access transistor is not folded. So we just need to specify a threshold value for the
- //folding width that is equal to or greater than Wmemcella.
- R_cell_acc = tr_R_on(g_tp.dram.cell_a_w, NCH, 1, true, true);
- r_dev = g_tp.dram_cell_Vdd / g_tp.dram_cell_I_on + R_bl / 2;
- }
- else
- { //SRAM
- V_b_pre = g_tp.sram.Vbitpre;
- v_th_mem_cell = g_tp.sram_cell.Vth;
- V_wl = g_tp.sram_cell.Vdd;
- R_cell_pull_down = tr_R_on(g_tp.sram.cell_nmos_w, NCH, 1, false, true);
- R_cell_acc = tr_R_on(g_tp.sram.cell_a_w, NCH, 1, false, true);
-
- //Leakage current of an SRAM cell
- double Iport = cmos_Isub_leakage(g_tp.sram.cell_a_w, 0, 1, nmos,false, true);//TODO: how much is the idle time? just by *2?
- double Iport_erp = cmos_Isub_leakage(g_tp.sram.cell_a_w, 0, 2, nmos,false, true);
- double Icell = cmos_Isub_leakage(g_tp.sram.cell_nmos_w, g_tp.sram.cell_pmos_w, 1, inv,false, true)*2;//two invs per cell
-
- leak_power_cc_inverters_sram_cell = Icell * g_tp.sram_cell.Vdd;
- leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.sram_cell.Vdd;
- leak_power_RD_port_sram_cell = Iport_erp * g_tp.sram_cell.Vdd;
+double Mat::compute_bitline_delay(double inrisetime) {
+ double V_b_pre, v_th_mem_cell, V_wl;
+ double tstep;
+ double dynRdEnergy = 0.0, dynWriteEnergy = 0.0;
+ double R_cell_pull_down = 0.0, R_cell_acc = 0.0, r_dev = 0.0;
+ int deg_senseamp_muxing = dp.Ndsam_lev_1 * dp.Ndsam_lev_2;
+
+ double R_b_metal = camFlag ? cam_cell.h : cell.h * g_tp.wire_local.R_per_um;
+ double R_bl = subarray.num_rows * R_b_metal;
+ double C_bl = subarray.C_bl;
+
+ // TODO: no leakage for DRAMs?
+ double leak_power_cc_inverters_sram_cell = 0;
+ double gate_leak_power_cc_inverters_sram_cell = 0;
+ double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0;
+ double leak_power_RD_port_sram_cell = 0;
+ double gate_leak_power_RD_port_sram_cell = 0;
+
+ if (is_dram == true) {
+ V_b_pre = g_tp.dram.Vbitpre;
+ v_th_mem_cell = g_tp.dram_acc.Vth;
+ V_wl = g_tp.vpp;
+ //The access transistor is not folded. So we just need to specify a
+ // threshold value for the folding width that is equal to or greater
+ // than Wmemcella.
+ R_cell_acc = tr_R_on(g_tp.dram.cell_a_w, NCH, 1, true, true);
+ r_dev = g_tp.dram_cell_Vdd / g_tp.dram_cell_I_on + R_bl / 2;
+ } else { //SRAM
+ V_b_pre = g_tp.sram.Vbitpre;
+ v_th_mem_cell = g_tp.sram_cell.Vth;
+ V_wl = g_tp.sram_cell.Vdd;
+ R_cell_pull_down = tr_R_on(g_tp.sram.cell_nmos_w, NCH, 1, false, true);
+ R_cell_acc = tr_R_on(g_tp.sram.cell_a_w, NCH, 1, false, true);
+
+ //Leakage current of an SRAM cell
+ //TODO: how much is the idle time? just by *2?
+ double Iport = cmos_Isub_leakage(g_tp.sram.cell_a_w, 0, 1, nmos,
+ false, true);
+ double Iport_erp = cmos_Isub_leakage(g_tp.sram.cell_a_w, 0, 2, nmos,
+ false, true);
+ double Icell = cmos_Isub_leakage(g_tp.sram.cell_nmos_w,
+ g_tp.sram.cell_pmos_w, 1, inv, false,
+ true) * 2;//two invs per cell
+
+ leak_power_cc_inverters_sram_cell = Icell * g_tp.sram_cell.Vdd;
+ leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.sram_cell.Vdd;
+ leak_power_RD_port_sram_cell = Iport_erp * g_tp.sram_cell.Vdd;
+
+
+ //in idle state, Ig_on only possibly exist in access transistors of read only ports
+ double Ig_port_erp = cmos_Ig_leakage(g_tp.sram.cell_a_w, 0, 1, nmos,
+ false, true);
+ double Ig_cell = cmos_Ig_leakage(g_tp.sram.cell_nmos_w,
+ g_tp.sram.cell_pmos_w, 1, inv, false,
+ true);
+
+ gate_leak_power_cc_inverters_sram_cell = Ig_cell * g_tp.sram_cell.Vdd;
+ gate_leak_power_RD_port_sram_cell = Ig_port_erp * g_tp.sram_cell.Vdd;
+ }
- //in idle state, Ig_on only possibly exist in access transistors of read only ports
- double Ig_port_erp = cmos_Ig_leakage(g_tp.sram.cell_a_w, 0, 1, nmos,false, true);
- double Ig_cell = cmos_Ig_leakage(g_tp.sram.cell_nmos_w, g_tp.sram.cell_pmos_w, 1, inv,false, true);
-
- gate_leak_power_cc_inverters_sram_cell = Ig_cell*g_tp.sram_cell.Vdd;
- gate_leak_power_RD_port_sram_cell = Ig_port_erp*g_tp.sram_cell.Vdd;
- }
-
-
- double C_drain_bit_mux = drain_C_(g_tp.w_nmos_b_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w / (2 *(RWP + ERP + SCHP)), is_dram);
- double R_bit_mux = tr_R_on(g_tp.w_nmos_b_mux, NCH, 1, is_dram);
- double C_drain_sense_amp_iso = drain_C_(g_tp.w_iso, PCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
- double R_sense_amp_iso = tr_R_on(g_tp.w_iso, PCH, 1, is_dram);
- double C_sense_amp_latch = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0, is_dram) +
- drain_C_(g_tp.w_sense_n, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
- drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
- double C_drain_sense_amp_mux = drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
-
- if (is_dram)
- {
- double fraction = dp.V_b_sense / ((g_tp.dram_cell_Vdd/2) * g_tp.dram_cell_C /(g_tp.dram_cell_C + C_bl));
- tstep = 2.3 * fraction * r_dev *
- (g_tp.dram_cell_C * (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux)) /
- (g_tp.dram_cell_C + (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux));
- delay_writeback = tstep;
- dynRdEnergy += (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
- (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/;
- dynWriteEnergy += (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch) *
- (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/ * num_act_mats_hor_dir*100;
- per_bitline_read_energy = (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
- (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd;
- }
- else
- {
- double tau;
-
- if (deg_bl_muxing > 1)
- {
- tau = (R_cell_pull_down + R_cell_acc) *
- (C_bl + 2*C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) +
- R_bl * (C_bl/2 + 2*C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) +
- R_bit_mux * (C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) +
- R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux);
- dynRdEnergy += (C_bl + 2 * C_drain_bit_mux) * 2 * dp.V_b_sense * g_tp.sram_cell.Vdd /*
- subarray.num_cols * num_subarrays_per_mat*/;
- dynRdEnergy += (2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
- 2 * dp.V_b_sense * g_tp.sram_cell.Vdd * (1.0/*subarray.num_cols * num_subarrays_per_mat*/ / deg_bl_muxing);
- dynWriteEnergy += ((1.0/*subarray.num_cols *num_subarrays_per_mat*/ / deg_bl_muxing) / deg_senseamp_muxing) *
- num_act_mats_hor_dir * (C_bl + 2*C_drain_bit_mux) * g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd*2;
- //Write Ops are differential for SRAM
- }
- else
- {
- tau = (R_cell_pull_down + R_cell_acc) *
- (C_bl + C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) + R_bl * C_bl / 2 +
- R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux);
- dynRdEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
- 2 * dp.V_b_sense * g_tp.sram_cell.Vdd /* subarray.num_cols * num_subarrays_per_mat*/;
- dynWriteEnergy += (((1.0/*subarray.num_cols * num_subarrays_per_mat*/ / deg_bl_muxing) / deg_senseamp_muxing) *
- num_act_mats_hor_dir * C_bl) * g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd*2;
+ double C_drain_bit_mux = drain_C_(g_tp.w_nmos_b_mux, NCH, 1, 0,
+ camFlag ? cam_cell.w : cell.w /
+ (2 * (RWP + ERP + SCHP)), is_dram);
+ double R_bit_mux = tr_R_on(g_tp.w_nmos_b_mux, NCH, 1, is_dram);
+ double C_drain_sense_amp_iso = drain_C_(g_tp.w_iso, PCH, 1, 0,
+ camFlag ? cam_cell.w :
+ cell.w * deg_bl_muxing /
+ (RWP + ERP + SCHP), is_dram);
+ double R_sense_amp_iso = tr_R_on(g_tp.w_iso, PCH, 1, is_dram);
+ double C_sense_amp_latch = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0,
+ is_dram) +
+ drain_C_(g_tp.w_sense_n, NCH, 1, 0, camFlag ? cam_cell.w :
+ cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
+ drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag ? cam_cell.w :
+ cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
+ double C_drain_sense_amp_mux = drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0,
+ camFlag ? cam_cell.w :
+ cell.w * deg_bl_muxing /
+ (RWP + ERP + SCHP), is_dram);
+
+ if (is_dram) {
+ double fraction = dp.V_b_sense / ((g_tp.dram_cell_Vdd / 2) *
+ g_tp.dram_cell_C /
+ (g_tp.dram_cell_C + C_bl));
+ tstep = 2.3 * fraction * r_dev *
+ (g_tp.dram_cell_C * (C_bl + 2 * C_drain_sense_amp_iso +
+ C_sense_amp_latch + C_drain_sense_amp_mux)) /
+ (g_tp.dram_cell_C + (C_bl + 2 * C_drain_sense_amp_iso +
+ C_sense_amp_latch + C_drain_sense_amp_mux));
+ delay_writeback = tstep;
+ dynRdEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch +
+ C_drain_sense_amp_mux) *
+ (g_tp.dram_cell_Vdd / 2) *
+ g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/;
+ dynWriteEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch) *
+ (g_tp.dram_cell_Vdd / 2) *
+ g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/ *
+ num_act_mats_hor_dir * 100;
+ per_bitline_read_energy = (C_bl + 2 * C_drain_sense_amp_iso +
+ C_sense_amp_latch + C_drain_sense_amp_mux) *
+ (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd;
+ } else {
+ double tau;
+
+ if (deg_bl_muxing > 1) {
+ tau = (R_cell_pull_down + R_cell_acc) *
+ (C_bl + 2 * C_drain_bit_mux + 2 * C_drain_sense_amp_iso +
+ C_sense_amp_latch + C_drain_sense_amp_mux) +
+ R_bl * (C_bl / 2 + 2 * C_drain_bit_mux + 2 *
+ C_drain_sense_amp_iso + C_sense_amp_latch +
+ C_drain_sense_amp_mux) +
+ R_bit_mux * (C_drain_bit_mux + 2 * C_drain_sense_amp_iso +
+ C_sense_amp_latch + C_drain_sense_amp_mux) +
+ R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch +
+ C_drain_sense_amp_mux);
+ dynRdEnergy += (C_bl + 2 * C_drain_bit_mux) * 2 * dp.V_b_sense *
+ g_tp.sram_cell.Vdd;
+ dynRdEnergy += (2 * C_drain_sense_amp_iso + C_sense_amp_latch +
+ C_drain_sense_amp_mux) *
+ 2 * dp.V_b_sense * g_tp.sram_cell.Vdd *
+ (1.0/*subarray.num_cols * num_subarrays_per_mat*/ /
+ deg_bl_muxing);
+ dynWriteEnergy += ((1.0/*subarray.num_cols *num_subarrays_per_mat*/ /
+ deg_bl_muxing) / deg_senseamp_muxing) *
+ num_act_mats_hor_dir * (C_bl + 2 * C_drain_bit_mux) *
+ g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd * 2;
+ //Write Ops are differential for SRAM
+ } else {
+ tau = (R_cell_pull_down + R_cell_acc) *
+ (C_bl + C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) + R_bl * C_bl / 2 +
+ R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux);
+ dynRdEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
+ 2 * dp.V_b_sense * g_tp.sram_cell.Vdd /* subarray.num_cols * num_subarrays_per_mat*/;
+ dynWriteEnergy += (((1.0/*subarray.num_cols * num_subarrays_per_mat*/ /
+ deg_bl_muxing) / deg_senseamp_muxing) *
+ num_act_mats_hor_dir * C_bl) *
+ g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd * 2;
+
+ }
+ tstep = tau * log(V_b_pre / (V_b_pre - dp.V_b_sense));
+ power_bitline.readOp.leakage =
+ leak_power_cc_inverters_sram_cell +
+ leak_power_acc_tr_RW_or_WR_port_sram_cell +
+ leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) +
+ leak_power_RD_port_sram_cell * ERP;
+ power_bitline.readOp.gate_leakage = gate_leak_power_cc_inverters_sram_cell +
+ gate_leak_power_RD_port_sram_cell * ERP;
}
- tstep = tau * log(V_b_pre / (V_b_pre - dp.V_b_sense));
- power_bitline.readOp.leakage =
- leak_power_cc_inverters_sram_cell +
- leak_power_acc_tr_RW_or_WR_port_sram_cell +
- leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) +
- leak_power_RD_port_sram_cell * ERP;
- power_bitline.readOp.gate_leakage = gate_leak_power_cc_inverters_sram_cell +
- gate_leak_power_RD_port_sram_cell * ERP;
-
- }
// cout<<"leak_power_cc_inverters_sram_cell"<<leak_power_cc_inverters_sram_cell<<endl;
// cout<<"leak_power_acc_tr_RW_or_WR_port_sram_cell"<<leak_power_acc_tr_RW_or_WR_port_sram_cell<<endl;
@@ -1142,607 +1197,684 @@ double Mat::compute_bitline_delay(double inrisetime)
// cout<<"leak_power_RD_port_sram_cell"<<leak_power_RD_port_sram_cell<<endl;
- /* take input rise time into account */
- double m = V_wl / inrisetime;
- if (tstep <= (0.5 * (V_wl - v_th_mem_cell) / m))
- {
- delay_bitline = sqrt(2 * tstep * (V_wl - v_th_mem_cell)/ m);
- }
- else
- {
- delay_bitline = tstep + (V_wl - v_th_mem_cell) / (2 * m);
- }
+ /* take input rise time into account */
+ double m = V_wl / inrisetime;
+ if (tstep <= (0.5 * (V_wl - v_th_mem_cell) / m)) {
+ delay_bitline = sqrt(2 * tstep * (V_wl - v_th_mem_cell) / m);
+ } else {
+ delay_bitline = tstep + (V_wl - v_th_mem_cell) / (2 * m);
+ }
- bool is_fa = (dp.fully_assoc) ? true : false;
+ bool is_fa = (dp.fully_assoc) ? true : false;
- if (dp.is_tag == false || is_fa == false)
- {
- power_bitline.readOp.dynamic = dynRdEnergy;
- power_bitline.writeOp.dynamic = dynWriteEnergy;
- }
+ if (dp.is_tag == false || is_fa == false) {
+ power_bitline.readOp.dynamic = dynRdEnergy;
+ power_bitline.writeOp.dynamic = dynWriteEnergy;
+ }
- double outrisetime = 0;
- return outrisetime;
+ double outrisetime = 0;
+ return outrisetime;
}
-double Mat::compute_sa_delay(double inrisetime)
-{
- //int num_sa_subarray = subarray.num_cols / deg_bl_muxing; //in a subarray
-
- //Bitline circuitry leakage.
- double Iiso = simplified_pmos_leakage(g_tp.w_iso, is_dram);
- double IsenseEn = simplified_nmos_leakage(g_tp.w_sense_en, is_dram);
- double IsenseN = simplified_nmos_leakage(g_tp.w_sense_n, is_dram);
- double IsenseP = simplified_pmos_leakage(g_tp.w_sense_p, is_dram);
-
- double lkgIdlePh = IsenseEn;//+ 2*IoBufP;
- //double lkgWritePh = Iiso + IsenseEn;// + 2*IoBufP + 2*Ipch;
- double lkgReadPh = Iiso + IsenseN + IsenseP;//+ IoBufN + IoBufP + 2*IsPch ;
- //double lkgRead = lkgReadPh * num_sa_subarray * 4 * num_act_mats_hor_dir +
- // lkgIdlePh * num_sa_subarray * 4 * (num_mats - num_act_mats_hor_dir);
- double lkgIdle = lkgIdlePh /*num_sa_subarray * num_subarrays_per_mat*/;
- leak_power_sense_amps_closed_page_state = lkgIdlePh * g_tp.peri_global.Vdd /* num_sa_subarray * num_subarrays_per_mat*/;
- leak_power_sense_amps_open_page_state = lkgReadPh * g_tp.peri_global.Vdd /* num_sa_subarray * num_subarrays_per_mat*/;
-
- // sense amplifier has to drive logic in "data out driver" and sense precharge load.
- // load seen by sense amp. New delay model for sense amp that is sensitive to both the output time
- //constant as well as the magnitude of input differential voltage.
- double C_ld = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0, is_dram) +
- drain_C_(g_tp.w_sense_n, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
- drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
- drain_C_(g_tp.w_iso,PCH,1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
- drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
- double tau = C_ld / g_tp.gm_sense_amp_latch;
- delay_sa = tau * log(g_tp.peri_global.Vdd / dp.V_b_sense);
- power_sa.readOp.dynamic = C_ld * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd /* num_sa_subarray
+double Mat::compute_sa_delay(double inrisetime) { // Sets delay_sa, power_sa.readOp (dynamic, leakage) and the two leak_power_sense_amps_* members; inrisetime is not read and 0 is always returned.
+ //int num_sa_subarray = subarray.num_cols / deg_bl_muxing; //in a subarray
+
+ // Leakage currents of the bitline circuitry transistors (isolation, sense-enable, sense N and sense P widths).
+ double Iiso = simplified_pmos_leakage(g_tp.w_iso, is_dram);
+ double IsenseEn = simplified_nmos_leakage(g_tp.w_sense_en, is_dram);
+ double IsenseN = simplified_nmos_leakage(g_tp.w_sense_n, is_dram);
+ double IsenseP = simplified_pmos_leakage(g_tp.w_sense_p, is_dram);
+
+ double lkgIdlePh = IsenseEn;//+ 2*IoBufP;
+ //double lkgWritePh = Iiso + IsenseEn;// + 2*IoBufP + 2*Ipch;
+ double lkgReadPh = Iiso + IsenseN + IsenseP;//+ IoBufN + IoBufP + 2*IsPch ;
+ //double lkgRead = lkgReadPh * num_sa_subarray * 4 * num_act_mats_hor_dir +
+ // lkgIdlePh * num_sa_subarray * 4 * (num_mats - num_act_mats_hor_dir);
+ double lkgIdle = lkgIdlePh /*num_sa_subarray * num_subarrays_per_mat*/;
+ leak_power_sense_amps_closed_page_state = lkgIdlePh * g_tp.peri_global.Vdd /* num_sa_subarray * num_subarrays_per_mat*/;
+ leak_power_sense_amps_open_page_state = lkgReadPh * g_tp.peri_global.Vdd /* num_sa_subarray * num_subarrays_per_mat*/;
+
+ // The sense amplifier has to drive the logic in the "data out driver" and the sense precharge load.
+ // C_ld is the load seen by the sense amp. The delay model is sensitive to both the output time
+ // constant and the magnitude of the input differential voltage.
+ double C_ld = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0, is_dram) +
+ drain_C_(g_tp.w_sense_n, NCH, 1, 0, // NOTE(review): in the width argument of each drain_C_ call here, the conditional operator has lower precedence than the multiplication and division, so only cell.w is scaled and cam_cell.w is passed unscaled; confirm this is intended
+ camFlag ? cam_cell.w : cell.w * deg_bl_muxing /
+ (RWP + ERP + SCHP), is_dram) +
+ drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag ?
+ cam_cell.w : cell.w * deg_bl_muxing / (RWP + ERP + SCHP),
+ is_dram) +
+ drain_C_(g_tp.w_iso, PCH, 1, 0, camFlag ?
+ cam_cell.w : cell.w * deg_bl_muxing / (RWP + ERP + SCHP),
+ is_dram) +
+ drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag ?
+ cam_cell.w : cell.w * deg_bl_muxing / (RWP + ERP + SCHP),
+ is_dram);
+ double tau = C_ld / g_tp.gm_sense_amp_latch; // time constant: load capacitance divided by g_tp.gm_sense_amp_latch
+ delay_sa = tau * log(g_tp.peri_global.Vdd / dp.V_b_sense); // grows with the log of Vdd over the bitline sense voltage
+ power_sa.readOp.dynamic = C_ld * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd /* num_sa_subarray
num_subarrays_per_mat * num_act_mats_hor_dir*/;
- power_sa.readOp.leakage = lkgIdle * g_tp.peri_global.Vdd;
+ power_sa.readOp.leakage = lkgIdle * g_tp.peri_global.Vdd; // idle-phase leakage current times the peripheral Vdd
- double outrisetime = 0;
- return outrisetime;
+ double outrisetime = 0; // no output rise time is computed in this function
+ return outrisetime;
}
-double Mat::compute_subarray_out_drv(double inrisetime)
-{
- double C_ld, rd, tf, this_delay;
- double p_to_n_sz_r = pmos_to_nmos_sz_ratio(is_dram);
-
- // delay of signal through pass-transistor of first level of sense-amp mux to input of inverter-buffer.
- rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram);
- C_ld = dp.Ndsam_lev_1 * drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
- gate_C(g_tp.min_w_nmos_ + p_to_n_sz_r * g_tp.min_w_nmos_, 0.0, is_dram);
- tf = rd * C_ld;
- this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
- delay_subarray_out_drv += this_delay;
- inrisetime = this_delay/(1.0 - 0.5);
- power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
- power_subarray_out_drv.readOp.leakage += 0; // for now, let leakage of the pass transistor be 0
- power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos)* g_tp.peri_global.Vdd;
- // delay of signal through inverter-buffer to second level of sense-amp mux.
- // internal delay of buffer
- rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram);
- C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) +
- drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
- gate_C(g_tp.min_w_nmos_ + p_to_n_sz_r * g_tp.min_w_nmos_, 0.0, is_dram);
- tf = rd * C_ld;
- this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
- delay_subarray_out_drv += this_delay;
- inrisetime = this_delay/(1.0 - 0.5);
- power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
- power_subarray_out_drv.readOp.leakage += cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv, is_dram)* g_tp.peri_global.Vdd;
- power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv)* g_tp.peri_global.Vdd;
-
- // inverter driving drain of pass transistor of second level of sense-amp mux.
- rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram);
- C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) +
- drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
- drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP + SCHP), is_dram);
- tf = rd * C_ld;
- this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
- delay_subarray_out_drv += this_delay;
- inrisetime = this_delay/(1.0 - 0.5);
- power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
- power_subarray_out_drv.readOp.leakage += cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv)* g_tp.peri_global.Vdd;
- power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv)* g_tp.peri_global.Vdd;
-
-
- // delay of signal through pass-transistor to input of subarray output driver.
- rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram);
- C_ld = dp.Ndsam_lev_2 * drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP + SCHP), is_dram) +
- //gate_C(subarray_out_wire->repeater_size * g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), 0.0, is_dram);
- gate_C(subarray_out_wire->repeater_size *(subarray_out_wire->wire_length/subarray_out_wire->repeater_spacing) * g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), 0.0, is_dram);
- tf = rd * C_ld;
- this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
- delay_subarray_out_drv += this_delay;
- inrisetime = this_delay/(1.0 - 0.5);
- power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
- power_subarray_out_drv.readOp.leakage += 0; // for now, let leakage of the pass transistor be 0
- power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos)* g_tp.peri_global.Vdd;
-
-
- return inrisetime;
+double Mat::compute_subarray_out_drv(double inrisetime) { // Adds four stage delays to delay_subarray_out_drv, accumulates power_subarray_out_drv.readOp, and returns the rise time out of the final stage.
+ double C_ld, rd, tf, this_delay;
+ double p_to_n_sz_r = pmos_to_nmos_sz_ratio(is_dram);
+
+ // Stage 1: delay of signal through pass-transistor of first level of sense-amp mux to input of inverter-buffer.
+ rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram);
+ C_ld = dp.Ndsam_lev_1 * drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, // NOTE(review): in the width argument below, the conditional operator has lower precedence than the multiplication and division, so only cell.w is scaled and cam_cell.w is passed unscaled; confirm this is intended
+ camFlag ? cam_cell.w : cell.w *
+ deg_bl_muxing / (RWP + ERP + SCHP),
+ is_dram) +
+ gate_C(g_tp.min_w_nmos_ + p_to_n_sz_r * g_tp.min_w_nmos_, 0.0, is_dram);
+ tf = rd * C_ld;
+ this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
+ delay_subarray_out_drv += this_delay;
+ inrisetime = this_delay / (1.0 - 0.5); // becomes the input rise time of the next stage
+ power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
+ power_subarray_out_drv.readOp.leakage += 0; // for now, let leakage of the pass transistor be 0
+ power_subarray_out_drv.readOp.gate_leakage +=
+ cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos) * g_tp.peri_global.Vdd;
+ // Stage 2: delay of signal through inverter-buffer to second level of sense-amp mux.
+ // This is the internal delay of the buffer.
+ rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram);
+ C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) +
+ drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
+ gate_C(g_tp.min_w_nmos_ + p_to_n_sz_r * g_tp.min_w_nmos_, 0.0, is_dram);
+ tf = rd * C_ld;
+ this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
+ delay_subarray_out_drv += this_delay;
+ inrisetime = this_delay / (1.0 - 0.5);
+ power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
+ power_subarray_out_drv.readOp.leakage +=
+ cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1,
+ inv, is_dram) * g_tp.peri_global.Vdd;
+ power_subarray_out_drv.readOp.gate_leakage +=
+ cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1,
+ inv) * g_tp.peri_global.Vdd;
+
+ // Stage 3: inverter driving drain of pass transistor of second level of sense-amp mux.
+ rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram);
+ C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) +
+ drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def,
+ is_dram) +
+ drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag ?
+ cam_cell.w : cell.w * deg_bl_muxing * dp.Ndsam_lev_1 /
+ (RWP + ERP + SCHP), is_dram);
+ tf = rd * C_ld;
+ this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
+ delay_subarray_out_drv += this_delay;
+ inrisetime = this_delay / (1.0 - 0.5);
+ power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
+ power_subarray_out_drv.readOp.leakage +=
+ cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1,
+ inv) * g_tp.peri_global.Vdd; // NOTE(review): the stage 2 call to cmos_Isub_leakage passes is_dram but this one does not; confirm the omission is intended
+ power_subarray_out_drv.readOp.gate_leakage +=
+ cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1,
+ inv) * g_tp.peri_global.Vdd;
+
+
+ // Stage 4: delay of signal through pass-transistor to input of subarray output driver.
+ rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram);
+ C_ld = dp.Ndsam_lev_2 *
+ drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag ? cam_cell.w :
+ cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP + SCHP),
+ is_dram) +
+ //gate_C(subarray_out_wire->repeater_size * g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), 0.0, is_dram);
+ gate_C(subarray_out_wire->repeater_size *
+ (subarray_out_wire->wire_length /
+ subarray_out_wire->repeater_spacing) * g_tp.min_w_nmos_ *
+ (1 + p_to_n_sz_r), 0.0, is_dram);
+ tf = rd * C_ld;
+ this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
+ delay_subarray_out_drv += this_delay;
+ inrisetime = this_delay / (1.0 - 0.5);
+ power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
+ power_subarray_out_drv.readOp.leakage += 0; // for now, let leakage of the pass transistor be 0
+ power_subarray_out_drv.readOp.gate_leakage +=
+ cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos) * g_tp.peri_global.Vdd;
+
+
+ return inrisetime; // rise time produced by the final (stage 4) computation
}
-double Mat::compute_comparator_delay(double inrisetime)
-{
- int A = g_ip->tag_assoc;
-
- int tagbits_ = dp.tagbits / 4; // Assuming there are 4 quarter comparators. input tagbits is already
- // a multiple of 4.
-
- /* First Inverter */
- double Ceq = gate_C(g_tp.w_comp_inv_n2+g_tp.w_comp_inv_p2, 0, is_dram) +
- drain_C_(g_tp.w_comp_inv_p1, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
- drain_C_(g_tp.w_comp_inv_n1, NCH, 1, 1, g_tp.cell_h_def, is_dram);
- double Req = tr_R_on(g_tp.w_comp_inv_p1, PCH, 1, is_dram);
- double tf = Req*Ceq;
- double st1del = horowitz(inrisetime,tf,VTHCOMPINV,VTHCOMPINV,FALL);
- double nextinputtime = st1del/VTHCOMPINV;
- power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
-
- //For each degree of associativity
- //there are 4 such quarter comparators
- double lkgCurrent = cmos_Isub_leakage(g_tp.w_comp_inv_n1, g_tp.w_comp_inv_p1, 1, inv, is_dram)* 4 * A;
- double gatelkgCurrent = cmos_Ig_leakage(g_tp.w_comp_inv_n1, g_tp.w_comp_inv_p1, 1, inv, is_dram)* 4 * A;
- /* Second Inverter */
- Ceq = gate_C(g_tp.w_comp_inv_n3+g_tp.w_comp_inv_p3, 0, is_dram) +
- drain_C_(g_tp.w_comp_inv_p2, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
- drain_C_(g_tp.w_comp_inv_n2, NCH, 1, 1, g_tp.cell_h_def, is_dram);
- Req = tr_R_on(g_tp.w_comp_inv_n2, NCH, 1, is_dram);
- tf = Req*Ceq;
- double st2del = horowitz(nextinputtime,tf,VTHCOMPINV,VTHCOMPINV,RISE);
- nextinputtime = st2del/(1.0-VTHCOMPINV);
- power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
- lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1, inv, is_dram)* 4 * A;
- gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1, inv, is_dram)* 4 * A;
-
- /* Third Inverter */
- Ceq = gate_C(g_tp.w_eval_inv_n+g_tp.w_eval_inv_p, 0, is_dram) +
- drain_C_(g_tp.w_comp_inv_p3, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
- drain_C_(g_tp.w_comp_inv_n3, NCH, 1, 1, g_tp.cell_h_def, is_dram);
- Req = tr_R_on(g_tp.w_comp_inv_p3, PCH, 1, is_dram);
- tf = Req*Ceq;
- double st3del = horowitz(nextinputtime,tf,VTHCOMPINV,VTHEVALINV,FALL);
- nextinputtime = st3del/(VTHEVALINV);
- power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
- lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1, inv, is_dram)* 4 * A;
- gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1, inv, is_dram)* 4 * A;
-
- /* Final Inverter (virtual ground driver) discharging compare part */
- double r1 = tr_R_on(g_tp.w_comp_n,NCH,2, is_dram);
- double r2 = tr_R_on(g_tp.w_eval_inv_n,NCH,1, is_dram); /* was switch */
- double c2 = (tagbits_)*(drain_C_(g_tp.w_comp_n,NCH,1, 1, g_tp.cell_h_def, is_dram) +
- drain_C_(g_tp.w_comp_n,NCH,2, 1, g_tp.cell_h_def, is_dram)) +
- drain_C_(g_tp.w_eval_inv_p,PCH,1, 1, g_tp.cell_h_def, is_dram) +
- drain_C_(g_tp.w_eval_inv_n,NCH,1, 1, g_tp.cell_h_def, is_dram);
- double c1 = (tagbits_)*(drain_C_(g_tp.w_comp_n,NCH,1, 1, g_tp.cell_h_def, is_dram) +
- drain_C_(g_tp.w_comp_n,NCH,2, 1, g_tp.cell_h_def, is_dram)) +
- drain_C_(g_tp.w_comp_p,PCH,1, 1, g_tp.cell_h_def, is_dram) +
- gate_C(WmuxdrvNANDn+WmuxdrvNANDp,0, is_dram);
- power_comparator.readOp.dynamic += 0.5 * c2 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
- power_comparator.readOp.dynamic += c1 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * (A - 1);
- lkgCurrent += cmos_Isub_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1, inv, is_dram)* 4 * A;
- lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv, is_dram)* 4 * A; // stack factor of 0.2
-
- gatelkgCurrent += cmos_Ig_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1, inv, is_dram)* 4 * A;
- gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv, is_dram)* 4 * A;//for gate leakage this equals to a inverter
-
- /* time to go to threshold of mux driver */
- double tstep = (r2*c2+(r1+r2)*c1)*log(1.0/VTHMUXNAND);
- /* take into account non-zero input rise time */
- double m = g_tp.peri_global.Vdd/nextinputtime;
- double Tcomparatorni;
-
- if((tstep) <= (0.5*(g_tp.peri_global.Vdd-g_tp.peri_global.Vth)/m))
- {
- double a = m;
- double b = 2*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth);
- double c = -2*(tstep)*(g_tp.peri_global.Vdd-g_tp.peri_global.Vth)+1/m*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth)*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth);
- Tcomparatorni = (-b+sqrt(b*b-4*a*c))/(2*a);
- }
- else
- {
- Tcomparatorni = (tstep) + (g_tp.peri_global.Vdd+g_tp.peri_global.Vth)/(2*m) - (g_tp.peri_global.Vdd*VTHEVALINV)/m;
- }
- delay_comparator = Tcomparatorni+st1del+st2del+st3del;
- power_comparator.readOp.leakage = lkgCurrent * g_tp.peri_global.Vdd;
- power_comparator.readOp.gate_leakage = gatelkgCurrent * g_tp.peri_global.Vdd;
-
- return Tcomparatorni / (1.0 - VTHMUXNAND);;
+double Mat::compute_comparator_delay(double inrisetime) { // Sets delay_comparator and power_comparator.readOp (dynamic, leakage, gate_leakage); returns Tcomparatorni divided by (1 - VTHMUXNAND).
+ int A = g_ip->tag_assoc;
+
+ int tagbits_ = dp.tagbits / 4; // Assuming there are 4 quarter comparators; the input tagbits is assumed to
+ // already be a multiple of 4 (integer division otherwise truncates).
+
+ /* First Inverter */
+ double Ceq = gate_C(g_tp.w_comp_inv_n2 + g_tp.w_comp_inv_p2, 0, is_dram) +
+ drain_C_(g_tp.w_comp_inv_p1, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
+ drain_C_(g_tp.w_comp_inv_n1, NCH, 1, 1, g_tp.cell_h_def, is_dram);
+ double Req = tr_R_on(g_tp.w_comp_inv_p1, PCH, 1, is_dram);
+ double tf = Req * Ceq;
+ double st1del = horowitz(inrisetime, tf, VTHCOMPINV, VTHCOMPINV, FALL);
+ double nextinputtime = st1del / VTHCOMPINV; // input rise time passed to the second inverter stage
+ power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
+
+ // For each degree of associativity
+ // there are 4 such quarter comparators, hence the 4 * A multipliers.
+ double lkgCurrent = cmos_Isub_leakage(g_tp.w_comp_inv_n1,
+ g_tp.w_comp_inv_p1, 1, inv,
+ is_dram) * 4 * A;
+ double gatelkgCurrent = cmos_Ig_leakage(g_tp.w_comp_inv_n1,
+ g_tp.w_comp_inv_p1, 1, inv,
+ is_dram) * 4 * A;
+ /* Second Inverter */
+ Ceq = gate_C(g_tp.w_comp_inv_n3 + g_tp.w_comp_inv_p3, 0, is_dram) +
+ drain_C_(g_tp.w_comp_inv_p2, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
+ drain_C_(g_tp.w_comp_inv_n2, NCH, 1, 1, g_tp.cell_h_def, is_dram);
+ Req = tr_R_on(g_tp.w_comp_inv_n2, NCH, 1, is_dram);
+ tf = Req * Ceq;
+ double st2del = horowitz(nextinputtime, tf, VTHCOMPINV, VTHCOMPINV, RISE);
+ nextinputtime = st2del / (1.0 - VTHCOMPINV);
+ power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
+ lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1,
+ inv, is_dram) * 4 * A;
+ gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1,
+ inv, is_dram) * 4 * A;
+
+ /* Third Inverter */
+ Ceq = gate_C(g_tp.w_eval_inv_n + g_tp.w_eval_inv_p, 0, is_dram) +
+ drain_C_(g_tp.w_comp_inv_p3, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
+ drain_C_(g_tp.w_comp_inv_n3, NCH, 1, 1, g_tp.cell_h_def, is_dram);
+ Req = tr_R_on(g_tp.w_comp_inv_p3, PCH, 1, is_dram);
+ tf = Req * Ceq;
+ double st3del = horowitz(nextinputtime, tf, VTHCOMPINV, VTHEVALINV, FALL);
+ nextinputtime = st3del / (VTHEVALINV);
+ power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
+ lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1,
+ inv, is_dram) * 4 * A;
+ gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3,
+ 1, inv, is_dram) * 4 * A;
+
+ /* Final Inverter (virtual ground driver) discharging compare part */
+ double r1 = tr_R_on(g_tp.w_comp_n, NCH, 2, is_dram);
+ double r2 = tr_R_on(g_tp.w_eval_inv_n, NCH, 1, is_dram); /* was switch */
+ double c2 = (tagbits_) * (drain_C_(g_tp.w_comp_n, NCH, 1, 1,
+ g_tp.cell_h_def, is_dram) +
+ drain_C_(g_tp.w_comp_n, NCH, 2, 1,
+ g_tp.cell_h_def, is_dram)) +
+ drain_C_(g_tp.w_eval_inv_p, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
+ drain_C_(g_tp.w_eval_inv_n, NCH, 1, 1, g_tp.cell_h_def, is_dram);
+ double c1 = (tagbits_) * (drain_C_(g_tp.w_comp_n, NCH, 1, 1,
+ g_tp.cell_h_def, is_dram) +
+ drain_C_(g_tp.w_comp_n, NCH, 2, 1,
+ g_tp.cell_h_def, is_dram)) +
+ drain_C_(g_tp.w_comp_p, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
+ gate_C(WmuxdrvNANDn + WmuxdrvNANDp, 0, is_dram);
+ power_comparator.readOp.dynamic += 0.5 * c2 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
+ power_comparator.readOp.dynamic += c1 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * (A - 1);
+ lkgCurrent += cmos_Isub_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1,
+ inv, is_dram) * 4 * A;
+ lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv,
+ is_dram) * 4 * A; // NOTE(review): the earlier comment here said "stack factor of 0.2", but no such factor appears in this expression; confirm whether one is applied elsewhere
+
+ gatelkgCurrent += cmos_Ig_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1,
+ inv, is_dram) * 4 * A;
+ // for gate leakage the compare transistors are treated as equivalent to an inverter
+ gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv,
+ is_dram) * 4 * A;
+
+ /* time to go to threshold of mux driver */
+ double tstep = (r2 * c2 + (r1 + r2) * c1) * log(1.0 / VTHMUXNAND);
+ /* take into account non-zero input rise time */
+ double m = g_tp.peri_global.Vdd / nextinputtime; // slope of the input ramp: Vdd over the rise time from the third inverter
+ double Tcomparatorni;
+
+ if ((tstep) <= (0.5*(g_tp.peri_global.Vdd - g_tp.peri_global.Vth) / m)) {
+ double a = m; // a, b, c are the coefficients of the quadratic solved for Tcomparatorni below
+ double b = 2 * ((g_tp.peri_global.Vdd * VTHEVALINV) -
+ g_tp.peri_global.Vth);
+ double c = -2 * (tstep) * (g_tp.peri_global.Vdd -
+ g_tp.peri_global.Vth) + 1 / m *
+ ((g_tp.peri_global.Vdd * VTHEVALINV) - g_tp.peri_global.Vth) *
+ ((g_tp.peri_global.Vdd * VTHEVALINV) - g_tp.peri_global.Vth);
+ Tcomparatorni = (-b + sqrt(b * b - 4 * a * c)) / (2 * a);
+ } else {
+ Tcomparatorni = (tstep) + (g_tp.peri_global.Vdd +
+ g_tp.peri_global.Vth) / (2 * m) -
+ (g_tp.peri_global.Vdd * VTHEVALINV) / m;
+ }
+ delay_comparator = Tcomparatorni + st1del + st2del + st3del; // total: the final discharge stage plus the three inverter stage delays
+ power_comparator.readOp.leakage = lkgCurrent * g_tp.peri_global.Vdd;
+ power_comparator.readOp.gate_leakage = gatelkgCurrent * g_tp.peri_global.Vdd;
+
+ return Tcomparatorni / (1.0 - VTHMUXNAND);; // NOTE(review): the second semicolon is a redundant empty statement
}
-void Mat::compute_power_energy()
-{
- //for cam and FA, power.readOp is the plain read power, power.searchOp is the associative search related power
+void Mat::compute_power_energy() {
+ //for cam and FA, power.readOp is the plain read power, power.searchOp is the associative search related power
//when search all subarrays and all mats are fully active
- //when plain read/write only one subarray in a single mat is active.
+ //when plain read/write only one subarray in a single mat is active.
// add energy consumed in predecoder drivers. This unit is shared by all subarrays in a mat.
- power.readOp.dynamic += r_predec->power.readOp.dynamic +
- b_mux_predec->power.readOp.dynamic +
- sa_mux_lev_1_predec->power.readOp.dynamic +
- sa_mux_lev_2_predec->power.readOp.dynamic;
-
- // add energy consumed in decoders
- power_row_decoders.readOp.dynamic = row_dec->power.readOp.dynamic;
- if (!(is_fa||pure_cam))
- power_row_decoders.readOp.dynamic *= num_subarrays_per_mat;
-
- // add energy consumed in bitline prechagers, SAs, and bitlines
- if (!(is_fa||pure_cam))
- {
- // add energy consumed in bitline prechagers
- power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic;
- power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat;
-
- //Add sense amps energy
- num_sa_subarray = subarray.num_cols / deg_bl_muxing;
- power_sa.readOp.dynamic *= num_sa_subarray*num_subarrays_per_mat ;
-
- // add energy consumed in bitlines
- //cout<<"bitline power"<<power_bitline.readOp.dynamic<<endl;
- power_bitline.readOp.dynamic *= num_subarrays_per_mat*subarray.num_cols;
- power_bitline.writeOp.dynamic *= num_subarrays_per_mat*subarray.num_cols;
- //cout<<"bitline power"<<power_bitline.readOp.dynamic<<"subarray"<<num_subarrays_per_mat<<"cols"<<subarray.num_cols<<endl;
- //Add subarray output energy
- power_subarray_out_drv.readOp.dynamic =
- (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
-
- power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
- power_sa.readOp.dynamic +
- power_bitline.readOp.dynamic +
- power_subarray_out_drv.readOp.dynamic;
-
- power.readOp.dynamic += power_row_decoders.readOp.dynamic +
- bit_mux_dec->power.readOp.dynamic +
- sa_mux_lev_1_dec->power.readOp.dynamic +
- sa_mux_lev_2_dec->power.readOp.dynamic +
- power_comparator.readOp.dynamic;
- }
-
- else if (is_fa)
- {
- //for plain read/write only one subarray in a mat is active
- // add energy consumed in bitline prechagers
- power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic
- + cam_bl_precharge_eq_drv->power.readOp.dynamic;
- power_bl_precharge_eq_drv.searchOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic;
-
- //Add sense amps energy
- num_sa_subarray = (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram)/ deg_bl_muxing;
- num_sa_subarray_search = subarray.num_cols_fa_ram/ deg_bl_muxing;
- power_sa.searchOp.dynamic = power_sa.readOp.dynamic*num_sa_subarray_search;
- power_sa.readOp.dynamic *= num_sa_subarray;
-
-
- // add energy consumed in bitlines
- power_bitline.searchOp.dynamic = power_bitline.readOp.dynamic;
- power_bitline.readOp.dynamic *= (subarray.num_cols_fa_cam+subarray.num_cols_fa_ram);
- power_bitline.writeOp.dynamic *= (subarray.num_cols_fa_cam+subarray.num_cols_fa_ram);
- power_bitline.searchOp.dynamic *= subarray.num_cols_fa_ram;
-
- //Add subarray output energy
- power_subarray_out_drv.searchOp.dynamic =
- (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat;
- power_subarray_out_drv.readOp.dynamic =
- (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
-
-
- power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
- power_sa.readOp.dynamic +
- power_bitline.readOp.dynamic +
- power_subarray_out_drv.readOp.dynamic;
-
- power.readOp.dynamic += power_row_decoders.readOp.dynamic +
- bit_mux_dec->power.readOp.dynamic +
- sa_mux_lev_1_dec->power.readOp.dynamic +
- sa_mux_lev_2_dec->power.readOp.dynamic +
- power_comparator.readOp.dynamic;
-
- //add energy consumed inside cam
- power_matchline.searchOp.dynamic *= num_subarrays_per_mat;
- power_searchline_precharge = sl_precharge_eq_drv->power;
- power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat;
- power_searchline = sl_data_drv->power;
- power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic*subarray.num_cols_fa_cam* num_subarrays_per_mat;;
- power_matchline_precharge = ml_precharge_drv->power;
- power_matchline_precharge.searchOp.dynamic = power_matchline_precharge.readOp.dynamic* num_subarrays_per_mat;
- power_ml_to_ram_wl_drv= ml_to_ram_wl_drv->power;
- power_ml_to_ram_wl_drv.searchOp.dynamic= ml_to_ram_wl_drv->power.readOp.dynamic;
-
- power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic;
- power_cam_all_active.searchOp.dynamic +=power_searchline_precharge.searchOp.dynamic;
- power_cam_all_active.searchOp.dynamic +=power_searchline.searchOp.dynamic;
- power_cam_all_active.searchOp.dynamic +=power_matchline_precharge.searchOp.dynamic;
-
- power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic;
- //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic;
-
- }
- else
- {
- // add energy consumed in bitline prechagers
- power_bl_precharge_eq_drv.readOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic;
- //power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat;
- //power_bl_precharge_eq_drv.searchOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic;
- //power_bl_precharge_eq_drv.searchOp.dynamic *= num_subarrays_per_mat;
-
- //Add sense amps energy
- num_sa_subarray = subarray.num_cols_fa_cam/ deg_bl_muxing;
- power_sa.readOp.dynamic *= num_sa_subarray;//*num_subarrays_per_mat;
- power_sa.searchOp.dynamic = 0;
-
- power_bitline.readOp.dynamic *= subarray.num_cols_fa_cam;
- power_bitline.searchOp.dynamic = 0;
- power_bitline.writeOp.dynamic *= subarray.num_cols_fa_cam;
-
- power_subarray_out_drv.searchOp.dynamic =
- (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat;
- power_subarray_out_drv.readOp.dynamic =
- (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
-
- power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
- power_sa.readOp.dynamic +
- power_bitline.readOp.dynamic +
- power_subarray_out_drv.readOp.dynamic;
-
- power.readOp.dynamic += power_row_decoders.readOp.dynamic +
- bit_mux_dec->power.readOp.dynamic +
- sa_mux_lev_1_dec->power.readOp.dynamic +
- sa_mux_lev_2_dec->power.readOp.dynamic +
- power_comparator.readOp.dynamic;
-
-
- ////add energy consumed inside cam
- power_matchline.searchOp.dynamic *= num_subarrays_per_mat;
- power_searchline_precharge = sl_precharge_eq_drv->power;
- power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat;
- power_searchline = sl_data_drv->power;
- power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic*subarray.num_cols_fa_cam* num_subarrays_per_mat;;
- power_matchline_precharge = ml_precharge_drv->power;
- power_matchline_precharge.searchOp.dynamic = power_matchline_precharge.readOp.dynamic* num_subarrays_per_mat;
- power_ml_to_ram_wl_drv= ml_to_ram_wl_drv->power;
- power_ml_to_ram_wl_drv.searchOp.dynamic= ml_to_ram_wl_drv->power.readOp.dynamic;
-
- power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic;
- power_cam_all_active.searchOp.dynamic +=power_searchline_precharge.searchOp.dynamic;
- power_cam_all_active.searchOp.dynamic +=power_searchline.searchOp.dynamic;
- power_cam_all_active.searchOp.dynamic +=power_matchline_precharge.searchOp.dynamic;
-
- power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic;
- //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic;
-
- }
-
-
-
- // calculate leakage power
- if (!(is_fa || pure_cam))
- {
- int number_output_drivers_subarray = num_sa_subarray / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
-
- power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
- power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
- power_sa.readOp.leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP);
-
- //num_sa_subarray = subarray.num_cols / deg_bl_muxing;
- power_subarray_out_drv.readOp.leakage =
- (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
- number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP);
-
- power.readOp.leakage += power_bitline.readOp.leakage +
- power_bl_precharge_eq_drv.readOp.leakage +
- power_sa.readOp.leakage +
- power_subarray_out_drv.readOp.leakage;
- //cout<<"leakage"<<power.readOp.leakage<<endl;
-
- power_comparator.readOp.leakage *= num_do_b_mat * (RWP + ERP);
- power.readOp.leakage += power_comparator.readOp.leakage;
-
- //cout<<"leakage1"<<power.readOp.leakage<<endl;
-
- // leakage power
- power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat;
- power_bit_mux_decoders.readOp.leakage = bit_mux_dec->power.readOp.leakage * deg_bl_muxing;
- power_sa_mux_lev_1_decoders.readOp.leakage = sa_mux_lev_1_dec->power.readOp.leakage * dp.Ndsam_lev_1;
- power_sa_mux_lev_2_decoders.readOp.leakage = sa_mux_lev_2_dec->power.readOp.leakage * dp.Ndsam_lev_2;
-
- power.readOp.leakage += r_predec->power.readOp.leakage +
- b_mux_predec->power.readOp.leakage +
- sa_mux_lev_1_predec->power.readOp.leakage +
- sa_mux_lev_2_predec->power.readOp.leakage +
- power_row_decoders.readOp.leakage +
- power_bit_mux_decoders.readOp.leakage +
- power_sa_mux_lev_1_decoders.readOp.leakage +
- power_sa_mux_lev_2_decoders.readOp.leakage;
- //cout<<"leakage2"<<power.readOp.leakage<<endl;
-
- //++++Below is gate leakage
- power_bitline.readOp.gate_leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
- power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
- power_sa.readOp.gate_leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP);
-
- //num_sa_subarray = subarray.num_cols / deg_bl_muxing;
- power_subarray_out_drv.readOp.gate_leakage =
- (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
- number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP);
-
- power.readOp.gate_leakage += power_bitline.readOp.gate_leakage +
- power_bl_precharge_eq_drv.readOp.gate_leakage +
- power_sa.readOp.gate_leakage +
- power_subarray_out_drv.readOp.gate_leakage;
- //cout<<"leakage"<<power.readOp.leakage<<endl;
-
- power_comparator.readOp.gate_leakage *= num_do_b_mat * (RWP + ERP);
- power.readOp.gate_leakage += power_comparator.readOp.gate_leakage;
-
- //cout<<"leakage1"<<power.readOp.gate_leakage<<endl;
-
- // gate_leakage power
- power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat;
- power_bit_mux_decoders.readOp.gate_leakage = bit_mux_dec->power.readOp.gate_leakage * deg_bl_muxing;
- power_sa_mux_lev_1_decoders.readOp.gate_leakage = sa_mux_lev_1_dec->power.readOp.gate_leakage * dp.Ndsam_lev_1;
- power_sa_mux_lev_2_decoders.readOp.gate_leakage = sa_mux_lev_2_dec->power.readOp.gate_leakage * dp.Ndsam_lev_2;
-
- power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
- b_mux_predec->power.readOp.gate_leakage +
- sa_mux_lev_1_predec->power.readOp.gate_leakage +
- sa_mux_lev_2_predec->power.readOp.gate_leakage +
- power_row_decoders.readOp.gate_leakage +
- power_bit_mux_decoders.readOp.gate_leakage +
- power_sa_mux_lev_1_decoders.readOp.gate_leakage +
- power_sa_mux_lev_2_decoders.readOp.gate_leakage;
- }
- else if (is_fa)
- {
- int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
-
- power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
- power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
- power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
- power_sa.readOp.leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP);
-
- //cout<<"leakage3"<<power.readOp.leakage<<endl;
-
-
- power_subarray_out_drv.readOp.leakage =
- (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
- number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
-
- power.readOp.leakage += power_bitline.readOp.leakage +
- power_bl_precharge_eq_drv.readOp.leakage +
- power_bl_precharge_eq_drv.searchOp.leakage +
- power_sa.readOp.leakage +
- power_subarray_out_drv.readOp.leakage;
-
- //cout<<"leakage4"<<power.readOp.leakage<<endl;
-
- // leakage power
- power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat;
- power.readOp.leakage += r_predec->power.readOp.leakage +
- power_row_decoders.readOp.leakage;
-
- //cout<<"leakage5"<<power.readOp.leakage<<endl;
-
- //inside cam
- power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage;
- power_cam_all_active.searchOp.leakage +=sl_precharge_eq_drv->power.readOp.leakage;
- power_cam_all_active.searchOp.leakage +=sl_data_drv->power.readOp.leakage*subarray.num_cols_fa_cam;
- power_cam_all_active.searchOp.leakage +=ml_precharge_drv->power.readOp.dynamic;
- power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat;
-
- power.readOp.leakage += power_cam_all_active.searchOp.leakage;
-
-// cout<<"leakage6"<<power.readOp.leakage<<endl;
-
- //+++Below is gate leakage
- power_bitline.readOp.gate_leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
- power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
- power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
- power_sa.readOp.gate_leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP);
-
- //cout<<"leakage3"<<power.readOp.gate_leakage<<endl;
-
-
- power_subarray_out_drv.readOp.gate_leakage =
- (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
- number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
-
- power.readOp.gate_leakage += power_bitline.readOp.gate_leakage +
- power_bl_precharge_eq_drv.readOp.gate_leakage +
- power_bl_precharge_eq_drv.searchOp.gate_leakage +
- power_sa.readOp.gate_leakage +
- power_subarray_out_drv.readOp.gate_leakage;
-
- //cout<<"leakage4"<<power.readOp.gate_leakage<<endl;
-
- // gate_leakage power
- power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat;
- power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
- power_row_decoders.readOp.gate_leakage;
+ power.readOp.dynamic += r_predec->power.readOp.dynamic +
+ b_mux_predec->power.readOp.dynamic +
+ sa_mux_lev_1_predec->power.readOp.dynamic +
+ sa_mux_lev_2_predec->power.readOp.dynamic;
+
+ // add energy consumed in decoders
+ power_row_decoders.readOp.dynamic = row_dec->power.readOp.dynamic;
+ if (!(is_fa || pure_cam))
+ power_row_decoders.readOp.dynamic *= num_subarrays_per_mat;
+
+ // add energy consumed in bitline prechagers, SAs, and bitlines
+ if (!(is_fa || pure_cam)) {
+ // add energy consumed in bitline prechagers
+ power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic;
+ power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat;
+
+ //Add sense amps energy
+ num_sa_subarray = subarray.num_cols / deg_bl_muxing;
+ power_sa.readOp.dynamic *= num_sa_subarray * num_subarrays_per_mat ;
+
+ // add energy consumed in bitlines
+ //cout<<"bitline power"<<power_bitline.readOp.dynamic<<endl;
+ power_bitline.readOp.dynamic *= num_subarrays_per_mat *
+ subarray.num_cols;
+ power_bitline.writeOp.dynamic *= num_subarrays_per_mat *
+ subarray.num_cols;
+ //cout<<"bitline power"<<power_bitline.readOp.dynamic<<"subarray"<<num_subarrays_per_mat<<"cols"<<subarray.num_cols<<endl;
+ //Add subarray output energy
+ power_subarray_out_drv.readOp.dynamic =
+ (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
+
+ power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
+ power_sa.readOp.dynamic +
+ power_bitline.readOp.dynamic +
+ power_subarray_out_drv.readOp.dynamic;
+
+ power.readOp.dynamic += power_row_decoders.readOp.dynamic +
+ bit_mux_dec->power.readOp.dynamic +
+ sa_mux_lev_1_dec->power.readOp.dynamic +
+ sa_mux_lev_2_dec->power.readOp.dynamic +
+ power_comparator.readOp.dynamic;
+ }
- //cout<<"leakage5"<<power.readOp.gate_leakage<<endl;
+ else if (is_fa) {
+ //for plain read/write only one subarray in a mat is active
+ // add energy consumed in bitline prechagers
+ power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic
+ + cam_bl_precharge_eq_drv->power.readOp.dynamic;
+ power_bl_precharge_eq_drv.searchOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic;
+
+ //Add sense amps energy
+ num_sa_subarray = (subarray.num_cols_fa_cam +
+ subarray.num_cols_fa_ram) / deg_bl_muxing;
+ num_sa_subarray_search = subarray.num_cols_fa_ram / deg_bl_muxing;
+ power_sa.searchOp.dynamic = power_sa.readOp.dynamic *
+ num_sa_subarray_search;
+ power_sa.readOp.dynamic *= num_sa_subarray;
+
+
+ // add energy consumed in bitlines
+ power_bitline.searchOp.dynamic = power_bitline.readOp.dynamic;
+ power_bitline.readOp.dynamic *= (subarray.num_cols_fa_cam +
+ subarray.num_cols_fa_ram);
+ power_bitline.writeOp.dynamic *= (subarray.num_cols_fa_cam +
+ subarray.num_cols_fa_ram);
+ power_bitline.searchOp.dynamic *= subarray.num_cols_fa_ram;
+
+ //Add subarray output energy
+ power_subarray_out_drv.searchOp.dynamic =
+ (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat;
+ power_subarray_out_drv.readOp.dynamic =
+ (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
+
+
+ power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
+ power_sa.readOp.dynamic +
+ power_bitline.readOp.dynamic +
+ power_subarray_out_drv.readOp.dynamic;
+
+ power.readOp.dynamic += power_row_decoders.readOp.dynamic +
+ bit_mux_dec->power.readOp.dynamic +
+ sa_mux_lev_1_dec->power.readOp.dynamic +
+ sa_mux_lev_2_dec->power.readOp.dynamic +
+ power_comparator.readOp.dynamic;
+
+ //add energy consumed inside cam
+ power_matchline.searchOp.dynamic *= num_subarrays_per_mat;
+ power_searchline_precharge = sl_precharge_eq_drv->power;
+ power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat;
+ power_searchline = sl_data_drv->power;
+ power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic *
+ subarray.num_cols_fa_cam * num_subarrays_per_mat;;
+ power_matchline_precharge = ml_precharge_drv->power;
+ power_matchline_precharge.searchOp.dynamic =
+ power_matchline_precharge.readOp.dynamic * num_subarrays_per_mat;
+ power_ml_to_ram_wl_drv = ml_to_ram_wl_drv->power;
+ power_ml_to_ram_wl_drv.searchOp.dynamic =
+ ml_to_ram_wl_drv->power.readOp.dynamic;
+
+ power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic;
+ power_cam_all_active.searchOp.dynamic +=
+ power_searchline_precharge.searchOp.dynamic;
+ power_cam_all_active.searchOp.dynamic +=
+ power_searchline.searchOp.dynamic;
+ power_cam_all_active.searchOp.dynamic +=
+ power_matchline_precharge.searchOp.dynamic;
+
+ power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic;
+ //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic;
+
+ } else {
+ // add energy consumed in bitline prechagers
+ power_bl_precharge_eq_drv.readOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic;
+ //power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat;
+ //power_bl_precharge_eq_drv.searchOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic;
+ //power_bl_precharge_eq_drv.searchOp.dynamic *= num_subarrays_per_mat;
+
+ //Add sense amps energy
+ num_sa_subarray = subarray.num_cols_fa_cam / deg_bl_muxing;
+ power_sa.readOp.dynamic *= num_sa_subarray;//*num_subarrays_per_mat;
+ power_sa.searchOp.dynamic = 0;
+
+ power_bitline.readOp.dynamic *= subarray.num_cols_fa_cam;
+ power_bitline.searchOp.dynamic = 0;
+ power_bitline.writeOp.dynamic *= subarray.num_cols_fa_cam;
+
+ power_subarray_out_drv.searchOp.dynamic =
+ (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat;
+ power_subarray_out_drv.readOp.dynamic =
+ (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
+
+ power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
+ power_sa.readOp.dynamic +
+ power_bitline.readOp.dynamic +
+ power_subarray_out_drv.readOp.dynamic;
+
+ power.readOp.dynamic += power_row_decoders.readOp.dynamic +
+ bit_mux_dec->power.readOp.dynamic +
+ sa_mux_lev_1_dec->power.readOp.dynamic +
+ sa_mux_lev_2_dec->power.readOp.dynamic +
+ power_comparator.readOp.dynamic;
+
+
+ ////add energy consumed inside cam
+ power_matchline.searchOp.dynamic *= num_subarrays_per_mat;
+ power_searchline_precharge = sl_precharge_eq_drv->power;
+ power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat;
+ power_searchline = sl_data_drv->power;
+ power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic *
+ subarray.num_cols_fa_cam * num_subarrays_per_mat;;
+ power_matchline_precharge = ml_precharge_drv->power;
+ power_matchline_precharge.searchOp.dynamic =
+ power_matchline_precharge.readOp.dynamic * num_subarrays_per_mat;
+ power_ml_to_ram_wl_drv = ml_to_ram_wl_drv->power;
+ power_ml_to_ram_wl_drv.searchOp.dynamic =
+ ml_to_ram_wl_drv->power.readOp.dynamic;
+
+ power_cam_all_active.searchOp.dynamic =
+ power_matchline.searchOp.dynamic;
+ power_cam_all_active.searchOp.dynamic +=
+ power_searchline_precharge.searchOp.dynamic;
+ power_cam_all_active.searchOp.dynamic +=
+ power_searchline.searchOp.dynamic;
+ power_cam_all_active.searchOp.dynamic +=
+ power_matchline_precharge.searchOp.dynamic;
+
+ power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic;
+ //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic;
- //inside cam
- power_cam_all_active.searchOp.gate_leakage = power_matchline.searchOp.gate_leakage;
- power_cam_all_active.searchOp.gate_leakage +=sl_precharge_eq_drv->power.readOp.gate_leakage;
- power_cam_all_active.searchOp.gate_leakage +=sl_data_drv->power.readOp.gate_leakage*subarray.num_cols_fa_cam;
- power_cam_all_active.searchOp.gate_leakage +=ml_precharge_drv->power.readOp.dynamic;
- power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat;
+ }
- power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage;
- }
- else
- {
- int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
- //power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
- //power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
- power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
- power_sa.readOp.leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP);
+ // calculate leakage power
+ if (!(is_fa || pure_cam)) {
+ int number_output_drivers_subarray = num_sa_subarray / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
+ power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
+ power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
+ power_sa.readOp.leakage *= num_sa_subarray * num_subarrays_per_mat *
+ (RWP + ERP);
+
+ //num_sa_subarray = subarray.num_cols / deg_bl_muxing;
+ power_subarray_out_drv.readOp.leakage =
+ (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
+ number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP);
+
+ power.readOp.leakage += power_bitline.readOp.leakage +
+ power_bl_precharge_eq_drv.readOp.leakage +
+ power_sa.readOp.leakage +
+ power_subarray_out_drv.readOp.leakage;
+ //cout<<"leakage"<<power.readOp.leakage<<endl;
+
+ power_comparator.readOp.leakage *= num_do_b_mat * (RWP + ERP);
+ power.readOp.leakage += power_comparator.readOp.leakage;
+
+ //cout<<"leakage1"<<power.readOp.leakage<<endl;
+
+ // leakage power
+ power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat;
+ power_bit_mux_decoders.readOp.leakage = bit_mux_dec->power.readOp.leakage * deg_bl_muxing;
+ power_sa_mux_lev_1_decoders.readOp.leakage = sa_mux_lev_1_dec->power.readOp.leakage * dp.Ndsam_lev_1;
+ power_sa_mux_lev_2_decoders.readOp.leakage = sa_mux_lev_2_dec->power.readOp.leakage * dp.Ndsam_lev_2;
+
+ power.readOp.leakage += r_predec->power.readOp.leakage +
+ b_mux_predec->power.readOp.leakage +
+ sa_mux_lev_1_predec->power.readOp.leakage +
+ sa_mux_lev_2_predec->power.readOp.leakage +
+ power_row_decoders.readOp.leakage +
+ power_bit_mux_decoders.readOp.leakage +
+ power_sa_mux_lev_1_decoders.readOp.leakage +
+ power_sa_mux_lev_2_decoders.readOp.leakage;
+ //cout<<"leakage2"<<power.readOp.leakage<<endl;
+
+ //++++Below is gate leakage
+ power_bitline.readOp.gate_leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
+ power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
+ power_sa.readOp.gate_leakage *= num_sa_subarray *
+ num_subarrays_per_mat * (RWP + ERP);
+
+ //num_sa_subarray = subarray.num_cols / deg_bl_muxing;
+ power_subarray_out_drv.readOp.gate_leakage =
+ (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
+ number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP);
+
+ power.readOp.gate_leakage += power_bitline.readOp.gate_leakage +
+ power_bl_precharge_eq_drv.readOp.gate_leakage +
+ power_sa.readOp.gate_leakage +
+ power_subarray_out_drv.readOp.gate_leakage;
+ //cout<<"leakage"<<power.readOp.leakage<<endl;
+
+ power_comparator.readOp.gate_leakage *= num_do_b_mat * (RWP + ERP);
+ power.readOp.gate_leakage += power_comparator.readOp.gate_leakage;
+
+ //cout<<"leakage1"<<power.readOp.gate_leakage<<endl;
+
+ // gate_leakage power
+ power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat;
+ power_bit_mux_decoders.readOp.gate_leakage = bit_mux_dec->power.readOp.gate_leakage * deg_bl_muxing;
+ power_sa_mux_lev_1_decoders.readOp.gate_leakage = sa_mux_lev_1_dec->power.readOp.gate_leakage * dp.Ndsam_lev_1;
+ power_sa_mux_lev_2_decoders.readOp.gate_leakage = sa_mux_lev_2_dec->power.readOp.gate_leakage * dp.Ndsam_lev_2;
+
+ power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
+ b_mux_predec->power.readOp.gate_leakage +
+ sa_mux_lev_1_predec->power.readOp.gate_leakage +
+ sa_mux_lev_2_predec->power.readOp.gate_leakage +
+ power_row_decoders.readOp.gate_leakage +
+ power_bit_mux_decoders.readOp.gate_leakage +
+ power_sa_mux_lev_1_decoders.readOp.gate_leakage +
+ power_sa_mux_lev_2_decoders.readOp.gate_leakage;
+ } else if (is_fa) {
+ int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
- power_subarray_out_drv.readOp.leakage =
- (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
- number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
+ power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
+ power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
+ power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
+ power_sa.readOp.leakage *= num_sa_subarray * num_subarrays_per_mat *
+ (RWP + ERP + SCHP);
- power.readOp.leakage += //power_bitline.readOp.leakage +
- //power_bl_precharge_eq_drv.readOp.leakage +
- power_bl_precharge_eq_drv.searchOp.leakage +
- power_sa.readOp.leakage +
- power_subarray_out_drv.readOp.leakage;
+ //cout<<"leakage3"<<power.readOp.leakage<<endl;
- // leakage power
- power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat*(RWP + ERP + EWP);
- power.readOp.leakage += r_predec->power.readOp.leakage +
- power_row_decoders.readOp.leakage;
- //inside cam
- power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage;
- power_cam_all_active.searchOp.leakage +=sl_precharge_eq_drv->power.readOp.leakage;
- power_cam_all_active.searchOp.leakage +=sl_data_drv->power.readOp.leakage*subarray.num_cols_fa_cam;
- power_cam_all_active.searchOp.leakage +=ml_precharge_drv->power.readOp.dynamic;
- power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat;
+ power_subarray_out_drv.readOp.leakage =
+ (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
+ number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
- power.readOp.leakage += power_cam_all_active.searchOp.leakage;
+ power.readOp.leakage += power_bitline.readOp.leakage +
+ power_bl_precharge_eq_drv.readOp.leakage +
+ power_bl_precharge_eq_drv.searchOp.leakage +
+ power_sa.readOp.leakage +
+ power_subarray_out_drv.readOp.leakage;
- //+++Below is gate leakage
- power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
- power_sa.readOp.gate_leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP);
+ //cout<<"leakage4"<<power.readOp.leakage<<endl;
+ // leakage power
+ power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat;
+ power.readOp.leakage += r_predec->power.readOp.leakage +
+ power_row_decoders.readOp.leakage;
- power_subarray_out_drv.readOp.gate_leakage =
- (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
- number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
+ //cout<<"leakage5"<<power.readOp.leakage<<endl;
- power.readOp.gate_leakage += //power_bitline.readOp.gate_leakage +
- //power_bl_precharge_eq_drv.readOp.gate_leakage +
- power_bl_precharge_eq_drv.searchOp.gate_leakage +
- power_sa.readOp.gate_leakage +
- power_subarray_out_drv.readOp.gate_leakage;
+ //inside cam
+ power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage;
+ power_cam_all_active.searchOp.leakage +=
+ sl_precharge_eq_drv->power.readOp.leakage;
+ power_cam_all_active.searchOp.leakage +=
+ sl_data_drv->power.readOp.leakage * subarray.num_cols_fa_cam;
+ power_cam_all_active.searchOp.leakage +=
+ ml_precharge_drv->power.readOp.dynamic;
+ power_cam_all_active.searchOp.leakage *=
+ num_subarrays_per_mat;
- // gate_leakage power
- power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat*(RWP + ERP + EWP);
- power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
- power_row_decoders.readOp.gate_leakage;
+ power.readOp.leakage += power_cam_all_active.searchOp.leakage;
- //inside cam
- power_cam_all_active.searchOp.gate_leakage = power_matchline.searchOp.gate_leakage;
- power_cam_all_active.searchOp.gate_leakage +=sl_precharge_eq_drv->power.readOp.gate_leakage;
- power_cam_all_active.searchOp.gate_leakage +=sl_data_drv->power.readOp.gate_leakage*subarray.num_cols_fa_cam;
- power_cam_all_active.searchOp.gate_leakage +=ml_precharge_drv->power.readOp.dynamic;
- power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat;
+// cout<<"leakage6"<<power.readOp.leakage<<endl;
- power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage;
- }
+ //+++Below is gate leakage
+ power_bitline.readOp.gate_leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
+ power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
+ power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
+ power_sa.readOp.gate_leakage *= num_sa_subarray *
+ num_subarrays_per_mat * (RWP + ERP + SCHP);
+
+ //cout<<"leakage3"<<power.readOp.gate_leakage<<endl;
+
+
+ power_subarray_out_drv.readOp.gate_leakage =
+ (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
+ number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
+
+ power.readOp.gate_leakage += power_bitline.readOp.gate_leakage +
+ power_bl_precharge_eq_drv.readOp.gate_leakage +
+ power_bl_precharge_eq_drv.searchOp.gate_leakage +
+ power_sa.readOp.gate_leakage +
+ power_subarray_out_drv.readOp.gate_leakage;
+
+ //cout<<"leakage4"<<power.readOp.gate_leakage<<endl;
+
+ // gate_leakage power
+ power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat;
+ power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
+ power_row_decoders.readOp.gate_leakage;
+
+ //cout<<"leakage5"<<power.readOp.gate_leakage<<endl;
+
+ //inside cam
+ power_cam_all_active.searchOp.gate_leakage =
+ power_matchline.searchOp.gate_leakage;
+ power_cam_all_active.searchOp.gate_leakage +=
+ sl_precharge_eq_drv->power.readOp.gate_leakage;
+ power_cam_all_active.searchOp.gate_leakage +=
+ sl_data_drv->power.readOp.gate_leakage * subarray.num_cols_fa_cam;
+ power_cam_all_active.searchOp.gate_leakage +=
+ ml_precharge_drv->power.readOp.dynamic;
+ power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat;
+
+ power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage;
+
+ } else {
+ int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
+
+ //power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
+ //power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
+ power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
+ power_sa.readOp.leakage *= num_sa_subarray * num_subarrays_per_mat *
+ (RWP + ERP + SCHP);
+
+
+ power_subarray_out_drv.readOp.leakage =
+ (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
+ number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
+
+ power.readOp.leakage += //power_bitline.readOp.leakage +
+ //power_bl_precharge_eq_drv.readOp.leakage +
+ power_bl_precharge_eq_drv.searchOp.leakage +
+ power_sa.readOp.leakage +
+ power_subarray_out_drv.readOp.leakage;
+
+ // leakage power
+ power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage *
+ subarray.num_rows * num_subarrays_per_mat * (RWP + ERP + EWP);
+ power.readOp.leakage += r_predec->power.readOp.leakage +
+ power_row_decoders.readOp.leakage;
+
+ //inside cam
+ power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage;
+ power_cam_all_active.searchOp.leakage +=
+ sl_precharge_eq_drv->power.readOp.leakage;
+ power_cam_all_active.searchOp.leakage +=
+ sl_data_drv->power.readOp.leakage * subarray.num_cols_fa_cam;
+ power_cam_all_active.searchOp.leakage +=
+ ml_precharge_drv->power.readOp.dynamic;
+ power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat;
+
+ power.readOp.leakage += power_cam_all_active.searchOp.leakage;
+
+ //+++Below is gate leakage
+ power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
+ power_sa.readOp.gate_leakage *= num_sa_subarray *
+ num_subarrays_per_mat * (RWP + ERP + SCHP);
+
+
+ power_subarray_out_drv.readOp.gate_leakage =
+ (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
+ number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
+
+ power.readOp.gate_leakage += //power_bitline.readOp.gate_leakage +
+ //power_bl_precharge_eq_drv.readOp.gate_leakage +
+ power_bl_precharge_eq_drv.searchOp.gate_leakage +
+ power_sa.readOp.gate_leakage +
+ power_subarray_out_drv.readOp.gate_leakage;
+
+ // gate_leakage power
+ power_row_decoders.readOp.gate_leakage =
+ row_dec->power.readOp.gate_leakage * subarray.num_rows *
+ num_subarrays_per_mat * (RWP + ERP + EWP);
+ power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
+ power_row_decoders.readOp.gate_leakage;
+
+ //inside cam
+ power_cam_all_active.searchOp.gate_leakage =
+ power_matchline.searchOp.gate_leakage;
+ power_cam_all_active.searchOp.gate_leakage +=
+ sl_precharge_eq_drv->power.readOp.gate_leakage;
+ power_cam_all_active.searchOp.gate_leakage +=
+ sl_data_drv->power.readOp.gate_leakage * subarray.num_cols_fa_cam;
+ power_cam_all_active.searchOp.gate_leakage +=
+ ml_precharge_drv->power.readOp.dynamic;
+ power_cam_all_active.searchOp.gate_leakage *=
+ num_subarrays_per_mat;
+
+ power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage;
+ }
}