summaryrefslogtreecommitdiff
path: root/ext/mcpat/cacti/decoder.cc
diff options
context:
space:
mode:
Diffstat (limited to 'ext/mcpat/cacti/decoder.cc')
-rw-r--r--ext/mcpat/cacti/decoder.cc2241
1 files changed, 1081 insertions, 1160 deletions
diff --git a/ext/mcpat/cacti/decoder.cc b/ext/mcpat/cacti/decoder.cc
index 0de6f6157..7fa66b4ff 100644
--- a/ext/mcpat/cacti/decoder.cc
+++ b/ext/mcpat/cacti/decoder.cc
@@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@@ -51,207 +52,184 @@ Decoder::Decoder(
bool is_dram_,
bool is_wl_tr_,
const Area & cell_)
-:exist(false),
- C_ld_dec_out(_C_ld_dec_out),
- R_wire_dec_out(_R_wire_dec_out),
- num_gates(0), num_gates_min(2),
- delay(0),
- //power(),
- fully_assoc(fully_assoc_), is_dram(is_dram_),
- is_wl_tr(is_wl_tr_), cell(cell_)
-{
-
- for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++)
- {
- w_dec_n[i] = 0;
- w_dec_p[i] = 0;
- }
-
- /*
- * _num_dec_signals is the number of decoded signal as output
- * num_addr_bits_dec is the number of signal to be decoded
- * as the decoders input.
- */
- int num_addr_bits_dec = _log2(_num_dec_signals);
-
- if (num_addr_bits_dec < 4)
- {
- if (flag_way_select)
- {
- exist = true;
- num_in_signals = 2;
+ : exist(false),
+ C_ld_dec_out(_C_ld_dec_out),
+ R_wire_dec_out(_R_wire_dec_out),
+ num_gates(0), num_gates_min(2),
+ delay(0),
+ //power(),
+ fully_assoc(fully_assoc_), is_dram(is_dram_),
+ is_wl_tr(is_wl_tr_), cell(cell_) {
+
+ for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) {
+ w_dec_n[i] = 0;
+ w_dec_p[i] = 0;
}
- else
- {
- num_in_signals = 0;
- }
- }
- else
- {
- exist = true;
- if (flag_way_select)
- {
- num_in_signals = 3;
- }
- else
- {
- num_in_signals = 2;
+ /*
+ * _num_dec_signals is the number of decoded signal as output
+ * num_addr_bits_dec is the number of signal to be decoded
+ * as the decoders input.
+ */
+ int num_addr_bits_dec = _log2(_num_dec_signals);
+
+ if (num_addr_bits_dec < 4) {
+ if (flag_way_select) {
+ exist = true;
+ num_in_signals = 2;
+ } else {
+ num_in_signals = 0;
+ }
+ } else {
+ exist = true;
+
+ if (flag_way_select) {
+ num_in_signals = 3;
+ } else {
+ num_in_signals = 2;
+ }
}
- }
- assert(cell.h>0);
- assert(cell.w>0);
- // the height of a row-decoder-driver cell is fixed to be 4 * cell.h;
- //area.h = 4 * cell.h;
- area.h = g_tp.h_dec * cell.h;
+ assert(cell.h > 0);
+ assert(cell.w > 0);
+ // the height of a row-decoder-driver cell is fixed to be 4 * cell.h;
+ //area.h = 4 * cell.h;
+ area.h = g_tp.h_dec * cell.h;
- compute_widths();
- compute_area();
+ compute_widths();
+ compute_area();
}
-void Decoder::compute_widths()
-{
- double F;
- double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram, is_wl_tr);
- double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
- double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
-
- if (exist)
- {
- if (num_in_signals == 2 || fully_assoc)
- {
- w_dec_n[0] = 2 * g_tp.min_w_nmos_;
- w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
- F = gnand2;
+void Decoder::compute_widths() {
+ double F;
+ double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram, is_wl_tr);
+ double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
+ double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
+
+ if (exist) {
+ if (num_in_signals == 2 || fully_assoc) {
+ w_dec_n[0] = 2 * g_tp.min_w_nmos_;
+ w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
+ F = gnand2;
+ } else {
+ w_dec_n[0] = 3 * g_tp.min_w_nmos_;
+ w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
+ F = gnand3;
+ }
+
+ F *= C_ld_dec_out / (gate_C(w_dec_n[0], 0, is_dram, false, is_wl_tr) +
+ gate_C(w_dec_p[0], 0, is_dram, false, is_wl_tr));
+ num_gates = logical_effort(
+ num_gates_min,
+ num_in_signals == 2 ? gnand2 : gnand3,
+ F,
+ w_dec_n,
+ w_dec_p,
+ C_ld_dec_out,
+ p_to_n_sz_ratio,
+ is_dram,
+ is_wl_tr,
+ g_tp.max_w_nmos_dec);
}
- else
- {
- w_dec_n[0] = 3 * g_tp.min_w_nmos_;
- w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
- F = gnand3;
- }
-
- F *= C_ld_dec_out / (gate_C(w_dec_n[0], 0, is_dram, false, is_wl_tr) +
- gate_C(w_dec_p[0], 0, is_dram, false, is_wl_tr));
- num_gates = logical_effort(
- num_gates_min,
- num_in_signals == 2 ? gnand2 : gnand3,
- F,
- w_dec_n,
- w_dec_p,
- C_ld_dec_out,
- p_to_n_sz_ratio,
- is_dram,
- is_wl_tr,
- g_tp.max_w_nmos_dec);
- }
}
-void Decoder::compute_area()
-{
- double cumulative_area = 0;
- double cumulative_curr = 0; // cumulative leakage current
- double cumulative_curr_Ig = 0; // cumulative leakage current
-
- if (exist)
- { // First check if this decoder exists
- if (num_in_signals == 2)
- {
- cumulative_area = compute_gate_area(NAND, 2, w_dec_p[0], w_dec_n[0], area.h);
- cumulative_curr = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 2, nand,is_dram);
- cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 2, nand,is_dram);
- }
- else if (num_in_signals == 3)
- {
- cumulative_area = compute_gate_area(NAND, 3, w_dec_p[0], w_dec_n[0], area.h);
- cumulative_curr = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);;
- cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);
- }
-
- for (int i = 1; i < num_gates; i++)
- {
- cumulative_area += compute_gate_area(INV, 1, w_dec_p[i], w_dec_n[i], area.h);
- cumulative_curr += cmos_Isub_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram);
- cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram);
+void Decoder::compute_area() {
+ double cumulative_area = 0;
+ double cumulative_curr = 0; // cumulative leakage current
+ double cumulative_curr_Ig = 0; // cumulative leakage current
+
+ if (exist) { // First check if this decoder exists
+ if (num_in_signals == 2) {
+ cumulative_area =
+ compute_gate_area(NAND, 2, w_dec_p[0], w_dec_n[0], area.h);
+ cumulative_curr =
+ cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 2, nand, is_dram);
+ cumulative_curr_Ig =
+ cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 2, nand, is_dram);
+ } else if (num_in_signals == 3) {
+ cumulative_area =
+ compute_gate_area(NAND, 3, w_dec_p[0], w_dec_n[0], area.h);
+ cumulative_curr =
+ cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);;
+ cumulative_curr_Ig =
+ cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);
+ }
+
+ for (int i = 1; i < num_gates; i++) {
+ cumulative_area +=
+ compute_gate_area(INV, 1, w_dec_p[i], w_dec_n[i], area.h);
+ cumulative_curr +=
+ cmos_Isub_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram);
+ cumulative_curr_Ig =
+ cmos_Ig_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram);
+ }
+ power.readOp.leakage = cumulative_curr * g_tp.peri_global.Vdd;
+ power.readOp.gate_leakage = cumulative_curr_Ig * g_tp.peri_global.Vdd;
+
+ area.w = (cumulative_area / area.h);
}
- power.readOp.leakage = cumulative_curr * g_tp.peri_global.Vdd;
- power.readOp.gate_leakage = cumulative_curr_Ig * g_tp.peri_global.Vdd;
-
- area.w = (cumulative_area / area.h);
- }
}
-double Decoder::compute_delays(double inrisetime)
-{
- if (exist)
- {
- double ret_val = 0; // outrisetime
- int i;
- double rd, tf, this_delay, c_load, c_intrinsic, Vpp;
- double Vdd = g_tp.peri_global.Vdd;
+double Decoder::compute_delays(double inrisetime) {
+ if (exist) {
+ double ret_val = 0; // outrisetime
+ int i;
+ double rd, tf, this_delay, c_load, c_intrinsic, Vpp;
+ double Vdd = g_tp.peri_global.Vdd;
- if ((is_wl_tr) && (is_dram))
- {
- Vpp = g_tp.vpp;
- }
- else if (is_wl_tr)
- {
- Vpp = g_tp.sram_cell.Vdd;
- }
- else
- {
- Vpp = g_tp.peri_global.Vdd;
- }
+ if ((is_wl_tr) && (is_dram)) {
+ Vpp = g_tp.vpp;
+ } else if (is_wl_tr) {
+ Vpp = g_tp.sram_cell.Vdd;
+ } else {
+ Vpp = g_tp.peri_global.Vdd;
+ }
- // first check whether a decoder is required at all
- rd = tr_R_on(w_dec_n[0], NCH, num_in_signals, is_dram, false, is_wl_tr);
- c_load = gate_C(w_dec_n[1] + w_dec_p[1], 0.0, is_dram, false, is_wl_tr);
- c_intrinsic = drain_C_(w_dec_p[0], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) * num_in_signals +
- drain_C_(w_dec_n[0], NCH, num_in_signals, 1, area.h, is_dram, false, is_wl_tr);
- tf = rd * (c_intrinsic + c_load);
- this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
- delay += this_delay;
- inrisetime = this_delay / (1.0 - 0.5);
- power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd;
-
- for (i = 1; i < num_gates - 1; ++i)
- {
- rd = tr_R_on(w_dec_n[i], NCH, 1, is_dram, false, is_wl_tr);
- c_load = gate_C(w_dec_p[i+1] + w_dec_n[i+1], 0.0, is_dram, false, is_wl_tr);
- c_intrinsic = drain_C_(w_dec_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) +
- drain_C_(w_dec_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr);
- tf = rd * (c_intrinsic + c_load);
- this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
- delay += this_delay;
- inrisetime = this_delay / (1.0 - 0.5);
- power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd;
+ // first check whether a decoder is required at all
+ rd = tr_R_on(w_dec_n[0], NCH, num_in_signals, is_dram, false, is_wl_tr);
+ c_load = gate_C(w_dec_n[1] + w_dec_p[1], 0.0, is_dram, false, is_wl_tr);
+ c_intrinsic = drain_C_(w_dec_p[0], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) * num_in_signals +
+ drain_C_(w_dec_n[0], NCH, num_in_signals, 1, area.h, is_dram, false, is_wl_tr);
+ tf = rd * (c_intrinsic + c_load);
+ this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
+ delay += this_delay;
+ inrisetime = this_delay / (1.0 - 0.5);
+ power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd;
+
+ for (i = 1; i < num_gates - 1; ++i) {
+ rd = tr_R_on(w_dec_n[i], NCH, 1, is_dram, false, is_wl_tr);
+ c_load = gate_C(w_dec_p[i+1] + w_dec_n[i+1], 0.0, is_dram, false, is_wl_tr);
+ c_intrinsic = drain_C_(w_dec_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) +
+ drain_C_(w_dec_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr);
+ tf = rd * (c_intrinsic + c_load);
+ this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
+ delay += this_delay;
+ inrisetime = this_delay / (1.0 - 0.5);
+ power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd;
+ }
+
+ // add delay of final inverter that drives the wordline
+ i = num_gates - 1;
+ c_load = C_ld_dec_out;
+ rd = tr_R_on(w_dec_n[i], NCH, 1, is_dram, false, is_wl_tr);
+ c_intrinsic = drain_C_(w_dec_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) +
+ drain_C_(w_dec_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr);
+ tf = rd * (c_intrinsic + c_load) + R_wire_dec_out * c_load / 2;
+ this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
+ delay += this_delay;
+ ret_val = this_delay / (1.0 - 0.5);
+ power.readOp.dynamic += c_load * Vpp * Vpp + c_intrinsic * Vdd * Vdd;
+
+ return ret_val;
+ } else {
+ return 0.0;
}
-
- // add delay of final inverter that drives the wordline
- i = num_gates - 1;
- c_load = C_ld_dec_out;
- rd = tr_R_on(w_dec_n[i], NCH, 1, is_dram, false, is_wl_tr);
- c_intrinsic = drain_C_(w_dec_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) +
- drain_C_(w_dec_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr);
- tf = rd * (c_intrinsic + c_load) + R_wire_dec_out * c_load / 2;
- this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
- delay += this_delay;
- ret_val = this_delay / (1.0 - 0.5);
- power.readOp.dynamic += c_load * Vpp * Vpp + c_intrinsic * Vdd * Vdd;
-
- return ret_val;
- }
- else
- {
- return 0.0;
- }
}
void Decoder::leakage_feedback(double temperature)
@@ -291,610 +269,568 @@ PredecBlk::PredecBlk(
int num_dec_per_predec,
bool is_dram,
bool is_blk1)
- :dec(dec_),
- exist(false),
- number_input_addr_bits(0),
- C_ld_predec_blk_out(0),
- R_wire_predec_blk_out(0),
- branch_effort_nand2_gate_output(1),
- branch_effort_nand3_gate_output(1),
- flag_two_unique_paths(false),
- flag_L2_gate(0),
- number_inputs_L1_gate(0),
- number_gates_L1_nand2_path(0),
- number_gates_L1_nand3_path(0),
- number_gates_L2(0),
- min_number_gates_L1(2),
- min_number_gates_L2(2),
- num_L1_active_nand2_path(0),
- num_L1_active_nand3_path(0),
- delay_nand2_path(0),
- delay_nand3_path(0),
- power_nand2_path(),
- power_nand3_path(),
- power_L2(),
- is_dram_(is_dram)
-{
- int branch_effort_predec_out;
- double C_ld_dec_gate;
- int num_addr_bits_dec = _log2(num_dec_signals);
- int blk1_num_input_addr_bits = (num_addr_bits_dec + 1) / 2;
- int blk2_num_input_addr_bits = num_addr_bits_dec - blk1_num_input_addr_bits;
-
- w_L1_nand2_n[0] = 0;
- w_L1_nand2_p[0] = 0;
- w_L1_nand3_n[0] = 0;
- w_L1_nand3_p[0] = 0;
-
- if (is_blk1 == true)
- {
- if (num_addr_bits_dec <= 0)
- {
- return;
+ : dec(dec_),
+ exist(false),
+ number_input_addr_bits(0),
+ C_ld_predec_blk_out(0),
+ R_wire_predec_blk_out(0),
+ branch_effort_nand2_gate_output(1),
+ branch_effort_nand3_gate_output(1),
+ flag_two_unique_paths(false),
+ flag_L2_gate(0),
+ number_inputs_L1_gate(0),
+ number_gates_L1_nand2_path(0),
+ number_gates_L1_nand3_path(0),
+ number_gates_L2(0),
+ min_number_gates_L1(2),
+ min_number_gates_L2(2),
+ num_L1_active_nand2_path(0),
+ num_L1_active_nand3_path(0),
+ delay_nand2_path(0),
+ delay_nand3_path(0),
+ power_nand2_path(),
+ power_nand3_path(),
+ power_L2(),
+ is_dram_(is_dram) {
+ int branch_effort_predec_out;
+ double C_ld_dec_gate;
+ int num_addr_bits_dec = _log2(num_dec_signals);
+ int blk1_num_input_addr_bits = (num_addr_bits_dec + 1) / 2;
+ int blk2_num_input_addr_bits = num_addr_bits_dec - blk1_num_input_addr_bits;
+
+ w_L1_nand2_n[0] = 0;
+ w_L1_nand2_p[0] = 0;
+ w_L1_nand3_n[0] = 0;
+ w_L1_nand3_p[0] = 0;
+
+ if (is_blk1 == true) {
+ if (num_addr_bits_dec <= 0) {
+ return;
+ } else if (num_addr_bits_dec < 4) {
+ // Just one predecoder block is required with NAND2 gates. No decoder required.
+ // The first level of predecoding directly drives the decoder output load
+ exist = true;
+ number_input_addr_bits = num_addr_bits_dec;
+ R_wire_predec_blk_out = dec->R_wire_dec_out;
+ C_ld_predec_blk_out = dec->C_ld_dec_out;
+ } else {
+ exist = true;
+ number_input_addr_bits = blk1_num_input_addr_bits;
+ branch_effort_predec_out = (1 << blk2_num_input_addr_bits);
+ C_ld_dec_gate = num_dec_per_predec * gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false);
+ R_wire_predec_blk_out = R_wire_predec_blk_out_;
+ C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out;
+ }
+ } else {
+ if (num_addr_bits_dec >= 4) {
+ exist = true;
+ number_input_addr_bits = blk2_num_input_addr_bits;
+ branch_effort_predec_out = (1 << blk1_num_input_addr_bits);
+ C_ld_dec_gate = num_dec_per_predec * gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false);
+ R_wire_predec_blk_out = R_wire_predec_blk_out_;
+ C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out;
+ }
}
- else if (num_addr_bits_dec < 4)
- {
- // Just one predecoder block is required with NAND2 gates. No decoder required.
- // The first level of predecoding directly drives the decoder output load
- exist = true;
- number_input_addr_bits = num_addr_bits_dec;
- R_wire_predec_blk_out = dec->R_wire_dec_out;
- C_ld_predec_blk_out = dec->C_ld_dec_out;
- }
- else
- {
- exist = true;
- number_input_addr_bits = blk1_num_input_addr_bits;
- branch_effort_predec_out = (1 << blk2_num_input_addr_bits);
- C_ld_dec_gate = num_dec_per_predec * gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false);
- R_wire_predec_blk_out = R_wire_predec_blk_out_;
- C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out;
- }
- }
- else
- {
- if (num_addr_bits_dec >= 4)
- {
- exist = true;
- number_input_addr_bits = blk2_num_input_addr_bits;
- branch_effort_predec_out = (1 << blk1_num_input_addr_bits);
- C_ld_dec_gate = num_dec_per_predec * gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false);
- R_wire_predec_blk_out = R_wire_predec_blk_out_;
- C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out;
- }
- }
- compute_widths();
- compute_area();
+ compute_widths();
+ compute_area();
}
-void PredecBlk::compute_widths()
-{
- double F, c_load_nand3_path, c_load_nand2_path;
- double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_);
- double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
- double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
+void PredecBlk::compute_widths() {
+ double F, c_load_nand3_path, c_load_nand2_path;
+ double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_);
+ double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
+ double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
- if (exist == false) return;
+ if (exist == false) return;
- switch (number_input_addr_bits)
- {
+ switch (number_input_addr_bits) {
case 1:
- flag_two_unique_paths = false;
- number_inputs_L1_gate = 2;
- flag_L2_gate = 0;
- break;
- case 2:
- flag_two_unique_paths = false;
- number_inputs_L1_gate = 2;
- flag_L2_gate = 0;
- break;
- case 3:
- flag_two_unique_paths = false;
- number_inputs_L1_gate = 3;
- flag_L2_gate = 0;
- break;
- case 4:
- flag_two_unique_paths = false;
- number_inputs_L1_gate = 2;
- flag_L2_gate = 2;
- branch_effort_nand2_gate_output = 4;
- break;
- case 5:
- flag_two_unique_paths = true;
- flag_L2_gate = 2;
- branch_effort_nand2_gate_output = 8;
- branch_effort_nand3_gate_output = 4;
- break;
- case 6:
- flag_two_unique_paths = false;
- number_inputs_L1_gate = 3;
- flag_L2_gate = 2;
- branch_effort_nand3_gate_output = 8;
- break;
- case 7:
- flag_two_unique_paths = true;
- flag_L2_gate = 3;
- branch_effort_nand2_gate_output = 32;
- branch_effort_nand3_gate_output = 16;
- break;
- case 8:
- flag_two_unique_paths = true;
- flag_L2_gate = 3;
- branch_effort_nand2_gate_output = 64;
- branch_effort_nand3_gate_output = 32;
- break;
- case 9:
- flag_two_unique_paths = false;
- number_inputs_L1_gate = 3;
- flag_L2_gate = 3;
- branch_effort_nand3_gate_output = 64;
- break;
- default:
- assert(0);
- break;
- }
-
- // find the number of gates and sizing in second level of predecoder (if there is a second level)
- if (flag_L2_gate)
- {
- if (flag_L2_gate == 2)
- { // 2nd level is a NAND2 gate
- w_L2_n[0] = 2 * g_tp.min_w_nmos_;
- F = gnand2;
- }
- else
- { // 2nd level is a NAND3 gate
- w_L2_n[0] = 3 * g_tp.min_w_nmos_;
- F = gnand3;
- }
- w_L2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
- F *= C_ld_predec_blk_out / (gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_));
- number_gates_L2 = logical_effort(
- min_number_gates_L2,
- flag_L2_gate == 2 ? gnand2 : gnand3,
- F,
- w_L2_n,
- w_L2_p,
- C_ld_predec_blk_out,
- p_to_n_sz_ratio,
- is_dram_, false,
- g_tp.max_w_nmos_);
-
- // Now find the number of gates and widths in first level of predecoder
- if ((flag_two_unique_paths)||(number_inputs_L1_gate == 2))
- { // Whenever flag_two_unique_paths is true, it means first level of decoder employs
- // both NAND2 and NAND3 gates. Or when number_inputs_L1_gate is 2, it means
- // a NAND2 gate is used in the first level of the predecoder
- c_load_nand2_path = branch_effort_nand2_gate_output *
- (gate_C(w_L2_n[0], 0, is_dram_) +
- gate_C(w_L2_p[0], 0, is_dram_));
- w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_;
- w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
- F = gnand2 * c_load_nand2_path /
- (gate_C(w_L1_nand2_n[0], 0, is_dram_) +
- gate_C(w_L1_nand2_p[0], 0, is_dram_));
- number_gates_L1_nand2_path = logical_effort(
- min_number_gates_L1,
- gnand2,
- F,
- w_L1_nand2_n,
- w_L1_nand2_p,
- c_load_nand2_path,
- p_to_n_sz_ratio,
- is_dram_, false,
- g_tp.max_w_nmos_);
- }
-
- //Now find widths of gates along path in which first gate is a NAND3
- if ((flag_two_unique_paths)||(number_inputs_L1_gate == 3))
- { // Whenever flag_two_unique_paths is TRUE, it means first level of decoder employs
- // both NAND2 and NAND3 gates. Or when number_inputs_L1_gate is 3, it means
- // a NAND3 gate is used in the first level of the predecoder
- c_load_nand3_path = branch_effort_nand3_gate_output *
- (gate_C(w_L2_n[0], 0, is_dram_) +
- gate_C(w_L2_p[0], 0, is_dram_));
- w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_;
- w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
- F = gnand3 * c_load_nand3_path /
- (gate_C(w_L1_nand3_n[0], 0, is_dram_) +
- gate_C(w_L1_nand3_p[0], 0, is_dram_));
- number_gates_L1_nand3_path = logical_effort(
- min_number_gates_L1,
- gnand3,
- F,
- w_L1_nand3_n,
- w_L1_nand3_p,
- c_load_nand3_path,
- p_to_n_sz_ratio,
- is_dram_, false,
- g_tp.max_w_nmos_);
- }
- }
- else
- { // find number of gates and widths in first level of predecoder block when there is no second level
- if (number_inputs_L1_gate == 2)
- {
- w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_;
- w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
- F = gnand2*C_ld_predec_blk_out /
- (gate_C(w_L1_nand2_n[0], 0, is_dram_) +
- gate_C(w_L1_nand2_p[0], 0, is_dram_));
- number_gates_L1_nand2_path = logical_effort(
- min_number_gates_L1,
- gnand2,
- F,
- w_L1_nand2_n,
- w_L1_nand2_p,
- C_ld_predec_blk_out,
- p_to_n_sz_ratio,
- is_dram_, false,
- g_tp.max_w_nmos_);
- }
- else if (number_inputs_L1_gate == 3)
- {
- w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_;
- w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
- F = gnand3*C_ld_predec_blk_out /
- (gate_C(w_L1_nand3_n[0], 0, is_dram_) +
- gate_C(w_L1_nand3_p[0], 0, is_dram_));
- number_gates_L1_nand3_path = logical_effort(
- min_number_gates_L1,
- gnand3,
- F,
- w_L1_nand3_n,
- w_L1_nand3_p,
- C_ld_predec_blk_out,
- p_to_n_sz_ratio,
- is_dram_, false,
- g_tp.max_w_nmos_);
- }
- }
-}
-
-
-
-void PredecBlk::compute_area()
-{
- if (exist)
- { // First check whether a predecoder block is needed
- int num_L1_nand2 = 0;
- int num_L1_nand3 = 0;
- int num_L2 = 0;
- double tot_area_L1_nand3 =0;
- double leak_L1_nand3 =0;
- double gate_leak_L1_nand3 =0;
-
- double tot_area_L1_nand2 = compute_gate_area(NAND, 2, w_L1_nand2_p[0], w_L1_nand2_n[0], g_tp.cell_h_def);
- double leak_L1_nand2 = cmos_Isub_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_);
- double gate_leak_L1_nand2 = cmos_Ig_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_);
- if (number_inputs_L1_gate != 3) {
- tot_area_L1_nand3 = 0;
- leak_L1_nand3 = 0;
- gate_leak_L1_nand3 =0;
- }
- else {
- tot_area_L1_nand3 = compute_gate_area(NAND, 3, w_L1_nand3_p[0], w_L1_nand3_n[0], g_tp.cell_h_def);
- leak_L1_nand3 = cmos_Isub_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand);
- gate_leak_L1_nand3 = cmos_Ig_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand);
- }
-
- switch (number_input_addr_bits)
- {
- case 1: //2 NAND2 gates
- num_L1_nand2 = 2;
- num_L2 = 0;
- num_L1_active_nand2_path =1;
- num_L1_active_nand3_path =0;
+ flag_two_unique_paths = false;
+ number_inputs_L1_gate = 2;
+ flag_L2_gate = 0;
break;
- case 2: //4 NAND2 gates
- num_L1_nand2 = 4;
- num_L2 = 0;
- num_L1_active_nand2_path =1;
- num_L1_active_nand3_path =0;
+ case 2:
+ flag_two_unique_paths = false;
+ number_inputs_L1_gate = 2;
+ flag_L2_gate = 0;
break;
- case 3: //8 NAND3 gates
- num_L1_nand3 = 8;
- num_L2 = 0;
- num_L1_active_nand2_path =0;
- num_L1_active_nand3_path =1;
+ case 3:
+ flag_two_unique_paths = false;
+ number_inputs_L1_gate = 3;
+ flag_L2_gate = 0;
break;
- case 4: //4 + 4 NAND2 gates
- num_L1_nand2 = 8;
- num_L2 = 16;
- num_L1_active_nand2_path =2;
- num_L1_active_nand3_path =0;
+ case 4:
+ flag_two_unique_paths = false;
+ number_inputs_L1_gate = 2;
+ flag_L2_gate = 2;
+ branch_effort_nand2_gate_output = 4;
break;
- case 5: //4 NAND2 gates, 8 NAND3 gates
- num_L1_nand2 = 4;
- num_L1_nand3 = 8;
- num_L2 = 32;
- num_L1_active_nand2_path =1;
- num_L1_active_nand3_path =1;
+ case 5:
+ flag_two_unique_paths = true;
+ flag_L2_gate = 2;
+ branch_effort_nand2_gate_output = 8;
+ branch_effort_nand3_gate_output = 4;
break;
- case 6: //8 + 8 NAND3 gates
- num_L1_nand3 = 16;
- num_L2 = 64;
- num_L1_active_nand2_path =0;
- num_L1_active_nand3_path =2;
+ case 6:
+ flag_two_unique_paths = false;
+ number_inputs_L1_gate = 3;
+ flag_L2_gate = 2;
+ branch_effort_nand3_gate_output = 8;
break;
- case 7: //4 + 4 NAND2 gates, 8 NAND3 gates
- num_L1_nand2 = 8;
- num_L1_nand3 = 8;
- num_L2 = 128;
- num_L1_active_nand2_path =2;
- num_L1_active_nand3_path =1;
+ case 7:
+ flag_two_unique_paths = true;
+ flag_L2_gate = 3;
+ branch_effort_nand2_gate_output = 32;
+ branch_effort_nand3_gate_output = 16;
break;
- case 8: //4 NAND2 gates, 8 + 8 NAND3 gates
- num_L1_nand2 = 4;
- num_L1_nand3 = 16;
- num_L2 = 256;
- num_L1_active_nand2_path =2;
- num_L1_active_nand3_path =2;
+ case 8:
+ flag_two_unique_paths = true;
+ flag_L2_gate = 3;
+ branch_effort_nand2_gate_output = 64;
+ branch_effort_nand3_gate_output = 32;
break;
- case 9: //8 + 8 + 8 NAND3 gates
- num_L1_nand3 = 24;
- num_L2 = 512;
- num_L1_active_nand2_path =0;
- num_L1_active_nand3_path =3;
+ case 9:
+ flag_two_unique_paths = false;
+ number_inputs_L1_gate = 3;
+ flag_L2_gate = 3;
+ branch_effort_nand3_gate_output = 64;
break;
- default:
+ default:
+ assert(0);
break;
}
- for (int i = 1; i < number_gates_L1_nand2_path; ++i)
- {
- tot_area_L1_nand2 += compute_gate_area(INV, 1, w_L1_nand2_p[i], w_L1_nand2_n[i], g_tp.cell_h_def);
- leak_L1_nand2 += cmos_Isub_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_);
- gate_leak_L1_nand2 += cmos_Ig_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_);
+ // find the number of gates and sizing in second level of predecoder (if there is a second level)
+ if (flag_L2_gate) {
+ if (flag_L2_gate == 2) { // 2nd level is a NAND2 gate
+ w_L2_n[0] = 2 * g_tp.min_w_nmos_;
+ F = gnand2;
+ } else { // 2nd level is a NAND3 gate
+ w_L2_n[0] = 3 * g_tp.min_w_nmos_;
+ F = gnand3;
+ }
+ w_L2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
+ F *= C_ld_predec_blk_out / (gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_));
+ number_gates_L2 = logical_effort(
+ min_number_gates_L2,
+ flag_L2_gate == 2 ? gnand2 : gnand3,
+ F,
+ w_L2_n,
+ w_L2_p,
+ C_ld_predec_blk_out,
+ p_to_n_sz_ratio,
+ is_dram_, false,
+ g_tp.max_w_nmos_);
+
+ // Now find the number of gates and widths in first level of predecoder
+ if ((flag_two_unique_paths) || (number_inputs_L1_gate == 2)) {
+ // Whenever flag_two_unique_paths is true, it means first level of
+ // decoder employs
+ // both NAND2 and NAND3 gates. Or when number_inputs_L1_gate is 2,
+ // it means
+ // a NAND2 gate is used in the first level of the predecoder
+ c_load_nand2_path = branch_effort_nand2_gate_output *
+ (gate_C(w_L2_n[0], 0, is_dram_) +
+ gate_C(w_L2_p[0], 0, is_dram_));
+ w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_;
+ w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
+ F = gnand2 * c_load_nand2_path /
+ (gate_C(w_L1_nand2_n[0], 0, is_dram_) +
+ gate_C(w_L1_nand2_p[0], 0, is_dram_));
+ number_gates_L1_nand2_path = logical_effort(
+ min_number_gates_L1,
+ gnand2,
+ F,
+ w_L1_nand2_n,
+ w_L1_nand2_p,
+ c_load_nand2_path,
+ p_to_n_sz_ratio,
+ is_dram_, false,
+ g_tp.max_w_nmos_);
+ }
+
+ //Now find widths of gates along path in which first gate is a NAND3
+ if ((flag_two_unique_paths) || (number_inputs_L1_gate == 3)) { // Whenever flag_two_unique_paths is TRUE, it means first level of decoder employs
+ // both NAND2 and NAND3 gates. Or when number_inputs_L1_gate is 3, it means
+ // a NAND3 gate is used in the first level of the predecoder
+ c_load_nand3_path = branch_effort_nand3_gate_output *
+ (gate_C(w_L2_n[0], 0, is_dram_) +
+ gate_C(w_L2_p[0], 0, is_dram_));
+ w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_;
+ w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
+ F = gnand3 * c_load_nand3_path /
+ (gate_C(w_L1_nand3_n[0], 0, is_dram_) +
+ gate_C(w_L1_nand3_p[0], 0, is_dram_));
+ number_gates_L1_nand3_path = logical_effort(
+ min_number_gates_L1,
+ gnand3,
+ F,
+ w_L1_nand3_n,
+ w_L1_nand3_p,
+ c_load_nand3_path,
+ p_to_n_sz_ratio,
+ is_dram_, false,
+ g_tp.max_w_nmos_);
+ }
+ } else { // find number of gates and widths in first level of predecoder block when there is no second level
+ if (number_inputs_L1_gate == 2) {
+ w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_;
+ w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
+ F = gnand2 * C_ld_predec_blk_out /
+ (gate_C(w_L1_nand2_n[0], 0, is_dram_) +
+ gate_C(w_L1_nand2_p[0], 0, is_dram_));
+ number_gates_L1_nand2_path = logical_effort(
+ min_number_gates_L1,
+ gnand2,
+ F,
+ w_L1_nand2_n,
+ w_L1_nand2_p,
+ C_ld_predec_blk_out,
+ p_to_n_sz_ratio,
+ is_dram_, false,
+ g_tp.max_w_nmos_);
+ } else if (number_inputs_L1_gate == 3) {
+ w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_;
+ w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
+ F = gnand3 * C_ld_predec_blk_out /
+ (gate_C(w_L1_nand3_n[0], 0, is_dram_) +
+ gate_C(w_L1_nand3_p[0], 0, is_dram_));
+ number_gates_L1_nand3_path = logical_effort(
+ min_number_gates_L1,
+ gnand3,
+ F,
+ w_L1_nand3_n,
+ w_L1_nand3_p,
+ C_ld_predec_blk_out,
+ p_to_n_sz_ratio,
+ is_dram_, false,
+ g_tp.max_w_nmos_);
+ }
}
- tot_area_L1_nand2 *= num_L1_nand2;
- leak_L1_nand2 *= num_L1_nand2;
- gate_leak_L1_nand2 *= num_L1_nand2;
-
- for (int i = 1; i < number_gates_L1_nand3_path; ++i)
- {
- tot_area_L1_nand3 += compute_gate_area(INV, 1, w_L1_nand3_p[i], w_L1_nand3_n[i], g_tp.cell_h_def);
- leak_L1_nand3 += cmos_Isub_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_);
- gate_leak_L1_nand3 += cmos_Ig_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_);
- }
- tot_area_L1_nand3 *= num_L1_nand3;
- leak_L1_nand3 *= num_L1_nand3;
- gate_leak_L1_nand3 *= num_L1_nand3;
+}
- double cumulative_area_L1 = tot_area_L1_nand2 + tot_area_L1_nand3;
- double cumulative_area_L2 = 0.0;
- double leakage_L2 = 0.0;
- double gate_leakage_L2 = 0.0;
- if (flag_L2_gate == 2)
- {
- cumulative_area_L2 = compute_gate_area(NAND, 2, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def);
- leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_);
- gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_);
- }
- else if (flag_L2_gate == 3)
- {
- cumulative_area_L2 = compute_gate_area(NAND, 3, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def);
- leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_);
- gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_);
- }
- for (int i = 1; i < number_gates_L2; ++i)
- {
- cumulative_area_L2 += compute_gate_area(INV, 1, w_L2_p[i], w_L2_n[i], g_tp.cell_h_def);
- leakage_L2 += cmos_Isub_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_);
- gate_leakage_L2 += cmos_Ig_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_);
+void PredecBlk::compute_area() {
+ if (exist) { // First check whether a predecoder block is needed
+ int num_L1_nand2 = 0;
+ int num_L1_nand3 = 0;
+ int num_L2 = 0;
+ double tot_area_L1_nand3 = 0;
+ double leak_L1_nand3 = 0;
+ double gate_leak_L1_nand3 = 0;
+
+ double tot_area_L1_nand2 = compute_gate_area(NAND, 2, w_L1_nand2_p[0], w_L1_nand2_n[0], g_tp.cell_h_def);
+ double leak_L1_nand2 = cmos_Isub_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_);
+ double gate_leak_L1_nand2 = cmos_Ig_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_);
+ if (number_inputs_L1_gate != 3) {
+ tot_area_L1_nand3 = 0;
+ leak_L1_nand3 = 0;
+ gate_leak_L1_nand3 = 0;
+ } else {
+ tot_area_L1_nand3 = compute_gate_area(NAND, 3, w_L1_nand3_p[0], w_L1_nand3_n[0], g_tp.cell_h_def);
+ leak_L1_nand3 = cmos_Isub_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand);
+ gate_leak_L1_nand3 = cmos_Ig_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand);
+ }
+
+ switch (number_input_addr_bits) {
+ case 1: //2 NAND2 gates
+ num_L1_nand2 = 2;
+ num_L2 = 0;
+ num_L1_active_nand2_path = 1;
+ num_L1_active_nand3_path = 0;
+ break;
+ case 2: //4 NAND2 gates
+ num_L1_nand2 = 4;
+ num_L2 = 0;
+ num_L1_active_nand2_path = 1;
+ num_L1_active_nand3_path = 0;
+ break;
+ case 3: //8 NAND3 gates
+ num_L1_nand3 = 8;
+ num_L2 = 0;
+ num_L1_active_nand2_path = 0;
+ num_L1_active_nand3_path = 1;
+ break;
+ case 4: //4 + 4 NAND2 gates
+ num_L1_nand2 = 8;
+ num_L2 = 16;
+ num_L1_active_nand2_path = 2;
+ num_L1_active_nand3_path = 0;
+ break;
+ case 5: //4 NAND2 gates, 8 NAND3 gates
+ num_L1_nand2 = 4;
+ num_L1_nand3 = 8;
+ num_L2 = 32;
+ num_L1_active_nand2_path = 1;
+ num_L1_active_nand3_path = 1;
+ break;
+ case 6: //8 + 8 NAND3 gates
+ num_L1_nand3 = 16;
+ num_L2 = 64;
+ num_L1_active_nand2_path = 0;
+ num_L1_active_nand3_path = 2;
+ break;
+ case 7: //4 + 4 NAND2 gates, 8 NAND3 gates
+ num_L1_nand2 = 8;
+ num_L1_nand3 = 8;
+ num_L2 = 128;
+ num_L1_active_nand2_path = 2;
+ num_L1_active_nand3_path = 1;
+ break;
+ case 8: //4 NAND2 gates, 8 + 8 NAND3 gates
+ num_L1_nand2 = 4;
+ num_L1_nand3 = 16;
+ num_L2 = 256;
+ num_L1_active_nand2_path = 2;
+ num_L1_active_nand3_path = 2;
+ break;
+ case 9: //8 + 8 + 8 NAND3 gates
+ num_L1_nand3 = 24;
+ num_L2 = 512;
+ num_L1_active_nand2_path = 0;
+ num_L1_active_nand3_path = 3;
+ break;
+ default:
+ break;
+ }
+
+ for (int i = 1; i < number_gates_L1_nand2_path; ++i) {
+ tot_area_L1_nand2 += compute_gate_area(INV, 1, w_L1_nand2_p[i], w_L1_nand2_n[i], g_tp.cell_h_def);
+ leak_L1_nand2 += cmos_Isub_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_);
+ gate_leak_L1_nand2 += cmos_Ig_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_);
+ }
+ tot_area_L1_nand2 *= num_L1_nand2;
+ leak_L1_nand2 *= num_L1_nand2;
+ gate_leak_L1_nand2 *= num_L1_nand2;
+
+ for (int i = 1; i < number_gates_L1_nand3_path; ++i) {
+ tot_area_L1_nand3 += compute_gate_area(INV, 1, w_L1_nand3_p[i], w_L1_nand3_n[i], g_tp.cell_h_def);
+ leak_L1_nand3 += cmos_Isub_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_);
+ gate_leak_L1_nand3 += cmos_Ig_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_);
+ }
+ tot_area_L1_nand3 *= num_L1_nand3;
+ leak_L1_nand3 *= num_L1_nand3;
+ gate_leak_L1_nand3 *= num_L1_nand3;
+
+ double cumulative_area_L1 = tot_area_L1_nand2 + tot_area_L1_nand3;
+ double cumulative_area_L2 = 0.0;
+ double leakage_L2 = 0.0;
+ double gate_leakage_L2 = 0.0;
+
+ if (flag_L2_gate == 2) {
+ cumulative_area_L2 = compute_gate_area(NAND, 2, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def);
+ leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_);
+ gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_);
+ } else if (flag_L2_gate == 3) {
+ cumulative_area_L2 = compute_gate_area(NAND, 3, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def);
+ leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_);
+ gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_);
+ }
+
+ for (int i = 1; i < number_gates_L2; ++i) {
+ cumulative_area_L2 += compute_gate_area(INV, 1, w_L2_p[i], w_L2_n[i], g_tp.cell_h_def);
+ leakage_L2 += cmos_Isub_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_);
+ gate_leakage_L2 += cmos_Ig_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_);
+ }
+ cumulative_area_L2 *= num_L2;
+ leakage_L2 *= num_L2;
+ gate_leakage_L2 *= num_L2;
+
+ power_nand2_path.readOp.leakage = leak_L1_nand2 * g_tp.peri_global.Vdd;
+ power_nand3_path.readOp.leakage = leak_L1_nand3 * g_tp.peri_global.Vdd;
+ power_L2.readOp.leakage = leakage_L2 * g_tp.peri_global.Vdd;
+ area.set_area(cumulative_area_L1 + cumulative_area_L2);
+ power_nand2_path.readOp.gate_leakage = gate_leak_L1_nand2 * g_tp.peri_global.Vdd;
+ power_nand3_path.readOp.gate_leakage = gate_leak_L1_nand3 * g_tp.peri_global.Vdd;
+ power_L2.readOp.gate_leakage = gate_leakage_L2 * g_tp.peri_global.Vdd;
}
- cumulative_area_L2 *= num_L2;
- leakage_L2 *= num_L2;
- gate_leakage_L2 *= num_L2;
-
- power_nand2_path.readOp.leakage = leak_L1_nand2 * g_tp.peri_global.Vdd;
- power_nand3_path.readOp.leakage = leak_L1_nand3 * g_tp.peri_global.Vdd;
- power_L2.readOp.leakage = leakage_L2 * g_tp.peri_global.Vdd;
- area.set_area(cumulative_area_L1 + cumulative_area_L2);
- power_nand2_path.readOp.gate_leakage = gate_leak_L1_nand2 * g_tp.peri_global.Vdd;
- power_nand3_path.readOp.gate_leakage = gate_leak_L1_nand3 * g_tp.peri_global.Vdd;
- power_L2.readOp.gate_leakage = gate_leakage_L2 * g_tp.peri_global.Vdd;
- }
}
pair<double, double> PredecBlk::compute_delays(
- pair<double, double> inrisetime) // <nand2, nand3>
-{
- pair<double, double> ret_val;
- ret_val.first = 0; // outrisetime_nand2_path
- ret_val.second = 0; // outrisetime_nand3_path
-
- double inrisetime_nand2_path = inrisetime.first;
- double inrisetime_nand3_path = inrisetime.second;
- int i;
- double rd, c_load, c_intrinsic, tf, this_delay;
- double Vdd = g_tp.peri_global.Vdd;
-
- // TODO: following delay calculation part can be greatly simplified.
- // first check whether a predecoder block is required
- if (exist)
- {
- //Find delay in first level of predecoder block
- //First find delay in path
- if ((flag_two_unique_paths) || (number_inputs_L1_gate == 2))
- {
- //First gate is a NAND2 gate
- rd = tr_R_on(w_L1_nand2_n[0], NCH, 2, is_dram_);
- c_load = gate_C(w_L1_nand2_n[1] + w_L1_nand2_p[1], 0.0, is_dram_);
- c_intrinsic = 2 * drain_C_(w_L1_nand2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(w_L1_nand2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_load);
- this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
- delay_nand2_path += this_delay;
- inrisetime_nand2_path = this_delay / (1.0 - 0.5);
- power_nand2_path.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd;
-
- //Add delays of all but the last inverter in the chain
- for (i = 1; i < number_gates_L1_nand2_path - 1; ++i)
- {
- rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_);
- c_load = gate_C(w_L1_nand2_n[i+1] + w_L1_nand2_p[i+1], 0.0, is_dram_);
- c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_load);
- this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
- delay_nand2_path += this_delay;
- inrisetime_nand2_path = this_delay / (1.0 - 0.5);
- power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
- }
-
- //Add delay of the last inverter
- i = number_gates_L1_nand2_path - 1;
- rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_);
- if (flag_L2_gate)
- {
- c_load = branch_effort_nand2_gate_output*(gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_));
- c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_load);
- this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
- delay_nand2_path += this_delay;
- inrisetime_nand2_path = this_delay / (1.0 - 0.5);
- power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
- }
- else
- { //First level directly drives decoder output load
- c_load = C_ld_predec_blk_out;
- c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2;
- this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
- delay_nand2_path += this_delay;
- ret_val.first = this_delay / (1.0 - 0.5);
- power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
- }
- }
+ pair<double, double> inrisetime) { // <nand2, nand3>
+ pair<double, double> ret_val;
+ ret_val.first = 0; // outrisetime_nand2_path
+ ret_val.second = 0; // outrisetime_nand3_path
- if ((flag_two_unique_paths) || (number_inputs_L1_gate == 3))
- { //Check if the number of gates in the first level is more than 1.
- //First gate is a NAND3 gate
- rd = tr_R_on(w_L1_nand3_n[0], NCH, 3, is_dram_);
- c_load = gate_C(w_L1_nand3_n[1] + w_L1_nand3_p[1], 0.0, is_dram_);
- c_intrinsic = 3 * drain_C_(w_L1_nand3_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(w_L1_nand3_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_load);
- this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
- delay_nand3_path += this_delay;
- inrisetime_nand3_path = this_delay / (1.0 - 0.5);
- power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
-
- //Add delays of all but the last inverter in the chain
- for (i = 1; i < number_gates_L1_nand3_path - 1; ++i)
- {
- rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_);
- c_load = gate_C(w_L1_nand3_n[i+1] + w_L1_nand3_p[i+1], 0.0, is_dram_);
- c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_load);
- this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
- delay_nand3_path += this_delay;
- inrisetime_nand3_path = this_delay / (1.0 - 0.5);
- power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
- }
-
- //Add delay of the last inverter
- i = number_gates_L1_nand3_path - 1;
- rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_);
- if (flag_L2_gate)
- {
- c_load = branch_effort_nand3_gate_output*(gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_));
- c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_load);
- this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
- delay_nand3_path += this_delay;
- inrisetime_nand3_path = this_delay / (1.0 - 0.5);
- power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
- }
- else
- { //First level directly drives decoder output load
- c_load = C_ld_predec_blk_out;
- c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2;
- this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
- delay_nand3_path += this_delay;
- ret_val.second = this_delay / (1.0 - 0.5);
- power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
- }
- }
+ double inrisetime_nand2_path = inrisetime.first;
+ double inrisetime_nand3_path = inrisetime.second;
+ int i;
+ double rd, c_load, c_intrinsic, tf, this_delay;
+ double Vdd = g_tp.peri_global.Vdd;
- // Find delay through second level
- if (flag_L2_gate)
- {
- if (flag_L2_gate == 2)
- {
- rd = tr_R_on(w_L2_n[0], NCH, 2, is_dram_);
- c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_);
- c_intrinsic = 2 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(w_L2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_load);
- this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
- delay_nand2_path += this_delay;
- inrisetime_nand2_path = this_delay / (1.0 - 0.5);
- power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
- }
- else
- { // flag_L2_gate = 3
- rd = tr_R_on(w_L2_n[0], NCH, 3, is_dram_);
- c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_);
- c_intrinsic = 3 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(w_L2_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_load);
- this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
- delay_nand3_path += this_delay;
- inrisetime_nand3_path = this_delay / (1.0 - 0.5);
- power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
- }
-
- for (i = 1; i < number_gates_L2 - 1; ++i)
- {
- rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_);
- c_load = gate_C(w_L2_n[i+1] + w_L2_p[i+1], 0.0, is_dram_);
- c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_load);
- this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
- delay_nand2_path += this_delay;
- inrisetime_nand2_path = this_delay / (1.0 - 0.5);
- this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
- delay_nand3_path += this_delay;
- inrisetime_nand3_path = this_delay / (1.0 - 0.5);
- power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
- }
-
- //Add delay of final inverter that drives the wordline decoders
- i = number_gates_L2 - 1;
- c_load = C_ld_predec_blk_out;
- rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_);
- c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2;
- this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
- delay_nand2_path += this_delay;
- ret_val.first = this_delay / (1.0 - 0.5);
- this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
- delay_nand3_path += this_delay;
- ret_val.second = this_delay / (1.0 - 0.5);
- power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
+ // TODO: following delay calculation part can be greatly simplified.
+ // first check whether a predecoder block is required
+ if (exist) {
+ //Find delay in first level of predecoder block
+ //First find delay in path
+ if ((flag_two_unique_paths) || (number_inputs_L1_gate == 2)) {
+ //First gate is a NAND2 gate
+ rd = tr_R_on(w_L1_nand2_n[0], NCH, 2, is_dram_);
+ c_load = gate_C(w_L1_nand2_n[1] + w_L1_nand2_p[1], 0.0, is_dram_);
+ c_intrinsic = 2 * drain_C_(w_L1_nand2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L1_nand2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load);
+ this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
+ delay_nand2_path += this_delay;
+ inrisetime_nand2_path = this_delay / (1.0 - 0.5);
+ power_nand2_path.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd;
+
+ //Add delays of all but the last inverter in the chain
+ for (i = 1; i < number_gates_L1_nand2_path - 1; ++i) {
+ rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_);
+ c_load = gate_C(w_L1_nand2_n[i+1] + w_L1_nand2_p[i+1], 0.0, is_dram_);
+ c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load);
+ this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
+ delay_nand2_path += this_delay;
+ inrisetime_nand2_path = this_delay / (1.0 - 0.5);
+ power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
+ }
+
+ //Add delay of the last inverter
+ i = number_gates_L1_nand2_path - 1;
+ rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_);
+ if (flag_L2_gate) {
+ c_load = branch_effort_nand2_gate_output *
+ (gate_C(w_L2_n[0], 0, is_dram_) +
+ gate_C(w_L2_p[0], 0, is_dram_));
+ c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load);
+ this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
+ delay_nand2_path += this_delay;
+ inrisetime_nand2_path = this_delay / (1.0 - 0.5);
+ power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
+ } else { //First level directly drives decoder output load
+ c_load = C_ld_predec_blk_out;
+ c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2;
+ this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
+ delay_nand2_path += this_delay;
+ ret_val.first = this_delay / (1.0 - 0.5);
+ power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
+ }
+ }
+
+ if ((flag_two_unique_paths) || (number_inputs_L1_gate == 3)) {
+ //Check if the number of gates in the first level is more than 1.
+ //First gate is a NAND3 gate
+ rd = tr_R_on(w_L1_nand3_n[0], NCH, 3, is_dram_);
+ c_load = gate_C(w_L1_nand3_n[1] + w_L1_nand3_p[1], 0.0, is_dram_);
+ c_intrinsic = 3 * drain_C_(w_L1_nand3_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L1_nand3_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load);
+ this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
+ delay_nand3_path += this_delay;
+ inrisetime_nand3_path = this_delay / (1.0 - 0.5);
+ power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
+
+ //Add delays of all but the last inverter in the chain
+ for (i = 1; i < number_gates_L1_nand3_path - 1; ++i) {
+ rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_);
+ c_load = gate_C(w_L1_nand3_n[i+1] + w_L1_nand3_p[i+1], 0.0, is_dram_);
+ c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load);
+ this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
+ delay_nand3_path += this_delay;
+ inrisetime_nand3_path = this_delay / (1.0 - 0.5);
+ power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
+ }
+
+ //Add delay of the last inverter
+ i = number_gates_L1_nand3_path - 1;
+ rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_);
+ if (flag_L2_gate) {
+ c_load = branch_effort_nand3_gate_output *
+ (gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0,
+ is_dram_));
+ c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load);
+ this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
+ delay_nand3_path += this_delay;
+ inrisetime_nand3_path = this_delay / (1.0 - 0.5);
+ power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
+ } else { //First level directly drives decoder output load
+ c_load = C_ld_predec_blk_out;
+ c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2;
+ this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
+ delay_nand3_path += this_delay;
+ ret_val.second = this_delay / (1.0 - 0.5);
+ power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
+ }
+ }
+
+ // Find delay through second level
+ if (flag_L2_gate) {
+ if (flag_L2_gate == 2) {
+ rd = tr_R_on(w_L2_n[0], NCH, 2, is_dram_);
+ c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_);
+ c_intrinsic = 2 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load);
+ this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
+ delay_nand2_path += this_delay;
+ inrisetime_nand2_path = this_delay / (1.0 - 0.5);
+ power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
+ } else { // flag_L2_gate = 3
+ rd = tr_R_on(w_L2_n[0], NCH, 3, is_dram_);
+ c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_);
+ c_intrinsic = 3 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L2_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load);
+ this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
+ delay_nand3_path += this_delay;
+ inrisetime_nand3_path = this_delay / (1.0 - 0.5);
+ power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
+ }
+
+ for (i = 1; i < number_gates_L2 - 1; ++i) {
+ rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_);
+ c_load = gate_C(w_L2_n[i+1] + w_L2_p[i+1], 0.0, is_dram_);
+ c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load);
+ this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
+ delay_nand2_path += this_delay;
+ inrisetime_nand2_path = this_delay / (1.0 - 0.5);
+ this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
+ delay_nand3_path += this_delay;
+ inrisetime_nand3_path = this_delay / (1.0 - 0.5);
+ power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
+ }
+
+ //Add delay of final inverter that drives the wordline decoders
+ i = number_gates_L2 - 1;
+ c_load = C_ld_predec_blk_out;
+ rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_);
+ c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2;
+ this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
+ delay_nand2_path += this_delay;
+ ret_val.first = this_delay / (1.0 - 0.5);
+ this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
+ delay_nand3_path += this_delay;
+ ret_val.second = this_delay / (1.0 - 0.5);
+ power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
+ }
}
- }
- delay = (ret_val.first > ret_val.second) ? ret_val.first : ret_val.second;
- return ret_val;
+ delay = (ret_val.first > ret_val.second) ? ret_val.first : ret_val.second;
+ return ret_val;
}
void PredecBlk::leakage_feedback(double temperature)
@@ -1033,302 +969,287 @@ PredecBlkDrv::PredecBlkDrv(
int way_select_,
PredecBlk * blk_,
bool is_dram)
- :flag_driver_exists(0),
- number_gates_nand2_path(0),
- number_gates_nand3_path(0),
- min_number_gates(2),
- num_buffers_driving_1_nand2_load(0),
- num_buffers_driving_2_nand2_load(0),
- num_buffers_driving_4_nand2_load(0),
- num_buffers_driving_2_nand3_load(0),
- num_buffers_driving_8_nand3_load(0),
- num_buffers_nand3_path(0),
- c_load_nand2_path_out(0),
- c_load_nand3_path_out(0),
- r_load_nand2_path_out(0),
- r_load_nand3_path_out(0),
- delay_nand2_path(0),
- delay_nand3_path(0),
- power_nand2_path(),
- power_nand3_path(),
- blk(blk_), dec(blk->dec),
- is_dram_(is_dram),
- way_select(way_select_)
-{
- for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++)
- {
- width_nand2_path_n[i] = 0;
- width_nand2_path_p[i] = 0;
- width_nand3_path_n[i] = 0;
- width_nand3_path_p[i] = 0;
- }
-
- number_input_addr_bits = blk->number_input_addr_bits;
-
- if (way_select > 1)
- {
- flag_driver_exists = 1;
- number_input_addr_bits = way_select;
- if (dec->num_in_signals == 2)
- {
- c_load_nand2_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_);
- num_buffers_driving_2_nand2_load = number_input_addr_bits;
- }
- else if (dec->num_in_signals == 3)
- {
- c_load_nand3_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_);
- num_buffers_driving_2_nand3_load = number_input_addr_bits;
+ : flag_driver_exists(0),
+ number_gates_nand2_path(0),
+ number_gates_nand3_path(0),
+ min_number_gates(2),
+ num_buffers_driving_1_nand2_load(0),
+ num_buffers_driving_2_nand2_load(0),
+ num_buffers_driving_4_nand2_load(0),
+ num_buffers_driving_2_nand3_load(0),
+ num_buffers_driving_8_nand3_load(0),
+ num_buffers_nand3_path(0),
+ c_load_nand2_path_out(0),
+ c_load_nand3_path_out(0),
+ r_load_nand2_path_out(0),
+ r_load_nand3_path_out(0),
+ delay_nand2_path(0),
+ delay_nand3_path(0),
+ power_nand2_path(),
+ power_nand3_path(),
+ blk(blk_), dec(blk->dec),
+ is_dram_(is_dram),
+ way_select(way_select_) {
+ for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) {
+ width_nand2_path_n[i] = 0;
+ width_nand2_path_p[i] = 0;
+ width_nand3_path_n[i] = 0;
+ width_nand3_path_p[i] = 0;
}
- }
- else if (way_select == 0)
- {
- if (blk->exist)
- {
- flag_driver_exists = 1;
+
+ number_input_addr_bits = blk->number_input_addr_bits;
+
+ if (way_select > 1) {
+ flag_driver_exists = 1;
+ number_input_addr_bits = way_select;
+ if (dec->num_in_signals == 2) {
+ c_load_nand2_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_);
+ num_buffers_driving_2_nand2_load = number_input_addr_bits;
+ } else if (dec->num_in_signals == 3) {
+ c_load_nand3_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_);
+ num_buffers_driving_2_nand3_load = number_input_addr_bits;
+ }
+ } else if (way_select == 0) {
+ if (blk->exist) {
+ flag_driver_exists = 1;
+ }
}
- }
- compute_widths();
- compute_area();
+ compute_widths();
+ compute_area();
}
-void PredecBlkDrv::compute_widths()
-{
- // The predecode block driver accepts as input the address bits from the h-tree network. For
- // each addr bit it then generates addr and addrbar as outputs. For now ignore the effect of
- // inversion to generate addrbar and simply treat addrbar as addr.
-
- double F;
- double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_);
-
- if (flag_driver_exists)
- {
- double C_nand2_gate_blk = gate_C(blk->w_L1_nand2_n[0] + blk->w_L1_nand2_p[0], 0, is_dram_);
- double C_nand3_gate_blk = gate_C(blk->w_L1_nand3_n[0] + blk->w_L1_nand3_p[0], 0, is_dram_);
-
- if (way_select == 0)
- {
- if (blk->number_input_addr_bits == 1)
- { //2 NAND2 gates
- num_buffers_driving_2_nand2_load = 1;
- c_load_nand2_path_out = 2 * C_nand2_gate_blk;
- }
- else if (blk->number_input_addr_bits == 2)
- { //4 NAND2 gates one 2-4 decoder
- num_buffers_driving_4_nand2_load = 2;
- c_load_nand2_path_out = 4 * C_nand2_gate_blk;
- }
- else if (blk->number_input_addr_bits == 3)
- { //8 NAND3 gates one 3-8 decoder
- num_buffers_driving_8_nand3_load = 3;
- c_load_nand3_path_out = 8 * C_nand3_gate_blk;
- }
- else if (blk->number_input_addr_bits == 4)
- { //4 + 4 NAND2 gates two 2-4 decoder
- num_buffers_driving_4_nand2_load = 4;
- c_load_nand2_path_out = 4 * C_nand2_gate_blk;
- }
- else if (blk->number_input_addr_bits == 5)
- { //4 NAND2 gates, 8 NAND3 gates one 2-4 decoder and one 3-8 decoder
- num_buffers_driving_4_nand2_load = 2;
- num_buffers_driving_8_nand3_load = 3;
- c_load_nand2_path_out = 4 * C_nand2_gate_blk;
- c_load_nand3_path_out = 8 * C_nand3_gate_blk;
- }
- else if (blk->number_input_addr_bits == 6)
- { //8 + 8 NAND3 gates two 3-8 decoder
- num_buffers_driving_8_nand3_load = 6;
- c_load_nand3_path_out = 8 * C_nand3_gate_blk;
- }
- else if (blk->number_input_addr_bits == 7)
- { //4 + 4 NAND2 gates, 8 NAND3 gates two 2-4 decoder and one 3-8 decoder
- num_buffers_driving_4_nand2_load = 4;
- num_buffers_driving_8_nand3_load = 3;
- c_load_nand2_path_out = 4 * C_nand2_gate_blk;
- c_load_nand3_path_out = 8 * C_nand3_gate_blk;
- }
- else if (blk->number_input_addr_bits == 8)
- { //4 NAND2 gates, 8 + 8 NAND3 gates one 2-4 decoder and two 3-8 decoder
- num_buffers_driving_4_nand2_load = 2;
- num_buffers_driving_8_nand3_load = 6;
- c_load_nand2_path_out = 4 * C_nand2_gate_blk;
- c_load_nand3_path_out = 8 * C_nand3_gate_blk;
- }
- else if (blk->number_input_addr_bits == 9)
- { //8 + 8 + 8 NAND3 gates three 3-8 decoder
- num_buffers_driving_8_nand3_load = 9;
- c_load_nand3_path_out = 8 * C_nand3_gate_blk;
- }
- }
-
- if ((blk->flag_two_unique_paths) ||
- (blk->number_inputs_L1_gate == 2) ||
- (number_input_addr_bits == 0) ||
- ((way_select)&&(dec->num_in_signals == 2)))
- { //this means that way_select is driving NAND2 in decoder.
- width_nand2_path_n[0] = g_tp.min_w_nmos_;
- width_nand2_path_p[0] = p_to_n_sz_ratio * width_nand2_path_n[0];
- F = c_load_nand2_path_out / gate_C(width_nand2_path_n[0] + width_nand2_path_p[0], 0, is_dram_);
- number_gates_nand2_path = logical_effort(
- min_number_gates,
- 1,
- F,
- width_nand2_path_n,
- width_nand2_path_p,
- c_load_nand2_path_out,
- p_to_n_sz_ratio,
- is_dram_, false, g_tp.max_w_nmos_);
- }
-
- if ((blk->flag_two_unique_paths) ||
- (blk->number_inputs_L1_gate == 3) ||
- ((way_select)&&(dec->num_in_signals == 3)))
- { //this means that way_select is driving NAND3 in decoder.
- width_nand3_path_n[0] = g_tp.min_w_nmos_;
- width_nand3_path_p[0] = p_to_n_sz_ratio * width_nand3_path_n[0];
- F = c_load_nand3_path_out / gate_C(width_nand3_path_n[0] + width_nand3_path_p[0], 0, is_dram_);
- number_gates_nand3_path = logical_effort(
- min_number_gates,
- 1,
- F,
- width_nand3_path_n,
- width_nand3_path_p,
- c_load_nand3_path_out,
- p_to_n_sz_ratio,
- is_dram_, false, g_tp.max_w_nmos_);
+void PredecBlkDrv::compute_widths() {
+ // The predecode block driver accepts as input the address bits from the h-tree network. For
+ // each addr bit it then generates addr and addrbar as outputs. For now ignore the effect of
+ // inversion to generate addrbar and simply treat addrbar as addr.
+
+ double F;
+ double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_);
+
+ if (flag_driver_exists) {
+ double C_nand2_gate_blk = gate_C(blk->w_L1_nand2_n[0] + blk->w_L1_nand2_p[0], 0, is_dram_);
+ double C_nand3_gate_blk = gate_C(blk->w_L1_nand3_n[0] + blk->w_L1_nand3_p[0], 0, is_dram_);
+
+ if (way_select == 0) {
+ if (blk->number_input_addr_bits == 1) {
+ //2 NAND2 gates
+ num_buffers_driving_2_nand2_load = 1;
+ c_load_nand2_path_out = 2 * C_nand2_gate_blk;
+ } else if (blk->number_input_addr_bits == 2) {
+ //4 NAND2 gates one 2-4 decoder
+ num_buffers_driving_4_nand2_load = 2;
+ c_load_nand2_path_out = 4 * C_nand2_gate_blk;
+ } else if (blk->number_input_addr_bits == 3) {
+ //8 NAND3 gates one 3-8 decoder
+ num_buffers_driving_8_nand3_load = 3;
+ c_load_nand3_path_out = 8 * C_nand3_gate_blk;
+ } else if (blk->number_input_addr_bits == 4) {
+ //4 + 4 NAND2 gates two 2-4 decoder
+ num_buffers_driving_4_nand2_load = 4;
+ c_load_nand2_path_out = 4 * C_nand2_gate_blk;
+ } else if (blk->number_input_addr_bits == 5) {
+ //4 NAND2 gates, 8 NAND3 gates one 2-4 decoder and one 3-8
+ //decoder
+ num_buffers_driving_4_nand2_load = 2;
+ num_buffers_driving_8_nand3_load = 3;
+ c_load_nand2_path_out = 4 * C_nand2_gate_blk;
+ c_load_nand3_path_out = 8 * C_nand3_gate_blk;
+ } else if (blk->number_input_addr_bits == 6) {
+ //8 + 8 NAND3 gates two 3-8 decoder
+ num_buffers_driving_8_nand3_load = 6;
+ c_load_nand3_path_out = 8 * C_nand3_gate_blk;
+ } else if (blk->number_input_addr_bits == 7) {
+ //4 + 4 NAND2 gates, 8 NAND3 gates two 2-4 decoder and one 3-8
+ //decoder
+ num_buffers_driving_4_nand2_load = 4;
+ num_buffers_driving_8_nand3_load = 3;
+ c_load_nand2_path_out = 4 * C_nand2_gate_blk;
+ c_load_nand3_path_out = 8 * C_nand3_gate_blk;
+ } else if (blk->number_input_addr_bits == 8) {
+ //4 NAND2 gates, 8 + 8 NAND3 gates one 2-4 decoder and two 3-8
+ //decoder
+ num_buffers_driving_4_nand2_load = 2;
+ num_buffers_driving_8_nand3_load = 6;
+ c_load_nand2_path_out = 4 * C_nand2_gate_blk;
+ c_load_nand3_path_out = 8 * C_nand3_gate_blk;
+ } else if (blk->number_input_addr_bits == 9) {
+ //8 + 8 + 8 NAND3 gates three 3-8 decoder
+ num_buffers_driving_8_nand3_load = 9;
+ c_load_nand3_path_out = 8 * C_nand3_gate_blk;
+ }
+ }
+
+ if ((blk->flag_two_unique_paths) ||
+ (blk->number_inputs_L1_gate == 2) ||
+ (number_input_addr_bits == 0) ||
+ ((way_select) && (dec->num_in_signals == 2))) {
+ //this means that way_select is driving NAND2 in decoder.
+ width_nand2_path_n[0] = g_tp.min_w_nmos_;
+ width_nand2_path_p[0] = p_to_n_sz_ratio * width_nand2_path_n[0];
+ F = c_load_nand2_path_out / gate_C(width_nand2_path_n[0] + width_nand2_path_p[0], 0, is_dram_);
+ number_gates_nand2_path = logical_effort(
+ min_number_gates,
+ 1,
+ F,
+ width_nand2_path_n,
+ width_nand2_path_p,
+ c_load_nand2_path_out,
+ p_to_n_sz_ratio,
+ is_dram_, false, g_tp.max_w_nmos_);
+ }
+
+ if ((blk->flag_two_unique_paths) ||
+ (blk->number_inputs_L1_gate == 3) ||
+ ((way_select) && (dec->num_in_signals == 3))) {
+ //this means that way_select is driving NAND3 in decoder.
+ width_nand3_path_n[0] = g_tp.min_w_nmos_;
+ width_nand3_path_p[0] = p_to_n_sz_ratio * width_nand3_path_n[0];
+ F = c_load_nand3_path_out / gate_C(width_nand3_path_n[0] + width_nand3_path_p[0], 0, is_dram_);
+ number_gates_nand3_path = logical_effort(
+ min_number_gates,
+ 1,
+ F,
+ width_nand3_path_n,
+ width_nand3_path_p,
+ c_load_nand3_path_out,
+ p_to_n_sz_ratio,
+ is_dram_, false, g_tp.max_w_nmos_);
+ }
}
- }
}
-void PredecBlkDrv::compute_area()
-{
- double area_nand2_path = 0;
- double area_nand3_path = 0;
- double leak_nand2_path = 0;
- double leak_nand3_path = 0;
- double gate_leak_nand2_path = 0;
- double gate_leak_nand3_path = 0;
-
- if (flag_driver_exists)
- { // first check whether a predecoder block driver is needed
- for (int i = 0; i < number_gates_nand2_path; ++i)
- {
- area_nand2_path += compute_gate_area(INV, 1, width_nand2_path_p[i], width_nand2_path_n[i], g_tp.cell_h_def);
- leak_nand2_path += cmos_Isub_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, inv,is_dram_);
- gate_leak_nand2_path += cmos_Ig_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, inv,is_dram_);
- }
- area_nand2_path *= (num_buffers_driving_1_nand2_load +
- num_buffers_driving_2_nand2_load +
- num_buffers_driving_4_nand2_load);
- leak_nand2_path *= (num_buffers_driving_1_nand2_load +
- num_buffers_driving_2_nand2_load +
- num_buffers_driving_4_nand2_load);
- gate_leak_nand2_path *= (num_buffers_driving_1_nand2_load +
+void PredecBlkDrv::compute_area() {
+ double area_nand2_path = 0;
+ double area_nand3_path = 0;
+ double leak_nand2_path = 0;
+ double leak_nand3_path = 0;
+ double gate_leak_nand2_path = 0;
+ double gate_leak_nand3_path = 0;
+
+ if (flag_driver_exists) {
+ // first check whether a predecoder block driver is needed
+ for (int i = 0; i < number_gates_nand2_path; ++i) {
+ area_nand2_path +=
+ compute_gate_area(INV, 1, width_nand2_path_p[i],
+ width_nand2_path_n[i], g_tp.cell_h_def);
+ leak_nand2_path +=
+ cmos_Isub_leakage(width_nand2_path_n[i], width_nand2_path_p[i],
+ 1, inv, is_dram_);
+ gate_leak_nand2_path +=
+ cmos_Ig_leakage(width_nand2_path_n[i], width_nand2_path_p[i],
+ 1, inv, is_dram_);
+ }
+ area_nand2_path *= (num_buffers_driving_1_nand2_load +
num_buffers_driving_2_nand2_load +
num_buffers_driving_4_nand2_load);
-
- for (int i = 0; i < number_gates_nand3_path; ++i)
- {
- area_nand3_path += compute_gate_area(INV, 1, width_nand3_path_p[i], width_nand3_path_n[i], g_tp.cell_h_def);
- leak_nand3_path += cmos_Isub_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_);
- gate_leak_nand3_path += cmos_Ig_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_);
+ leak_nand2_path *= (num_buffers_driving_1_nand2_load +
+ num_buffers_driving_2_nand2_load +
+ num_buffers_driving_4_nand2_load);
+ gate_leak_nand2_path *= (num_buffers_driving_1_nand2_load +
+ num_buffers_driving_2_nand2_load +
+ num_buffers_driving_4_nand2_load);
+
+ for (int i = 0; i < number_gates_nand3_path; ++i) {
+ area_nand3_path +=
+ compute_gate_area(INV, 1, width_nand3_path_p[i],
+ width_nand3_path_n[i], g_tp.cell_h_def);
+ leak_nand3_path +=
+ cmos_Isub_leakage(width_nand3_path_n[i], width_nand3_path_p[i],
+ 1, inv, is_dram_);
+ gate_leak_nand3_path +=
+ cmos_Ig_leakage(width_nand3_path_n[i], width_nand3_path_p[i],
+ 1, inv, is_dram_);
+ }
+ area_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
+ leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
+ gate_leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
+
+ power_nand2_path.readOp.leakage = leak_nand2_path * g_tp.peri_global.Vdd;
+ power_nand3_path.readOp.leakage = leak_nand3_path * g_tp.peri_global.Vdd;
+ power_nand2_path.readOp.gate_leakage = gate_leak_nand2_path * g_tp.peri_global.Vdd;
+ power_nand3_path.readOp.gate_leakage = gate_leak_nand3_path * g_tp.peri_global.Vdd;
+ area.set_area(area_nand2_path + area_nand3_path);
}
- area_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
- leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
- gate_leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
-
- power_nand2_path.readOp.leakage = leak_nand2_path * g_tp.peri_global.Vdd;
- power_nand3_path.readOp.leakage = leak_nand3_path * g_tp.peri_global.Vdd;
- power_nand2_path.readOp.gate_leakage = gate_leak_nand2_path * g_tp.peri_global.Vdd;
- power_nand3_path.readOp.gate_leakage = gate_leak_nand3_path * g_tp.peri_global.Vdd;
- area.set_area(area_nand2_path + area_nand3_path);
- }
}
pair<double, double> PredecBlkDrv::compute_delays(
double inrisetime_nand2_path,
- double inrisetime_nand3_path)
-{
- pair<double, double> ret_val;
- ret_val.first = 0; // outrisetime_nand2_path
- ret_val.second = 0; // outrisetime_nand3_path
- int i;
- double rd, c_gate_load, c_load, c_intrinsic, tf, this_delay;
- double Vdd = g_tp.peri_global.Vdd;
-
- if (flag_driver_exists)
- {
- for (i = 0; i < number_gates_nand2_path - 1; ++i)
- {
- rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_);
- c_gate_load = gate_C(width_nand2_path_p[i+1] + width_nand2_path_n[i+1], 0.0, is_dram_);
- c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_gate_load);
- this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
- delay_nand2_path += this_delay;
- inrisetime_nand2_path = this_delay / (1.0 - 0.5);
- power_nand2_path.readOp.dynamic += (c_gate_load + c_intrinsic) * 0.5 * Vdd * Vdd;
- }
+ double inrisetime_nand3_path) {
+ pair<double, double> ret_val;
+ ret_val.first = 0; // outrisetime_nand2_path
+ ret_val.second = 0; // outrisetime_nand3_path
+ int i;
+ double rd, c_gate_load, c_load, c_intrinsic, tf, this_delay;
+ double Vdd = g_tp.peri_global.Vdd;
- // Final inverter drives the predecoder block or the decoder output load
- if (number_gates_nand2_path != 0)
- {
- i = number_gates_nand2_path - 1;
- rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_);
- c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
- c_load = c_load_nand2_path_out;
- tf = rd * (c_intrinsic + c_load) + r_load_nand2_path_out*c_load/ 2;
- this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
- delay_nand2_path += this_delay;
- ret_val.first = this_delay / (1.0 - 0.5);
- power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * 0.5 * Vdd * Vdd;
+ if (flag_driver_exists) {
+ for (i = 0; i < number_gates_nand2_path - 1; ++i) {
+ rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_);
+ c_gate_load = gate_C(width_nand2_path_p[i+1] + width_nand2_path_n[i+1], 0.0, is_dram_);
+ c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_gate_load);
+ this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
+ delay_nand2_path += this_delay;
+ inrisetime_nand2_path = this_delay / (1.0 - 0.5);
+ power_nand2_path.readOp.dynamic += (c_gate_load + c_intrinsic) * 0.5 * Vdd * Vdd;
+ }
+
+ // Final inverter drives the predecoder block or the decoder output load
+ if (number_gates_nand2_path != 0) {
+ i = number_gates_nand2_path - 1;
+ rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_);
+ c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ c_load = c_load_nand2_path_out;
+ tf = rd * (c_intrinsic + c_load) + r_load_nand2_path_out * c_load / 2;
+ this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
+ delay_nand2_path += this_delay;
+ ret_val.first = this_delay / (1.0 - 0.5);
+ power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * 0.5 * Vdd * Vdd;
// cout<< "c_intrinsic = " << c_intrinsic << "c_load" << c_load <<endl;
+ }
+
+ for (i = 0; i < number_gates_nand3_path - 1; ++i) {
+ rd = tr_R_on(width_nand3_path_n[i], NCH, 1, is_dram_);
+ c_gate_load = gate_C(width_nand3_path_p[i+1] + width_nand3_path_n[i+1], 0.0, is_dram_);
+ c_intrinsic = drain_C_(width_nand3_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(width_nand3_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_gate_load);
+ this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
+ delay_nand3_path += this_delay;
+ inrisetime_nand3_path = this_delay / (1.0 - 0.5);
+ power_nand3_path.readOp.dynamic += (c_gate_load + c_intrinsic) * 0.5 * Vdd * Vdd;
+ }
+
+ // Final inverter drives the predecoder block or the decoder output load
+ if (number_gates_nand3_path != 0) {
+ i = number_gates_nand3_path - 1;
+ rd = tr_R_on(width_nand3_path_n[i], NCH, 1, is_dram_);
+ c_intrinsic = drain_C_(width_nand3_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(width_nand3_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ c_load = c_load_nand3_path_out;
+ tf = rd * (c_intrinsic + c_load) + r_load_nand3_path_out * c_load / 2;
+ this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
+ delay_nand3_path += this_delay;
+ ret_val.second = this_delay / (1.0 - 0.5);
+ power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * 0.5 * Vdd * Vdd;
+ }
}
-
- for (i = 0; i < number_gates_nand3_path - 1; ++i)
- {
- rd = tr_R_on(width_nand3_path_n[i], NCH, 1, is_dram_);
- c_gate_load = gate_C(width_nand3_path_p[i+1] + width_nand3_path_n[i+1], 0.0, is_dram_);
- c_intrinsic = drain_C_(width_nand3_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(width_nand3_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_gate_load);
- this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
- delay_nand3_path += this_delay;
- inrisetime_nand3_path = this_delay / (1.0 - 0.5);
- power_nand3_path.readOp.dynamic += (c_gate_load + c_intrinsic) * 0.5 * Vdd * Vdd;
- }
-
- // Final inverter drives the predecoder block or the decoder output load
- if (number_gates_nand3_path != 0)
- {
- i = number_gates_nand3_path - 1;
- rd = tr_R_on(width_nand3_path_n[i], NCH, 1, is_dram_);
- c_intrinsic = drain_C_(width_nand3_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(width_nand3_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
- c_load = c_load_nand3_path_out;
- tf = rd*(c_intrinsic + c_load) + r_load_nand3_path_out*c_load / 2;
- this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
- delay_nand3_path += this_delay;
- ret_val.second = this_delay / (1.0 - 0.5);
- power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * 0.5 * Vdd * Vdd;
- }
- }
- return ret_val;
+ return ret_val;
}
-double PredecBlkDrv::get_rdOp_dynamic_E(int num_act_mats_hor_dir)
-{
- return (num_addr_bits_nand2_path()*power_nand2_path.readOp.dynamic +
- num_addr_bits_nand3_path()*power_nand3_path.readOp.dynamic) * num_act_mats_hor_dir;
+double PredecBlkDrv::get_rdOp_dynamic_E(int num_act_mats_hor_dir) {
+ return (num_addr_bits_nand2_path()*power_nand2_path.readOp.dynamic +
+ num_addr_bits_nand3_path()*power_nand3_path.readOp.dynamic) * num_act_mats_hor_dir;
}
@@ -1336,31 +1257,30 @@ double PredecBlkDrv::get_rdOp_dynamic_E(int num_act_mats_hor_dir)
Predec::Predec(
PredecBlkDrv * drv1_,
PredecBlkDrv * drv2_)
-:blk1(drv1_->blk), blk2(drv2_->blk), drv1(drv1_), drv2(drv2_)
-{
- driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage +
- drv1->power_nand3_path.readOp.leakage +
- drv2->power_nand2_path.readOp.leakage +
- drv2->power_nand3_path.readOp.leakage;
- block_power.readOp.leakage = blk1->power_nand2_path.readOp.leakage +
- blk1->power_nand3_path.readOp.leakage +
- blk1->power_L2.readOp.leakage +
- blk2->power_nand2_path.readOp.leakage +
- blk2->power_nand3_path.readOp.leakage +
- blk2->power_L2.readOp.leakage;
- power.readOp.leakage = driver_power.readOp.leakage + block_power.readOp.leakage;
-
- driver_power.readOp.gate_leakage = drv1->power_nand2_path.readOp.gate_leakage +
- drv1->power_nand3_path.readOp.gate_leakage +
- drv2->power_nand2_path.readOp.gate_leakage +
- drv2->power_nand3_path.readOp.gate_leakage;
- block_power.readOp.gate_leakage = blk1->power_nand2_path.readOp.gate_leakage +
- blk1->power_nand3_path.readOp.gate_leakage +
- blk1->power_L2.readOp.gate_leakage +
- blk2->power_nand2_path.readOp.gate_leakage +
- blk2->power_nand3_path.readOp.gate_leakage +
- blk2->power_L2.readOp.gate_leakage;
- power.readOp.gate_leakage = driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage;
+ : blk1(drv1_->blk), blk2(drv2_->blk), drv1(drv1_), drv2(drv2_) {
+ driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage +
+ drv1->power_nand3_path.readOp.leakage +
+ drv2->power_nand2_path.readOp.leakage +
+ drv2->power_nand3_path.readOp.leakage;
+ block_power.readOp.leakage = blk1->power_nand2_path.readOp.leakage +
+ blk1->power_nand3_path.readOp.leakage +
+ blk1->power_L2.readOp.leakage +
+ blk2->power_nand2_path.readOp.leakage +
+ blk2->power_nand3_path.readOp.leakage +
+ blk2->power_L2.readOp.leakage;
+ power.readOp.leakage = driver_power.readOp.leakage + block_power.readOp.leakage;
+
+ driver_power.readOp.gate_leakage = drv1->power_nand2_path.readOp.gate_leakage +
+ drv1->power_nand3_path.readOp.gate_leakage +
+ drv2->power_nand2_path.readOp.gate_leakage +
+ drv2->power_nand3_path.readOp.gate_leakage;
+ block_power.readOp.gate_leakage = blk1->power_nand2_path.readOp.gate_leakage +
+ blk1->power_nand3_path.readOp.gate_leakage +
+ blk1->power_L2.readOp.gate_leakage +
+ blk2->power_nand2_path.readOp.gate_leakage +
+ blk2->power_nand3_path.readOp.gate_leakage +
+ blk2->power_L2.readOp.gate_leakage;
+ power.readOp.gate_leakage = driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage;
}
void PredecBlkDrv::leakage_feedback(double temperature)
@@ -1399,37 +1319,35 @@ void PredecBlkDrv::leakage_feedback(double temperature)
}
}
-double Predec::compute_delays(double inrisetime)
-{
- // TODO: Jung Ho thinks that predecoder block driver locates between decoder and predecoder block.
- pair<double, double> tmp_pair1, tmp_pair2;
- tmp_pair1 = drv1->compute_delays(inrisetime, inrisetime);
- tmp_pair1 = blk1->compute_delays(tmp_pair1);
- tmp_pair2 = drv2->compute_delays(inrisetime, inrisetime);
- tmp_pair2 = blk2->compute_delays(tmp_pair2);
- tmp_pair1 = get_max_delay_before_decoder(tmp_pair1, tmp_pair2);
-
- driver_power.readOp.dynamic =
- drv1->num_addr_bits_nand2_path() * drv1->power_nand2_path.readOp.dynamic +
- drv1->num_addr_bits_nand3_path() * drv1->power_nand3_path.readOp.dynamic +
- drv2->num_addr_bits_nand2_path() * drv2->power_nand2_path.readOp.dynamic +
- drv2->num_addr_bits_nand3_path() * drv2->power_nand3_path.readOp.dynamic;
-
- block_power.readOp.dynamic =
- blk1->power_nand2_path.readOp.dynamic*blk1->num_L1_active_nand2_path +
- blk1->power_nand3_path.readOp.dynamic*blk1->num_L1_active_nand3_path +
- blk1->power_L2.readOp.dynamic +
- blk2->power_nand2_path.readOp.dynamic*blk1->num_L1_active_nand2_path +
- blk2->power_nand3_path.readOp.dynamic*blk1->num_L1_active_nand3_path +
- blk2->power_L2.readOp.dynamic;
-
- power.readOp.dynamic = driver_power.readOp.dynamic + block_power.readOp.dynamic;
-
- delay = tmp_pair1.first;
- return tmp_pair1.second;
+double Predec::compute_delays(double inrisetime) {
+ // TODO: Jung Ho thinks that predecoder block driver locates between decoder and predecoder block.
+ pair<double, double> tmp_pair1, tmp_pair2;
+ tmp_pair1 = drv1->compute_delays(inrisetime, inrisetime);
+ tmp_pair1 = blk1->compute_delays(tmp_pair1);
+ tmp_pair2 = drv2->compute_delays(inrisetime, inrisetime);
+ tmp_pair2 = blk2->compute_delays(tmp_pair2);
+ tmp_pair1 = get_max_delay_before_decoder(tmp_pair1, tmp_pair2);
+
+ driver_power.readOp.dynamic =
+ drv1->num_addr_bits_nand2_path() * drv1->power_nand2_path.readOp.dynamic +
+ drv1->num_addr_bits_nand3_path() * drv1->power_nand3_path.readOp.dynamic +
+ drv2->num_addr_bits_nand2_path() * drv2->power_nand2_path.readOp.dynamic +
+ drv2->num_addr_bits_nand3_path() * drv2->power_nand3_path.readOp.dynamic;
+
+ block_power.readOp.dynamic =
+ blk1->power_nand2_path.readOp.dynamic * blk1->num_L1_active_nand2_path +
+ blk1->power_nand3_path.readOp.dynamic * blk1->num_L1_active_nand3_path +
+ blk1->power_L2.readOp.dynamic +
+ blk2->power_nand2_path.readOp.dynamic * blk1->num_L1_active_nand2_path +
+ blk2->power_nand3_path.readOp.dynamic * blk1->num_L1_active_nand3_path +
+ blk2->power_L2.readOp.dynamic;
+
+ power.readOp.dynamic = driver_power.readOp.dynamic + block_power.readOp.dynamic;
+
+ delay = tmp_pair1.first;
+ return tmp_pair1.second;
}
-
void Predec::leakage_feedback(double temperature)
{
drv1->leakage_feedback(temperature);
@@ -1465,113 +1383,116 @@ void Predec::leakage_feedback(double temperature)
// returns <delay, risetime>
pair<double, double> Predec::get_max_delay_before_decoder(
pair<double, double> input_pair1,
- pair<double, double> input_pair2)
-{
- pair<double, double> ret_val;
- double delay;
-
- delay = drv1->delay_nand2_path + blk1->delay_nand2_path;
- ret_val.first = delay;
- ret_val.second = input_pair1.first;
- delay = drv1->delay_nand3_path + blk1->delay_nand3_path;
- if (ret_val.first < delay)
- {
- ret_val.first = delay;
- ret_val.second = input_pair1.second;
- }
- delay = drv2->delay_nand2_path + blk2->delay_nand2_path;
- if (ret_val.first < delay)
- {
- ret_val.first = delay;
- ret_val.second = input_pair2.first;
- }
- delay = drv2->delay_nand3_path + blk2->delay_nand3_path;
- if (ret_val.first < delay)
- {
+ pair<double, double> input_pair2) {
+ pair<double, double> ret_val;
+ double delay;
+
+ delay = drv1->delay_nand2_path + blk1->delay_nand2_path;
ret_val.first = delay;
- ret_val.second = input_pair2.second;
- }
+ ret_val.second = input_pair1.first;
+ delay = drv1->delay_nand3_path + blk1->delay_nand3_path;
+ if (ret_val.first < delay) {
+ ret_val.first = delay;
+ ret_val.second = input_pair1.second;
+ }
+ delay = drv2->delay_nand2_path + blk2->delay_nand2_path;
+ if (ret_val.first < delay) {
+ ret_val.first = delay;
+ ret_val.second = input_pair2.first;
+ }
+ delay = drv2->delay_nand3_path + blk2->delay_nand3_path;
+ if (ret_val.first < delay) {
+ ret_val.first = delay;
+ ret_val.second = input_pair2.second;
+ }
- return ret_val;
+ return ret_val;
}
-Driver::Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_, bool is_dram)
-:number_gates(0),
- min_number_gates(2),
- c_gate_load(c_gate_load_),
- c_wire_load(c_wire_load_),
- r_wire_load(r_wire_load_),
- delay(0),
- power(),
- is_dram_(is_dram)
-{
- for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++)
- {
- width_n[i] = 0;
- width_p[i] = 0;
- }
+Driver::Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_,
+ bool is_dram)
+ : number_gates(0),
+ min_number_gates(2),
+ c_gate_load(c_gate_load_),
+ c_wire_load(c_wire_load_),
+ r_wire_load(r_wire_load_),
+ delay(0),
+ power(),
+ is_dram_(is_dram) {
+ for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) {
+ width_n[i] = 0;
+ width_p[i] = 0;
+ }
- compute_widths();
+ compute_widths();
}
-void Driver::compute_widths()
-{
- double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_);
- double c_load = c_gate_load + c_wire_load;
- width_n[0] = g_tp.min_w_nmos_;
- width_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
-
- double F = c_load / gate_C(width_n[0] + width_p[0], 0, is_dram_);
- number_gates = logical_effort(
- min_number_gates,
- 1,
- F,
- width_n,
- width_p,
- c_load,
- p_to_n_sz_ratio,
- is_dram_, false,
- g_tp.max_w_nmos_);
+void Driver::compute_widths() {
+ double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_);
+ double c_load = c_gate_load + c_wire_load;
+ width_n[0] = g_tp.min_w_nmos_;
+ width_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
+
+ double F = c_load / gate_C(width_n[0] + width_p[0], 0, is_dram_);
+ number_gates = logical_effort(
+ min_number_gates,
+ 1,
+ F,
+ width_n,
+ width_p,
+ c_load,
+ p_to_n_sz_ratio,
+ is_dram_, false,
+ g_tp.max_w_nmos_);
}
-double Driver::compute_delay(double inrisetime)
-{
- int i;
- double rd, c_load, c_intrinsic, tf;
- double this_delay = 0;
+double Driver::compute_delay(double inrisetime) {
+ int i;
+ double rd, c_load, c_intrinsic, tf;
+ double this_delay = 0;
+
+ for (i = 0; i < number_gates - 1; ++i) {
+ rd = tr_R_on(width_n[i], NCH, 1, is_dram_);
+ c_load = gate_C(width_n[i+1] + width_p[i+1], 0.0, is_dram_);
+ c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
+ drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load);
+ this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
+ delay += this_delay;
+ inrisetime = this_delay / (1.0 - 0.5);
+ power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd *
+ g_tp.peri_global.Vdd;
+ power.readOp.leakage +=
+ cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *
+ g_tp.peri_global.Vdd;
+ power.readOp.gate_leakage +=
+ cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *
+ g_tp.peri_global.Vdd;
+ }
- for (i = 0; i < number_gates - 1; ++i)
- {
+ i = number_gates - 1;
+ c_load = c_gate_load + c_wire_load;
rd = tr_R_on(width_n[i], NCH, 1, is_dram_);
- c_load = gate_C(width_n[i+1] + width_p[i+1], 0.0, is_dram_);
c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_load);
+ drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
+ tf = rd * (c_intrinsic + c_load) + r_wire_load *
+ (c_wire_load / 2 + c_gate_load);
this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
delay += this_delay;
- inrisetime = this_delay / (1.0 - 0.5);
- power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
- power.readOp.leakage += cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *g_tp.peri_global.Vdd;
- power.readOp.gate_leakage += cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_)* g_tp.peri_global.Vdd;
- }
-
- i = number_gates - 1;
- c_load = c_gate_load + c_wire_load;
- rd = tr_R_on(width_n[i], NCH, 1, is_dram_);
- c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_load) + r_wire_load * (c_wire_load / 2 + c_gate_load);
- this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
- delay += this_delay;
- power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
- power.readOp.leakage += cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * g_tp.peri_global.Vdd;
- power.readOp.gate_leakage += cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_)* g_tp.peri_global.Vdd;
-
- return this_delay / (1.0 - 0.5);
+ power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd *
+ g_tp.peri_global.Vdd;
+ power.readOp.leakage +=
+ cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *
+ g_tp.peri_global.Vdd;
+ power.readOp.gate_leakage +=
+ cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *
+ g_tp.peri_global.Vdd;
+
+ return this_delay / (1.0 - 0.5);
}